import os, select
import virt_utils, virt_vm, aexpect


class scheduler:
    """
    A scheduler that manages several parallel test execution pipelines on a
    single host.
    """

    def __init__(self, tests, num_workers, total_cpus, total_mem, bindir):
        """
        Initialize the class.

        @param tests: A list of test dictionaries.
        @param num_workers: The number of workers (pipelines).
        @param total_cpus: The total number of CPUs to dedicate to tests.
        @param total_mem: The total amount of memory to dedicate to tests.
        @param bindir: The directory where environment files reside.
        """
        self.tests = tests
        self.num_workers = num_workers
        self.total_cpus = total_cpus
        self.total_mem = total_mem
        self.bindir = bindir
        # Pipes -- s stands for scheduler, w stands for worker
        self.s2w = [os.pipe() for i in range(num_workers)]
        self.w2s = [os.pipe() for i in range(num_workers)]
        self.s2w_r = [os.fdopen(r, "r", 0) for r, w in self.s2w]
        self.s2w_w = [os.fdopen(w, "w", 0) for r, w in self.s2w]
        self.w2s_r = [os.fdopen(r, "r", 0) for r, w in self.w2s]
        self.w2s_w = [os.fdopen(w, "w", 0) for r, w in self.w2s]
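        # The pipe ends are opened unbuffered (buffering=0) so that each
        # message line is delivered to the other side as soon as it is
        # written.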
| # "Personal" worker dicts contain modifications that are applied |
| # specifically to each worker. For example, each worker must use a |
| # different environment file and a different MAC address pool. |
| self.worker_dicts = [{"env": "env%d" % i} for i in range(num_workers)] |
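        # Note: each test dict is expected to carry a "name" key
        # (dependencies are matched by substring against it); "shortname",
        # "dep", "iterations", "used_cpus" and "used_mem" are read with
        # defaults elsewhere in this class.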


    def worker(self, index, run_test_func):
        """
        The worker function.

        Waits for commands from the scheduler and processes them.

        @param index: The index of this worker (in the range 0..num_workers-1).
        @param run_test_func: A function to be called to run a test
                (e.g. job.run_test).
        """
        r = self.s2w_r[index]
        w = self.w2s_w[index]
        self_dict = self.worker_dicts[index]

        # Inform the scheduler this worker is ready
        w.write("ready\n")

        while True:
            line = r.readline()
            if not line:
                # An empty read means the scheduler closed its end of the
                # pipe -- stop instead of spinning on EOF
                break
            cmd = line.split()
            if not cmd:
                continue

            # The scheduler wants this worker to run a test
            if cmd[0] == "run":
                test_index = int(cmd[1])
                test = self.tests[test_index].copy()
                test.update(self_dict)
                test_iterations = int(test.get("iterations", 1))
                status = run_test_func("kvm", params=test,
                                       tag=test.get("shortname"),
                                       iterations=test_iterations)
                w.write("done %s %s\n" % (test_index, status))
                w.write("ready\n")

            # The scheduler wants this worker to free its used resources
            elif cmd[0] == "cleanup":
                env_filename = os.path.join(self.bindir, self_dict["env"])
                env = virt_utils.Env(env_filename)
                for obj in env.values():
                    if isinstance(obj, virt_vm.VM):
                        obj.destroy()
                    elif isinstance(obj, aexpect.Spawn):
                        obj.close()
                env.save()
                w.write("cleanup_done\n")
                w.write("ready\n")

            # There's no more work for this worker
            elif cmd[0] == "terminate":
                break


    def scheduler(self):
        """
        The scheduler function.

        Sends commands to workers, telling them to run tests, clean up or
        terminate execution.
        """
        idle_workers = []
        closing_workers = []
        test_status = ["waiting"] * len(self.tests)
        test_worker = [None] * len(self.tests)
        used_cpus = [0] * self.num_workers
        used_mem = [0] * self.num_workers
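        # test_status[i] tracks each test's lifecycle ("waiting", "running",
        # "pass" or "fail"); test_worker[i] pins a test to a single worker so
        # that dependent tests reuse that worker's environment file.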

        while True:
            # Wait for a message from a worker
            r, w, x = select.select(self.w2s_r, [], [])

            someone_is_ready = False

            for pipe in r:
                worker_index = self.w2s_r.index(pipe)
                msg = pipe.readline().split()
                if not msg:
                    continue

                # A worker is ready -- add it to the idle_workers list
                if msg[0] == "ready":
                    idle_workers.append(worker_index)
                    someone_is_ready = True
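                    # ("ready" arrives once at worker startup and again
                    # after every "done" and "cleanup_done")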

                # A worker completed a test
                elif msg[0] == "done":
                    test_index = int(msg[1])
                    test = self.tests[test_index]
                    # msg[2] is the string representation of the status
                    # reported by the worker (e.g. "True" or "False");
                    # compare it directly instead of eval()ing it
                    status = msg[2] == "True"
                    test_status[test_index] = "pass" if status else "fail"
                    # If the test failed, mark all dependent tests as
                    # "fail" too
                    if not status:
                        for i, other_test in enumerate(self.tests):
                            for dep in other_test.get("dep", []):
                                if dep in test["name"]:
                                    test_status[i] = "fail"

                # A worker is done shutting down its VMs and other processes
                elif msg[0] == "cleanup_done":
                    used_cpus[worker_index] = 0
                    used_mem[worker_index] = 0
                    closing_workers.remove(worker_index)

            if not someone_is_ready:
                continue

            for worker in idle_workers[:]:
                # Find a test for this worker
                test_found = False
                for i, test in enumerate(self.tests):
                    # We only want "waiting" tests
                    if test_status[i] != "waiting":
                        continue
                    # Make sure the test isn't assigned to another worker
                    if test_worker[i] is not None and test_worker[i] != worker:
                        continue
                    # Make sure the test's dependencies are satisfied
                    dependencies_satisfied = True
                    for dep in test.get("dep", []):
                        dependencies = [j for j, t in enumerate(self.tests)
                                        if dep in t["name"]]
                        bad_status_deps = [j for j in dependencies
                                           if test_status[j] != "pass"]
                        if bad_status_deps:
                            dependencies_satisfied = False
                            break
                    if not dependencies_satisfied:
                        continue
                    # Make sure we have enough resources to run the test
                    test_used_cpus = int(test.get("used_cpus", 1))
                    test_used_mem = int(test.get("used_mem", 128))
                    # First make sure the other workers aren't using too many
                    # CPUs (not including the workers currently shutting down)
                    uc = (sum(used_cpus) - used_cpus[worker] -
                          sum(used_cpus[i] for i in closing_workers))
                    if uc and uc + test_used_cpus > self.total_cpus:
                        continue
                    # ... or too much memory
                    um = (sum(used_mem) - used_mem[worker] -
                          sum(used_mem[i] for i in closing_workers))
                    if um and um + test_used_mem > self.total_mem:
                        continue
                    # If we reached this point it means there are, or will
                    # soon be, enough resources to run the test
                    test_found = True
                    # Now check if the test can be run right now, i.e. if the
                    # other workers, including the ones currently shutting
                    # down, aren't using too many CPUs
                    uc = sum(used_cpus) - used_cpus[worker]
                    if uc and uc + test_used_cpus > self.total_cpus:
                        continue
                    # ... or too much memory
                    um = sum(used_mem) - used_mem[worker]
                    if um and um + test_used_mem > self.total_mem:
                        continue
                    # Everything is OK -- run the test
                    test_status[i] = "running"
                    test_worker[i] = worker
                    idle_workers.remove(worker)
                    # Update used_cpus and used_mem
                    used_cpus[worker] = test_used_cpus
                    used_mem[worker] = test_used_mem
                    # Assign all related tests to this worker
                    for j, other_test in enumerate(self.tests):
                        for other_dep in other_test.get("dep", []):
                            # All tests that depend on this test
                            if other_dep in test["name"]:
                                test_worker[j] = worker
                                break
                            # ... and all tests that share a dependency
                            # with this test
                            for dep in test.get("dep", []):
                                if dep in other_dep or other_dep in dep:
                                    test_worker[j] = worker
                                    break
                    # Tell the worker to run the test
                    self.s2w_w[worker].write("run %s\n" % i)
                    break

                # If there won't be any tests for this worker to run soon,
                # tell the worker to free its used resources
                if not test_found and (used_cpus[worker] or used_mem[worker]):
                    self.s2w_w[worker].write("cleanup\n")
                    idle_workers.remove(worker)
                    closing_workers.append(worker)

            # If there are no more new tests to run, terminate the workers
            # and the scheduler
            if len(idle_workers) == self.num_workers:
                for worker in idle_workers:
                    self.s2w_w[worker].write("terminate\n")
                break
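

# Minimal usage sketch (hypothetical -- the real harness, e.g. an autotest
# control file, supplies the test dicts and a job.run_test-compatible
# function). The scheduler runs in the parent process and each worker in a
# forked child:
#
#     s = scheduler(tests, num_workers=4, total_cpus=4, total_mem=4096,
#                   bindir="/path/to/bindir")
#     for i in range(s.num_workers):
#         if os.fork() == 0:
#             s.worker(i, job.run_test)
#             os._exit(0)
#     s.scheduler()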