| # Copyright (c) 2012 The Chromium OS Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| """Program to run emerge in parallel, for significant speedup. |
| |
| Usage: |
| ./parallel_emerge [--board=BOARD] [--workon=PKGS] |
| [--force-remote-binary=PKGS] [emerge args] package |
| |
| This script runs multiple emerge processes in parallel, using appropriate |
| Portage APIs. It is faster than standard emerge because it has a |
| multiprocess model instead of an asynchronous model. |
| """ |
| |
| from __future__ import print_function |
| |
| import codecs |
| import copy |
| import errno |
| import gc |
| import heapq |
| import multiprocessing |
| import os |
| try: |
| import Queue |
| except ImportError: |
| # Python-3 renamed to "queue". We still use Queue to avoid collisions |
| # with naming variables as "queue". Maybe we'll transition at some point. |
| # pylint: disable=F0401 |
| import queue as Queue |
| import signal |
| import subprocess |
| import sys |
| import tempfile |
| import threading |
| import time |
| import traceback |
| |
| from chromite.lib import cros_build_lib |
| from chromite.lib import cros_event |
| from chromite.lib import process_util |
| from chromite.lib import proctitle |
| |
# If PORTAGE_USERNAME isn't specified, scrape it from the $HOME variable. On
# Chromium OS, the default "portage" user doesn't have the necessary
# permissions. It'd be easier if we could default to $USERNAME, but $USERNAME
# is "root" here because we get called through sudo.
#
# We need to set this before importing any portage modules, because portage
# looks up "PORTAGE_USERNAME" at import time.
#
# NOTE: .bashrc sets PORTAGE_USERNAME = $USERNAME, so most people won't
# encounter this case unless they have an old chroot or blow away the
# environment by running sudo without the -E specifier.
if "PORTAGE_USERNAME" not in os.environ:
  homedir = os.environ.get("HOME")
  if homedir:
    # Normalize first: os.path.basename("/home/user/") is "", which would
    # export an empty PORTAGE_USERNAME. normpath drops the trailing slash.
    _home_user = os.path.basename(os.path.normpath(homedir))
    # HOME="/" still has no final component; leave the variable unset in that
    # case rather than exporting an empty user name.
    if _home_user:
      os.environ["PORTAGE_USERNAME"] = _home_user
| |
# Serialize every subprocess.Popen construction made from this process behind
# a single lock, so that no two Popen calls ever run at the same time.
# Simultaneous Popen calls might be the cause of crbug.com/433482.
_popen_lock = threading.Lock()
_old_popen = subprocess.Popen


def _LockedPopen(*args, **kwargs):
  """Call the original subprocess.Popen while holding the module-wide lock.

  All positional and keyword arguments are forwarded untouched, and whatever
  the original Popen returns (or raises) is passed straight back to the caller.
  """
  _popen_lock.acquire()
  try:
    return _old_popen(*args, **kwargs)
  finally:
    # Always release, even when Popen raises, so later callers do not hang.
    _popen_lock.release()


# From here on, every user of subprocess.Popen gets the locked wrapper.
subprocess.Popen = _LockedPopen
| |
| # Portage doesn't expose dependency trees in its public API, so we have to |
| # make use of some private APIs here. These modules are found under |
| # /usr/lib/portage/pym/. |
| # |
| # TODO(davidjames): Update Portage to expose public APIs for these features. |
| # pylint: disable=F0401 |
| from _emerge.actions import adjust_configs |
| from _emerge.actions import load_emerge_config |
| from _emerge.create_depgraph_params import create_depgraph_params |
| from _emerge.depgraph import backtrack_depgraph |
| from _emerge.main import emerge_main |
| from _emerge.main import parse_opts |
| from _emerge.Package import Package |
| from _emerge.post_emerge import clean_logs |
| from _emerge.Scheduler import Scheduler |
| from _emerge.stdout_spinner import stdout_spinner |
| from portage._global_updates import _global_updates |
| import portage |
| import portage.debug |
| # pylint: enable=F0401 |
| |
| |
def Usage():
  """Print the command-line usage summary for parallel_emerge to stdout.

  The text covers the parallel_emerge-specific options (--board, --workon,
  --rebuild, --eventlogfile); an empty string in the table below produces a
  blank separator line.
  """
  usage_lines = (
      "Usage:",
      " ./parallel_emerge [--board=BOARD] [--workon=PKGS] [--rebuild]",
      " [--eventlogfile=FILE] [emerge args] package",
      "",
      "Packages specified as workon packages are always built from source.",
      "",
      "The --workon argument is mainly useful when you want to build and",
      "install packages that you are working on unconditionally, but do not want",
      "to have to rev the package to indicate you want to build it from",
      "source. The build_packages script will automatically supply the",
      "workon argument to emerge, ensuring that packages selected using",
      "cros-workon are rebuilt.",
      "",
      "The --rebuild option rebuilds packages whenever their dependencies",
      "are changed. This ensures that your build is correct.",
      "",
      "The --eventlogfile option writes events to the given file. The file is",
      "overwritten if it exists.",
  )
  # One print call; the joined text ends with print's own trailing newline.
  print("\n".join(usage_lines))
| |
| |
# Global start time: the time.time() value captured when this module is first
# executed.
GLOBAL_START = time.time()

# Whether process has been killed by a signal. This is a multiprocessing.Event
# that the worker signal handler sets when it runs.
KILLED = multiprocessing.Event()
| |
| |
class EmergeData(object):
  """Plain record bundling the emerge state we pass around as one unit.

  The fields are the variables needed for calculating dependencies and
  installing packages. Every field starts out as None and is filled in later
  by whoever owns the instance.

  Attributes:
    action: The action the user requested. None when the user is installing
      packages; otherwise the action name, which maps exactly to the long-form
      name of the associated emerge option (e.g. "unmerge" for
      `parallel_emerge --unmerge package`).
    cmdline_packages: The list of packages the user passed on the
      command-line.
    depgraph: The emerge dependency graph. It contains all the packages
      involved in this merge, along with their versions.
    favorites: The list of candidates to add to the world file.
    mtimedb: A dictionary used by portage to maintain global state. It is
      loaded from disk when portage starts up and saved whenever
      mtimedb.commit() is called. It holds information about global updates
      (i.e., what version of portage we have) and what portage is currently
      doing, which is how portage supports --resume. parallel_emerge does not
      save what it is currently doing there, so --resume is not supported.
    opts: A dict of the options passed to emerge, already cleaned up by
      parse_opts. Emerge takes shortcuts in that cleanup: for example
      "--usepkg=n" (the default) is simply left out, so the presence of
      "--usepkg" means it is enabled. Not every option is cleaned up this way
      (--with-bdeps is passed in as-is); see parse_opts in _emerge.main for
      the full list.
    root_config: The portage configuration for our current root. It contains
      the portage settings and the three portage trees for that root.
    scheduler_graph: Used by emerge to calculate what packages to install. We
      don't actually install any deps, so this isn't really used, but it is
      passed to the Scheduler object anyway.
    settings: Portage settings for our current session. Most of these are set
      in make.conf inside our current install root.
    spinner: The spinner, which spews stuff to stdout to indicate that portage
      is doing something. We maintain our own spinner, so the portage spinner
      is put in "silent" mode.
    trees: The portage trees, one set per root; self.trees[root] with
      root = self.settings["ROOT"] is the set for the current root. Each root
      has three trees:
        - vartree: A database of the currently-installed packages.
        - porttree: A database of ebuilds that can be used to build packages.
        - bintree: A database of binary packages.
  """

  __slots__ = ["action", "cmdline_packages", "depgraph", "favorites",
               "mtimedb", "opts", "root_config", "scheduler_graph",
               "settings", "spinner", "trees"]

  def __init__(self):
    # Nothing is known yet: blank out every declared slot.
    for field_name in self.__slots__:
      setattr(self, field_name, None)
| |
| |
class DepGraphGenerator(object):
  """Grab dependency information about packages from portage.

  Typical usage:
    deps = DepGraphGenerator()
    deps.Initialize(sys.argv[1:])
    deps_tree, deps_info = deps.GenDependencyTree()
    deps_graph = deps.GenDependencyGraph(deps_tree, deps_info)
    deps.PrintTree(deps_tree)
    PrintDepsMap(deps_graph)
  """

  __slots__ = ["board", "emerge", "package_db", "show_output", "sysroot",
               "unpack_only", "max_retries"]

  def __init__(self):
    # Values of --board / --sysroot, if given on the command line.
    self.board = None
    self.sysroot = None
    # Bundle of emerge state filled in by Initialize() and CreateDepgraph().
    self.emerge = EmergeData()
    # Maps a package's cpv to its portage Package object; populated by
    # GenDependencyTree().
    self.package_db = {}
    # Flags / counters scraped from the parallel_emerge-specific arguments.
    self.show_output = False
    self.unpack_only = False
    self.max_retries = 1

  def ParseParallelEmergeArgs(self, argv):
    """Read the parallel emerge arguments from the command-line.

    We need to be compatible with emerge arg format. We scrape arguments that
    are specific to parallel_emerge, and pass through the rest directly to
    emerge.

    Args:
      argv: arguments list

    Returns:
      Arguments that don't belong to parallel_emerge

    Raises:
      ValueError: If the value given to --retries= is not an integer.
    """
    emerge_args = []
    for arg in argv:
      # Split "--flag=value" at the FIRST "=" only, so that the value is taken
      # verbatim even if it happens to contain "=" or the flag text itself.
      # |has_value| is "" when the argument contains no "=" at all.
      flag, has_value, value = arg.partition("=")

      # Specifically match arguments that are specific to parallel_emerge, and
      # pass through the rest.
      if has_value and flag == "--board":
        self.board = value
      elif has_value and flag == "--sysroot":
        self.sysroot = value
      elif has_value and flag == "--workon":
        emerge_args.append("--reinstall-atoms=%s" % value)
        emerge_args.append("--usepkg-exclude=%s" % value)
      elif has_value and flag == "--force-remote-binary":
        emerge_args.append("--useoldpkg-atoms=%s" % value)
      elif has_value and flag == "--retries":
        self.max_retries = int(value)
      elif arg == "--show-output":
        self.show_output = True
      elif arg == "--rebuild":
        emerge_args.append("--rebuild-if-unbuilt")
      elif arg == "--unpackonly":
        emerge_args.append("--fetchonly")
        self.unpack_only = True
      elif has_value and flag == "--eventlogfile":
        cros_event.setEventLogger(cros_event.getEventFileLogger(value))
      else:
        # Not one of our options, so pass through to emerge.
        emerge_args.append(arg)

    # These packages take a really long time to build, so, for expediency, we
    # are blacklisting them from automatic rebuilds because one of their
    # dependencies needs to be recompiled.
    for pkg in ("chromeos-base/chromeos-chrome",):
      emerge_args.append("--rebuild-exclude=%s" % pkg)

    return emerge_args

  def Initialize(self, args):
    """Initializer. Parses arguments and sets up portage state.

    Exports the environment variables portage expects, loads the emerge
    config, applies the implied options, and stores the result in self.emerge.

    Args:
      args: The command-line arguments (parallel_emerge and emerge options
        mixed together).
    """

    # Parse and strip out args that are just intended for parallel_emerge.
    emerge_args = self.ParseParallelEmergeArgs(args)

    if self.sysroot and self.board:
      cros_build_lib.Die("--sysroot and --board are incompatible.")

    # Setup various environment variables based on our current board. These
    # variables are normally setup inside emerge-${BOARD}, but since we don't
    # call that script, we have to set it up here. These variables serve to
    # point our tools at /build/BOARD and to setup cross compiles to the
    # appropriate board as configured in toolchain.conf.
    if self.board:
      self.sysroot = os.environ.get('SYSROOT',
                                    cros_build_lib.GetSysroot(self.board))

    if self.sysroot:
      os.environ["PORTAGE_CONFIGROOT"] = self.sysroot
      os.environ["SYSROOT"] = self.sysroot

    # Turn off interactive delays
    os.environ["EBEEP_IGNORE"] = "1"
    os.environ["EPAUSE_IGNORE"] = "1"
    os.environ["CLEAN_DELAY"] = "0"

    # Parse the emerge options.
    action, opts, cmdline_packages = parse_opts(emerge_args, silent=True)

    # Set environment variables based on options. Portage normally sets these
    # environment variables in emerge_main, but we can't use that function,
    # because it also does a bunch of other stuff that we don't want.
    # TODO(davidjames): Patch portage to move this logic into a function we can
    # reuse here.
    if "--debug" in opts:
      os.environ["PORTAGE_DEBUG"] = "1"
    if "--config-root" in opts:
      os.environ["PORTAGE_CONFIGROOT"] = opts["--config-root"]
    if "--root" in opts:
      os.environ["ROOT"] = opts["--root"]
    if "--accept-properties" in opts:
      os.environ["ACCEPT_PROPERTIES"] = opts["--accept-properties"]

    # If we're installing packages to the board, we can disable vardb locks.
    # This is safe because we only run up to one instance of parallel_emerge in
    # parallel.
    # TODO(davidjames): Enable this for the host too.
    if self.sysroot:
      os.environ.setdefault("PORTAGE_LOCKS", "false")

    # Now that we've setup the necessary environment variables, we can load the
    # emerge config from disk.
    # pylint: disable=unpacking-non-sequence
    settings, trees, mtimedb = load_emerge_config()

    # Add in EMERGE_DEFAULT_OPTS, if specified.
    tmpcmdline = []
    if "--ignore-default-opts" not in opts:
      tmpcmdline.extend(settings["EMERGE_DEFAULT_OPTS"].split())
    tmpcmdline.extend(emerge_args)
    action, opts, cmdline_packages = parse_opts(tmpcmdline)

    # If we're installing to the board, we want the --root-deps option so that
    # portage will install the build dependencies to that location as well.
    if self.sysroot:
      opts.setdefault("--root-deps", True)

    # Check whether our portage tree is out of date. Typically, this happens
    # when you're setting up a new portage tree, such as in setup_board and
    # make_chroot. In that case, portage applies a bunch of global updates
    # here. Once the updates are finished, we need to commit any changes
    # that the global update made to our mtimedb, and reload the config.
    #
    # Portage normally handles this logic in emerge_main, but again, we can't
    # use that function here.
    if _global_updates(trees, mtimedb["updates"]):
      mtimedb.commit()
      # pylint: disable=unpacking-non-sequence
      settings, trees, mtimedb = load_emerge_config(trees=trees)

    # Setup implied options. Portage normally handles this logic in
    # emerge_main.
    if "--buildpkgonly" in opts or "buildpkg" in settings.features:
      opts.setdefault("--buildpkg", True)
    if "--getbinpkgonly" in opts:
      opts.setdefault("--usepkgonly", True)
      opts.setdefault("--getbinpkg", True)
    if "getbinpkg" in settings.features:
      # Per emerge_main, FEATURES=getbinpkg overrides --getbinpkg=n
      opts["--getbinpkg"] = True
    if "--getbinpkg" in opts or "--usepkgonly" in opts:
      opts.setdefault("--usepkg", True)
    if "--fetch-all-uri" in opts:
      opts.setdefault("--fetchonly", True)
    if "--skipfirst" in opts:
      opts.setdefault("--resume", True)
    if "--buildpkgonly" in opts:
      # --buildpkgonly will not merge anything, so it overrides all binary
      # package options.
      for opt in ("--getbinpkg", "--getbinpkgonly",
                  "--usepkg", "--usepkgonly"):
        opts.pop(opt, None)
    if (settings.get("PORTAGE_DEBUG", "") == "1" and
        "python-trace" in settings.features):
      portage.debug.set_trace(True)

    # Complain about unsupported options
    for opt in ("--ask", "--ask-enter-invalid", "--resume", "--skipfirst"):
      if opt in opts:
        print("%s is not supported by parallel_emerge" % opt)
        sys.exit(1)

    # Make emerge specific adjustments to the config (e.g. colors!)
    adjust_configs(opts, trees)

    # Save our configuration so far in the emerge object
    emerge = self.emerge
    emerge.action, emerge.opts = action, opts
    emerge.settings, emerge.trees, emerge.mtimedb = settings, trees, mtimedb
    emerge.cmdline_packages = cmdline_packages
    root = settings["ROOT"]
    emerge.root_config = trees[root]["root_config"]

    if "--usepkg" in opts:
      emerge.trees[root]["bintree"].populate("--getbinpkg" in opts)

  def CreateDepgraph(self, emerge, packages):
    """Create an emerge depgraph object.

    On success the graph and the favorites list are stored on |emerge|. On
    failure the problems are displayed and the process exits with status 1.

    Args:
      emerge: The EmergeData to read settings from and store results in.
      packages: The packages to build the dependency graph for.
    """
    # Setup emerge options.
    emerge_opts = emerge.opts.copy()

    # Ask portage to build a dependency graph. with the options we specified
    # above.
    params = create_depgraph_params(emerge_opts, emerge.action)
    success, depgraph, favorites = backtrack_depgraph(
        emerge.settings, emerge.trees, emerge_opts, params, emerge.action,
        packages, emerge.spinner)
    # Record the graph before the success check so that it is available even
    # when we bail out below.
    emerge.depgraph = depgraph

    # Is it impossible to honor the user's request? Bail!
    if not success:
      depgraph.display_problems()
      sys.exit(1)

    emerge.favorites = favorites

    # Prime and flush emerge caches.
    root = emerge.settings["ROOT"]
    vardb = emerge.trees[root]["vartree"].dbapi
    if "--pretend" not in emerge.opts:
      vardb.counter_tick()
    vardb.flush_cache()

  def GenDependencyTree(self):
    """Get dependency tree info from emerge.

    Also fills in self.package_db with the Package objects from portage's
    install plan.

    Returns:
      A (deps_tree, deps_info) tuple. deps_tree maps each package in the
      current root to a dict with its "action" and its direct "deps";
      deps_info maps each package in portage's install plan to a dict holding
      its position ("idx") in that plan.
    """
    start = time.time()

    emerge = self.emerge

    # Create a list of packages to merge
    packages = set(emerge.cmdline_packages)

    # Tell emerge to be quiet. We print plenty of info ourselves so we don't
    # need any extra output from portage.
    portage.util.noiselimit = -1

    # My favorite feature: The silent spinner. It doesn't spin. Ever.
    # I'd disable the colors by default too, but they look kind of cool.
    emerge.spinner = stdout_spinner()
    emerge.spinner.update = emerge.spinner.update_quiet

    if "--quiet" not in emerge.opts:
      print("Calculating deps...")

    with cros_event.newEvent(step="generate dependency tree"):
      self.CreateDepgraph(emerge, packages)
      depgraph = emerge.depgraph

    # Build our own tree from the emerge digraph.
    deps_tree = {}
    # pylint: disable=W0212
    digraph = depgraph._dynamic_config.digraph
    root = emerge.settings["ROOT"]
    final_db = depgraph._dynamic_config._filtered_trees[root]['graph_db']
    for node, node_deps in digraph.nodes.items():
      # Calculate dependency packages that need to be installed first. Each
      # child on the digraph is a dependency. The "operation" field specifies
      # what we're doing (e.g. merge, uninstall, etc.). The "priorities" array
      # contains the type of dependency (e.g. build, runtime, runtime_post,
      # etc.)
      #
      # Portage refers to the identifiers for packages as a CPV. This acronym
      # stands for Component/Path/Version.
      #
      # Here's an example CPV: chromeos-base/power_manager-0.0.1-r1
      # Split up, this CPV would be:
      # C -- Component: chromeos-base
      # P -- Path: power_manager
      # V -- Version: 0.0.1-r1
      #
      # We just refer to CPVs as packages here because it's easier.
      deps = {}
      for child, priorities in node_deps[0].items():
        if isinstance(child, Package) and child.root == root:
          cpv = str(child.cpv)
          action = str(child.operation)

          # If we're uninstalling a package, check whether Portage is
          # installing a replacement. If so, just depend on the installation
          # of the new package, because the old package will automatically
          # be uninstalled at that time.
          if action == "uninstall":
            for pkg in final_db.match_pkgs(child.slot_atom):
              cpv = str(pkg.cpv)
              action = "merge"
              break

          deps[cpv] = dict(action=action,
                           deptypes=[str(x) for x in priorities],
                           deps={})

      # We've built our list of deps, so we can add our package to the tree.
      if isinstance(node, Package) and node.root == root:
        deps_tree[str(node.cpv)] = dict(action=str(node.operation),
                                        deps=deps)

    # Ask portage for its install plan, so that we can only throw out
    # dependencies that portage throws out.
    deps_info = {}
    for pkg in depgraph.altlist():
      if isinstance(pkg, Package):
        assert pkg.root == root
        self.package_db[pkg.cpv] = pkg

        # Save off info about the package
        deps_info[str(pkg.cpv)] = {"idx": len(deps_info)}

    seconds = time.time() - start
    if "--quiet" not in emerge.opts:
      print("Deps calculated in %dm%.1fs" % (seconds / 60, seconds % 60))

    return deps_tree, deps_info

  def PrintTree(self, deps, depth=""):
    """Print the deps we have seen in the emerge output.

    Entries are printed in sorted order, each followed by its own deps.

    Args:
      deps: Dependency tree structure.
      depth: Allows printing the tree recursively, with indentation.
    """
    for entry in sorted(deps):
      action = deps[entry]["action"]
      print("%s %s (%s)" % (depth, entry, action))
      self.PrintTree(deps[entry]["deps"], depth=depth + " ")

  def GenDependencyGraph(self, deps_tree, deps_info):
    """Generate a doubly linked dependency graph.

    Args:
      deps_tree: Dependency tree structure.
      deps_info: More details on the dependencies.

    Returns:
      Deps graph in the form of a dict of packages, with each package
      specifying a "needs" list and "provides" list.
    """
    emerge = self.emerge

    # deps_map is the actual dependency graph.
    #
    # Each package specifies a "needs" list and a "provides" list. The "needs"
    # list indicates which packages we depend on. The "provides" list
    # indicates the reverse dependencies -- what packages need us.
    #
    # We also provide some other information in the dependency graph:
    #  - action: What we're planning on doing with this package. Generally,
    #            "merge", "nomerge", or "uninstall"
    deps_map = {}

    def ReverseTree(packages):
      """Convert tree to digraph.

      Take the tree of package -> requirements and reverse it to a digraph of
      buildable packages -> packages they unblock.

      Args:
        packages: Tree(s) of dependencies.

      Returns:
        Unsanitized digraph.
      """
      binpkg_phases = set(["setup", "preinst", "postinst"])
      needed_dep_types = set(["blocker", "buildtime", "buildtime_slot_op",
                              "runtime", "runtime_slot_op"])
      ignored_dep_types = set(["ignored", "optional", "runtime_post", "soft"])
      all_dep_types = ignored_dep_types | needed_dep_types
      for pkg in packages:

        # Create an entry for the package
        action = packages[pkg]["action"]
        default_pkg = {"needs": {}, "provides": set(), "action": action,
                       "nodeps": False, "binary": False}
        this_pkg = deps_map.setdefault(pkg, default_pkg)

        if pkg in deps_info:
          this_pkg["idx"] = deps_info[pkg]["idx"]

        # If a package doesn't have any defined phases that might use the
        # dependent packages (i.e. pkg_setup, pkg_preinst, or pkg_postinst),
        # we can install this package before its deps are ready.
        emerge_pkg = self.package_db.get(pkg)
        if emerge_pkg and emerge_pkg.type_name == "binary":
          this_pkg["binary"] = True
          defined_phases = emerge_pkg.defined_phases
          defined_binpkg_phases = binpkg_phases.intersection(defined_phases)
          if not defined_binpkg_phases:
            this_pkg["nodeps"] = True

        # Create entries for dependencies of this package first.
        ReverseTree(packages[pkg]["deps"])

        # Add dependencies to this package.
        for dep, dep_item in packages[pkg]["deps"].items():
          # We only need to enforce strict ordering of dependencies if the
          # dependency is a blocker, or is a buildtime or runtime dependency.
          # (I.e., ignored, optional, and runtime_post dependencies don't
          # depend on ordering.)
          dep_types = dep_item["deptypes"]
          if needed_dep_types.intersection(dep_types):
            deps_map[dep]["provides"].add(pkg)
            this_pkg["needs"][dep] = "/".join(dep_types)

          # Verify we processed all appropriate dependency types.
          unknown_dep_types = set(dep_types) - all_dep_types
          if unknown_dep_types:
            print("Unknown dependency types found:")
            print(" %s -> %s (%s)" % (pkg, dep, "/".join(unknown_dep_types)))
            sys.exit(1)

          # If there's a blocker, Portage may need to move files from one
          # package to another, which requires editing the CONTENTS files of
          # both packages. To avoid race conditions while editing this file,
          # the two packages must not be installed in parallel, so we can't
          # safely ignore dependencies. See http://crosbug.com/19328
          if "blocker" in dep_types:
            this_pkg["nodeps"] = False

    def FindCycles():
      """Find cycles in the dependency tree.

      Returns:
        A dict mapping cyclic packages to a dict of the deps that cause
        cycles. For each dep that causes cycles, it returns an example
        traversal of the graph that shows the cycle.
      """

      def FindCyclesAtNode(pkg, cycles, unresolved, resolved):
        """Find cycles in cyclic dependencies starting at specified package.

        Args:
          pkg: Package identifier.
          cycles: A dict mapping cyclic packages to a dict of the deps that
            cause cycles. For each dep that causes cycles, it returns an
            example traversal of the graph that shows the cycle.
          unresolved: Nodes that have been visited but are not fully processed.
          resolved: Nodes that have been visited and are fully processed.
        """
        pkg_cycles = cycles.get(pkg)
        if pkg in resolved and not pkg_cycles:
          # If we already looked at this package, and found no cyclic
          # dependencies, we can stop now.
          return
        unresolved.append(pkg)
        for dep in deps_map[pkg]["needs"]:
          if dep in unresolved:
            # |dep| is already on the current traversal path, so the path
            # from |dep| back to |dep| is a cycle. Record each edge on it.
            idx = unresolved.index(dep)
            mycycle = unresolved[idx:] + [dep]
            for i in range(len(mycycle) - 1):
              pkg1, pkg2 = mycycle[i], mycycle[i+1]
              cycles.setdefault(pkg1, {}).setdefault(pkg2, mycycle)
          elif not pkg_cycles or dep not in pkg_cycles:
            # Looks like we haven't seen this edge before.
            FindCyclesAtNode(dep, cycles, unresolved, resolved)
        unresolved.pop()
        resolved.add(pkg)

      cycles, unresolved, resolved = {}, [], set()
      for pkg in deps_map:
        FindCyclesAtNode(pkg, cycles, unresolved, resolved)
      return cycles

    def RemoveUnusedPackages():
      """Remove installed packages, propagating dependencies."""
      # Schedule packages that aren't on the install list for removal
      rm_pkgs = set(deps_map.keys()) - set(deps_info.keys())

      # Remove the packages we don't want, simplifying the graph and making
      # it easier for us to crack cycles.
      for pkg in sorted(rm_pkgs):
        this_pkg = deps_map[pkg]
        needs = this_pkg["needs"]
        provides = this_pkg["provides"]
        # Splice |pkg| out of the graph: everything that needed |pkg| now
        # needs |pkg|'s own dependencies directly (and vice versa), minus any
        # self-references that the splice would create.
        for dep in needs:
          dep_provides = deps_map[dep]["provides"]
          dep_provides.update(provides)
          dep_provides.discard(pkg)
          dep_provides.discard(dep)
        for target in provides:
          target_needs = deps_map[target]["needs"]
          target_needs.update(needs)
          target_needs.pop(pkg, None)
          target_needs.pop(target, None)
        del deps_map[pkg]

    def PrintCycleBreak(basedep, dep, mycycle):
      """Print details about a cycle that we are planning on breaking.

      We are breaking a cycle where dep needs basedep. mycycle is an
      example cycle which contains dep -> basedep.
      """

      needs = deps_map[dep]["needs"]
      depinfo = needs.get(basedep, "deleted")

      # It's OK to swap install order for blockers, as long as the two
      # packages aren't installed in parallel. If there is a cycle, then
      # we know the packages depend on each other already, so we can drop the
      # blocker safely without printing a warning.
      if depinfo == "blocker":
        return

      # Notify the user that we're breaking a cycle.
      print("Breaking %s -> %s (%s)" % (dep, basedep, depinfo))

      # Show cycle.
      for i in range(len(mycycle) - 1):
        pkg1, pkg2 = mycycle[i], mycycle[i+1]
        needs = deps_map[pkg1]["needs"]
        depinfo = needs.get(pkg2, "deleted")
        if pkg1 == dep and pkg2 == basedep:
          depinfo = depinfo + ", deleting"
        print(" %s -> %s (%s)" % (pkg1, pkg2, depinfo))

    def SanitizeTree():
      """Remove circular dependencies.

      We prune all dependencies involved in cycles that go against the emerge
      ordering. This has a nice property: we're guaranteed to merge
      dependencies in the same order that portage does.

      Because we don't treat any dependencies as "soft" unless they're killed
      by a cycle, we pay attention to a larger number of dependencies when
      merging. This hurts performance a bit, but helps reliability.
      """
      start = time.time()
      cycles = FindCycles()
      while cycles:
        # Only deps_map is edited inside the loops; |cycles| itself is left
        # alone until it is recomputed for the next pass.
        for dep, mycycles in cycles.items():
          for basedep, mycycle in mycycles.items():
            if deps_info[basedep]["idx"] >= deps_info[dep]["idx"]:
              if "--quiet" not in emerge.opts:
                PrintCycleBreak(basedep, dep, mycycle)
              del deps_map[dep]["needs"][basedep]
              deps_map[basedep]["provides"].remove(dep)
        cycles = FindCycles()
      seconds = time.time() - start
      if "--quiet" not in emerge.opts and seconds >= 0.1:
        print("Tree sanitized in %dm%.1fs" % (seconds / 60, seconds % 60))

    def FindRecursiveProvides(pkg, seen):
      """Find all nodes that require a particular package.

      Assumes that graph is acyclic. The result is stored on the package's
      entry under the "tprovides" key.

      Args:
        pkg: Package identifier.
        seen: Nodes that have been visited so far.
      """
      if pkg in seen:
        return
      seen.add(pkg)
      info = deps_map[pkg]
      info["tprovides"] = info["provides"].copy()
      for dep in info["provides"]:
        FindRecursiveProvides(dep, seen)
        info["tprovides"].update(deps_map[dep]["tprovides"])

    ReverseTree(deps_tree)

    # We need to remove unused packages so that we can use the dependency
    # ordering of the install process to show us what cycles to crack.
    RemoveUnusedPackages()
    SanitizeTree()
    seen = set()
    for pkg in deps_map:
      FindRecursiveProvides(pkg, seen)
    return deps_map

  def PrintInstallPlan(self, deps_map):
    """Print an emerge-style install plan.

    The install plan lists what packages we're installing, in order.
    It's useful for understanding what parallel_emerge is doing.

    If some packages can never be scheduled, they are reported as cyclic
    dependencies and the process exits with status 1.

    Args:
      deps_map: The dependency graph.
    """

    def InstallPlanAtNode(target, deps_map):
      """Return |target| followed by every package it (transitively) unblocks.

      Removes |target| from the "needs" of each package it provides for.
      """
      nodes = []
      nodes.append(target)
      for dep in deps_map[target]["provides"]:
        del deps_map[dep]["needs"][target]
        if not deps_map[dep]["needs"]:
          nodes.extend(InstallPlanAtNode(dep, deps_map))
      return nodes

    # Work on a private copy: building the plan consumes the "needs" entries.
    deps_map = copy.deepcopy(deps_map)
    install_plan = []
    plan = set()
    for target, info in deps_map.items():
      if not info["needs"] and target not in plan:
        for item in InstallPlanAtNode(target, deps_map):
          plan.add(item)
          install_plan.append(self.package_db[item])

    for pkg in plan:
      del deps_map[pkg]

    # Anything left over could never have its "needs" emptied.
    if deps_map:
      print("Cyclic dependencies:", " ".join(deps_map))
      PrintDepsMap(deps_map)
      sys.exit(1)

    self.emerge.depgraph.display(install_plan)
| |
| |
def PrintDepsMap(deps_map):
  """Print the dependency graph, listing each package's prerequisites.

  Packages are printed in sorted order. Each one is followed by its sorted
  "needs" entries, or by a "no dependencies" line when it has none.

  Args:
    deps_map: Dict mapping each package to a dict with at least the "action"
      and "needs" keys.
  """
  for pkg_name in sorted(deps_map):
    entry = deps_map[pkg_name]
    print("%s: (%s) needs" % (pkg_name, entry["action"]))
    prerequisites = sorted(entry["needs"])
    if prerequisites:
      for prerequisite in prerequisites:
        print(" %s" % (prerequisite))
    else:
      print(" no dependencies")
| |
| |
class EmergeJobState(object):
  """Bookkeeping record describing the state of one emerge job.

  Attributes:
    target: The full name of the target we're building (e.g.
      virtual/target-os-1-r60).
    pkgname: The short name of the target we're building (e.g.
      target-os-1-r60).
    done: Whether the job is done (True if it is; False otherwise).
    filename: The filename where output is currently stored.
    start_timestamp: The timestamp when our job started.
    last_notify_timestamp: The timestamp of the last time we printed the name
      of the log file. We print this at the beginning of the job, so it starts
      out equal to start_timestamp.
    last_output_seek: The location (in bytes) of the end of the last complete
      line we printed. Starts at zero; used to jump to the right place when
      output from the same ebuild is printed multiple times.
    last_output_timestamp: The timestamp of the last time we printed output.
      Starts at zero because nothing has been printed yet.
    retcode: The return code of our job, if the job is actually finished.
    fetch_only: Was this just a fetch job?
    unpack_only: No emerge, only unpack packages.
  """

  __slots__ = ["done", "filename", "last_notify_timestamp", "last_output_seek",
               "last_output_timestamp", "pkgname", "retcode", "start_timestamp",
               "target", "fetch_only", "unpack_only"]

  def __init__(self, target, pkgname, done, filename, start_timestamp,
               retcode=None, fetch_only=False, unpack_only=False):
    # What is being built, and where its output goes.
    self.target, self.pkgname = target, pkgname
    self.filename = filename

    # What kind of job this is.
    self.fetch_only, self.unpack_only = fetch_only, unpack_only

    # Completion status.
    self.done, self.retcode = done, retcode

    # Timing. The log file name counts as announced at job start.
    self.start_timestamp = start_timestamp
    self.last_notify_timestamp = start_timestamp

    # Output-printing progress: nothing has been printed so far.
    self.last_output_seek = 0
    self.last_output_timestamp = 0
| |
| |
def KillHandler(_signum, _frame):
  """Signal handler: SIGKILL ourselves together with all our subprocesses."""
  # Process group 0 addresses the group of the calling process, so this
  # takes down the caller and everything sharing its group.
  own_group = 0
  os.killpg(own_group, signal.SIGKILL)
| |
| |
def SetupWorkerSignals():
  """Install quiet SIGINT/SIGTERM handling for a worker process.

  By default, when the user hits CTRL-C every child process prints details
  about a KeyboardInterrupt exception, which isn't very helpful. Instead,
  the first SIGINT or SIGTERM only sets the KILLED flag; from then on either
  signal is routed to KillHandler.
  """
  handled = (signal.SIGINT, signal.SIGTERM)

  def ExitHandler(_signum, _frame):
    # Record that we were asked to stop.
    KILLED.set()

    # Swap ourselves out so we can't be re-entered; a second signal goes to
    # KillHandler instead.
    for sig in handled:
      signal.signal(sig, KillHandler)

  for sig in handled:
    signal.signal(sig, ExitHandler)
| |
| |
def EmergeProcess(output, target, *args, **kwargs):
  """Merge a package in a subprocess.

  Forks. The child points its stdout/stderr at |output|, builds a Scheduler
  from |args|/|kwargs|, runs scheduler.merge() and always leaves through
  os._exit(). The parent blocks in os.waitpid() until the child is gone.

  Args:
    output: Temporary file to write output.
    target: The package we'll be processing (for display purposes).
    *args: Arguments to pass to Scheduler constructor.
    **kwargs: Keyword arguments to pass to Scheduler constructor.

  Returns:
    The status value reported by os.waitpid() for the subprocess. This is
    the raw wait status rather than a decoded exit code; it is 0 when the
    child exited with 0.
  """
  event = cros_event.newEvent(step="emerge", package=target)
  pid = os.fork()
  if pid == 0:
    # Child process.
    try:
      proctitle.settitle('EmergeProcess', target)

      # Sanity checks.
      if sys.stdout.fileno() != 1:
        raise Exception("sys.stdout.fileno() != 1")
      if sys.stderr.fileno() != 2:
        raise Exception("sys.stderr.fileno() != 2")

      # - Redirect 1 (stdout) and 2 (stderr) at our temporary file.
      # - Redirect 0 to point at sys.stdin. In this case, sys.stdin
      #   points at a file reading os.devnull, because multiprocessing mucks
      #   with sys.stdin.
      # - Leave the sys.stdin and output filehandles alone.
      fd_pipes = {0: sys.stdin.fileno(),
                  1: output.fileno(),
                  2: output.fileno(),
                  sys.stdin.fileno(): sys.stdin.fileno(),
                  output.fileno(): output.fileno()}
      # pylint: disable=W0212
      portage.process._setup_pipes(fd_pipes, close_fds=False)

      # Portage doesn't like when sys.stdin.fileno() != 0, so point sys.stdin
      # at the filehandle we just created in _setup_pipes.
      if sys.stdin.fileno() != 0:
        sys.__stdin__ = sys.stdin = os.fdopen(0, "r")

      scheduler = Scheduler(*args, **kwargs)

      # Enable blocker handling even though we're in --nodeps mode. This
      # allows us to unmerge the blocker after we've merged the replacement.
      scheduler._opts_ignore_blockers = frozenset()

      # Actually do the merge.
      with event:
        retval = scheduler.merge()
        if retval != 0:
          event.fail(message="non-zero value returned")

    # We catch all exceptions here (including SystemExit, KeyboardInterrupt,
    # etc) so as to ensure that we don't confuse the multiprocessing module,
    # which expects that all forked children exit with os._exit().
    # pylint: disable=W0702
    except:
      # The traceback goes into the job's log file; the job counts as failed.
      traceback.print_exc(file=output)
      retval = 1
    # os._exit() skips normal interpreter cleanup, so flush by hand first.
    sys.stdout.flush()
    sys.stderr.flush()
    output.flush()
    # pylint: disable=W0212
    os._exit(retval)
  else:
    # Parent process: wait for the child and hand back its wait status.
    return os.waitpid(pid, 0)[1]
| |
| |
def UnpackPackage(pkg_state):
  """Unpacks package described by pkg_state.

  Decompresses $PKGDIR/<target>.tbz2 and pipes the result into
  "sudo tar -x" rooted at $ROOT. PKGDIR defaults to $SYSROOT/packages and
  ROOT defaults to $SYSROOT.

  Args:
    pkg_state: State object describing the target. Only its |target|
      attribute is read.

  Returns:
    Exit code returned by subprocess: the decompressor's code if
    decompression failed, otherwise tar's.

  Raises:
    KeyError: If SYSROOT is not set in the environment.
  """
  pkgdir = os.environ.get("PKGDIR",
                          os.path.join(os.environ["SYSROOT"], "packages"))
  root = os.environ.get("ROOT", os.environ["SYSROOT"])
  path = os.path.join(pkgdir, pkg_state.target + ".tbz2")
  comp = cros_build_lib.FindCompressor(cros_build_lib.COMP_BZIP2)
  cmd = [comp, "-dc"]
  if comp.endswith("pbzip2"):
    cmd.append("--ignore-trailing-garbage=1")
  cmd.append(path)

  # The state object is not a mapping, so it cannot be splatted with "**";
  # name the package explicitly, the same way EmergeProcess does.
  with cros_event.newEvent(step="unpack package",
                           package=pkg_state.target) as event:
    result = cros_build_lib.RunCommand(cmd, cwd=root, stdout_to_pipe=True,
                                       print_cmd=False, error_code_ok=True)

    # If we were not successful, return now and don't attempt untar.
    if result.returncode != 0:
      event.fail("error decompressing: returned {}".format(result.returncode))
      return result.returncode

    cmd = ["sudo", "tar", "-xf", "-", "-C", root]

    # Feed the decompressed archive to tar on stdin.
    result = cros_build_lib.RunCommand(cmd, cwd=root, input=result.output,
                                       print_cmd=False, error_code_ok=True)
    if result.returncode != 0:
      event.fail("error extracting: returned {}".format(result.returncode))

    return result.returncode
| |
| |
def EmergeWorker(task_queue, job_queue, emerge, package_db, fetch_only=False,
                 unpack_only=False):
  """This worker emerges any packages given to it on the task_queue.

  Args:
    task_queue: The queue of tasks for this worker to do.
    job_queue: The queue of results from the worker.
    emerge: An EmergeData() object.
    package_db: A dict, mapping package ids to portage Package objects.
    fetch_only: A bool, indicating if we should just fetch the target.
    unpack_only: A bool, indicating if we should just unpack the target.

  It expects package state objects to be passed to it via task_queue. For
  every task it pushes two EmergeJobState objects to the job_queue: one with
  done=False when the task starts (carrying the name of the log file that
  receives the output) and one with done=True and the return code when it
  finishes. A None task tells the worker to exit; the None is put back on
  the queue so that sibling workers see it too.
  """
  if fetch_only:
    mode = 'fetch'
  elif unpack_only:
    mode = 'unpack'
  else:
    mode = 'emerge'
  proctitle.settitle('EmergeWorker', mode, '[idle]')

  SetupWorkerSignals()
  settings, trees, mtimedb = emerge.settings, emerge.trees, emerge.mtimedb

  # Disable flushing of caches to save on I/O.
  root = emerge.settings["ROOT"]
  vardb = emerge.trees[root]["vartree"].dbapi
  vardb._flush_cache_enabled = False  # pylint: disable=protected-access
  bindb = emerge.trees[root]["bintree"].dbapi
  # Might be a set, might be a list, might be None; no clue, just use shallow
  # copy to ensure we can roll it back.
  # pylint: disable=W0212
  original_remotepkgs = copy.copy(bindb.bintree._remotepkgs)

  opts, spinner = emerge.opts, emerge.spinner
  opts["--nodeps"] = True
  if fetch_only:
    opts["--fetchonly"] = True

  while True:
    # Wait for a new item to show up on the queue. This is a blocking wait,
    # so if there's nothing to do, we just sit here.
    pkg_state = task_queue.get()
    if pkg_state is None:
      # If target is None, this means that the main thread wants us to quit.
      # The other workers need to exit too, so we'll push the message back on
      # to the queue so they'll get it too.
      task_queue.put(None)
      return
    if KILLED.is_set():
      return

    target = pkg_state.target
    proctitle.settitle('EmergeWorker', mode, target)

    db_pkg = package_db[target]

    if db_pkg.type_name == "binary":
      if not fetch_only and pkg_state.fetched_successfully:
        # Ensure portage doesn't think our pkg is remote- else it'll force
        # a redownload of it (even if the on-disk file is fine).  In-memory
        # caching basically, implemented dumbly.
        bindb.bintree._remotepkgs = None
      else:
        # Roll back to the remote package list we saved at startup. This has
        # to hit the same attribute that is cleared above, otherwise a
        # previous task's None would stick for the rest of this worker's
        # life.
        bindb.bintree._remotepkgs = original_remotepkgs

    db_pkg.root_config = emerge.root_config
    install_list = [db_pkg]
    pkgname = db_pkg.pf
    output = tempfile.NamedTemporaryFile(prefix=pkgname + "-", delete=False)
    # Permission bits are octal: rw-r--r--.
    os.chmod(output.name, 0o644)
    start_timestamp = time.time()
    job = EmergeJobState(target, pkgname, False, output.name, start_timestamp,
                         fetch_only=fetch_only, unpack_only=unpack_only)
    job_queue.put(job)
    if "--pretend" in opts:
      retcode = 0
    else:
      try:
        emerge.scheduler_graph.mergelist = install_list
        if unpack_only:
          retcode = UnpackPackage(pkg_state)
        else:
          retcode = EmergeProcess(output, target, settings, trees, mtimedb,
                                  opts, spinner, favorites=emerge.favorites,
                                  graph_config=emerge.scheduler_graph)
      except Exception:
        # Leave the traceback in the job's log and report a plain failure.
        traceback.print_exc(file=output)
        retcode = 1
    # The log stays on disk (delete=False); we only release our handle, on
    # every path, so that a long-lived worker does not pile up descriptors.
    output.close()

    if KILLED.is_set():
      return

    job = EmergeJobState(target, pkgname, True, output.name, start_timestamp,
                         retcode, fetch_only=fetch_only,
                         unpack_only=unpack_only)
    job_queue.put(job)

    # Set the title back to idle as the multiprocess pool won't destroy us;
    # when another job comes up, it'll re-use this process.
    proctitle.settitle('EmergeWorker', mode, '[idle]')
| |
| |
class LinePrinter(object):
  """Print request that emits one fixed line of text."""

  def __init__(self, line):
    """Remember the line to print later."""
    self.line = line

  def Print(self, _seek_locations):
    """Print the stored line; the seek-location map is not used."""
    text = self.line
    print(text)
| |
| |
class JobPrinter(object):
  """Helper object to print output of a job."""

  def __init__(self, job, unlink=False):
    """Print output of job.

    If unlink is True, unlink the job output file when done.

    Args:
      job: The job whose log should be printed. Print() reads its pkgname,
        filename, start_timestamp and done fields and updates its
        last_output_timestamp.
      unlink: Whether to delete the job's log file after printing it.
    """
    # The time is captured when the request is created, not when it is
    # eventually printed.
    self.current_time = time.time()
    self.job = job
    self.unlink = unlink

  def Print(self, seek_locations):
    """Print the part of the job's log that has not been printed yet.

    Args:
      seek_locations: Dict mapping log file name to the offset just past the
        last complete line already printed. Read to find the starting point
        and updated once the file has been printed.
    """

    job = self.job

    # Calculate how long the job has been running.
    seconds = self.current_time - job.start_timestamp

    # Note that we've printed out the job so far.
    job.last_output_timestamp = self.current_time

    # Note that we're starting the job
    info = "job %s (%dm%.1fs)" % (job.pkgname, seconds / 60, seconds % 60)
    last_output_seek = seek_locations.get(job.filename, 0)
    if last_output_seek:
      print("=== Continue output for %s ===" % info)
    else:
      print("=== Start output for %s ===" % info)

    # Print actual output from job
    f = codecs.open(job.filename, encoding='utf-8', errors='replace')
    # Close the log even if printing is interrupted part-way (for instance by
    # an IOError raised while writing to stdout); otherwise every interrupted
    # print would leave a file handle behind.
    try:
      f.seek(last_output_seek)
      prefix = job.pkgname + ":"
      for line in f:

        # Save off our position in the file. Only complete lines move the
        # offset, so a partial trailing line is printed again next time.
        if line and line[-1] == "\n":
          last_output_seek = f.tell()
          line = line[:-1]

        # Print our line
        print(prefix, line.encode('utf-8', 'replace'))
    finally:
      f.close()

    # Save our last spot in the file so that we don't print out the same
    # location twice.
    seek_locations[job.filename] = last_output_seek

    # Note end of output section
    if job.done:
      print("=== Complete: %s ===" % info)
    else:
      print("=== Still running: %s ===" % info)

    if self.unlink:
      os.unlink(job.filename)
| |
| |
def PrintWorker(queue):
  """Worker loop that prints whatever is requested through |queue|.

  Every truthy item taken from the queue has its Print() method called with
  a shared seek-location dict, and stdout is flushed afterwards. The first
  falsy item ends the loop.

  Args:
    queue: Queue delivering the print requests.
  """
  proctitle.settitle('PrintWorker')

  handled = (signal.SIGINT, signal.SIGTERM)

  def ExitHandler(_signum, _frame):
    # Remember that we were asked to stop.
    KILLED.set()

    # From now on a further signal goes to KillHandler, so we die after two
    # signals.
    for sig in handled:
      signal.signal(sig, KillHandler)

  # Don't exit on the first SIGINT / SIGTERM, because the parent worker will
  # handle it and tell us when we need to exit.
  for sig in handled:
    signal.signal(sig, ExitHandler)

  # Map of file name to the position we have reached in that file. It starts
  # off empty and is filled in by the Print jobs as they go along.
  seek_locations = {}
  while True:
    try:
      job = queue.get()
      if not job:
        return
      job.Print(seek_locations)
      sys.stdout.flush()
    except IOError as ex:
      # An interrupted call means we received a signal; keep printing.
      # Anything else is a real error.
      if ex.errno != errno.EINTR:
        raise
| |
| |
class TargetState(object):
  """Structure describing the TargetState.

  Instances order by |score|, smallest first. With the score built in
  update_score(), that puts first the targets with the most transitive
  provides, then the fewest outstanding needs, then binary packages, then
  the most direct provides, then the lowest idx, then the target name.
  """

  __slots__ = ("target", "info", "score", "prefetched", "fetched_successfully")

  def __init__(self, target, info):
    """Create the state for |target|.

    Args:
      target: The package identifier.
      info: The dependency graph entry for the target. Its "tprovides",
        "needs", "binary", "provides" and "idx" fields feed the score.
    """
    self.target, self.info = target, info
    self.fetched_successfully = False
    self.prefetched = False
    self.score = None
    self.update_score()

  def __cmp__(self, other):
    # Python 2 three-way comparison, spelled without the cmp() builtin so
    # that the method itself also runs where cmp() no longer exists.
    return (self.score > other.score) - (self.score < other.score)

  def __lt__(self, other):
    # heapq orders items with "<". Python 3 ignores __cmp__, so provide the
    # rich comparison explicitly; Python 2 gives the same answer either way.
    return self.score < other.score

  def update_score(self):
    """Recompute |score| from the current contents of |info|."""
    self.score = (
        -len(self.info["tprovides"]),
        len(self.info["needs"]),
        not self.info["binary"],
        -len(self.info["provides"]),
        self.info["idx"],
        self.target,
    )
| |
| |
class ScoredHeap(object):
  """Min-heap of scored items with fast lookup by target name.

  Alongside the heap itself, the targets of the queued items are mirrored
  in a set so that "target in heap" does not have to scan the heap.
  """

  __slots__ = ("heap", "_heap_set")

  def __init__(self, initial=()):
    """Create the heap, optionally seeded with the items in |initial|."""
    self.heap = []
    self._heap_set = set()
    if initial:
      self.multi_put(initial)

  def get(self):
    """Remove and return the smallest item."""
    smallest = heapq.heappop(self.heap)
    self._heap_set.remove(smallest.target)
    return smallest

  def put(self, item):
    """Add one item; anything other than a TargetState is rejected."""
    if isinstance(item, TargetState):
      heapq.heappush(self.heap, item)
      self._heap_set.add(item.target)
      return
    raise ValueError("Item %r isn't a TargetState" % (item,))

  def multi_put(self, sequence):
    """Add every item of |sequence|, then restore heap order once."""
    items = list(sequence)
    self.heap.extend(items)
    for entry in items:
      self._heap_set.add(entry.target)
    self.sort()

  def sort(self):
    """Re-establish the heap invariant, e.g. after scores have changed."""
    heapq.heapify(self.heap)

  def __contains__(self, target):
    return target in self._heap_set

  def __nonzero__(self):
    return len(self.heap) > 0

  def __len__(self):
    return len(self.heap)
| |
| |
| class EmergeQueue(object): |
| """Class to schedule emerge jobs according to a dependency graph.""" |
| |
def __init__(self, deps_map, emerge, package_db, show_output, unpack_only,
             max_retries):
  """Set up the queues, worker pools and print worker for a merge.

  In --pretend mode this prints a notice and exits the process with status
  0 before any worker is started.

  Args:
    deps_map: The dependency graph. Entries are read here for "action" and
      are wrapped in TargetState objects for scheduling.
    emerge: The emerge state object. Its opts are consulted (and "--jobs" /
      "--load-average" are popped from them), and its scheduler_graph is
      assigned here.
    package_db: Passed through to every EmergeWorker.
    show_output: Stored as self._show_output.
    unpack_only: Stored as self._unpack_only; when true an extra unpack
      queue and pool are created.
    max_retries: Stored as self._max_retries.
  """
  # Store the dependency graph.
  self._deps_map = deps_map
  self._state_map = {}
  # Initialize the running queue to empty
  self._build_jobs = {}
  self._build_ready = ScoredHeap()
  self._fetch_jobs = {}
  self._fetch_ready = ScoredHeap()
  self._unpack_jobs = {}
  self._unpack_ready = ScoredHeap()
  # List of total package installs represented in deps_map.
  install_jobs = [x for x in deps_map if deps_map[x]["action"] == "merge"]
  self._total_jobs = len(install_jobs)
  self._show_output = show_output
  self._unpack_only = unpack_only
  self._max_retries = max_retries

  if "--pretend" in emerge.opts:
    print("Skipping merge because of --pretend mode.")
    sys.exit(0)

  # Set up a session so we can easily terminate all children.
  self._SetupSession()

  # Setup scheduler graph object. This is used by the child processes
  # to help schedule jobs.
  emerge.scheduler_graph = emerge.depgraph.schedulerGraph()

  # Calculate how many jobs we can run in parallel. We don't want to pass
  # the --jobs flag over to emerge itself, because that'll tell emerge to
  # hide its output, and said output is quite useful for debugging hung
  # jobs.
  procs = min(self._total_jobs,
              emerge.opts.pop("--jobs", multiprocessing.cpu_count()))
  # Always keep at least one build/unpack process, even with no merge jobs.
  self._build_procs = self._unpack_procs = max(1, procs)
  # Fetch is IO bound, we can use more processes.
  self._fetch_procs = max(4, procs)
  self._load_avg = emerge.opts.pop("--load-average", None)
  # All workers report on _job_queue; all printing goes through
  # _print_queue.
  self._job_queue = multiprocessing.Queue()
  self._print_queue = multiprocessing.Queue()

  # EmergeWorker is handed to each Pool together with its argument tuple;
  # the trailing True here is EmergeWorker's fetch_only argument.
  self._fetch_queue = multiprocessing.Queue()
  args = (self._fetch_queue, self._job_queue, emerge, package_db, True)
  self._fetch_pool = multiprocessing.Pool(self._fetch_procs, EmergeWorker,
                                          args)

  self._build_queue = multiprocessing.Queue()
  args = (self._build_queue, self._job_queue, emerge, package_db)
  self._build_pool = multiprocessing.Pool(self._build_procs, EmergeWorker,
                                          args)

  if self._unpack_only:
    # Unpack pool only required on unpack_only jobs.
    # (False, True) below are EmergeWorker's fetch_only and unpack_only.
    self._unpack_queue = multiprocessing.Queue()
    args = (self._unpack_queue, self._job_queue, emerge, package_db, False,
            True)
    self._unpack_pool = multiprocessing.Pool(self._unpack_procs, EmergeWorker,
                                             args)

  self._print_worker = multiprocessing.Process(target=PrintWorker,
                                               args=[self._print_queue])
  self._print_worker.start()

  # Initialize the failed queue to empty.
  self._retry_queue = []
  self._failed_count = dict()

  # Setup an exit handler so that we print nice messages if we are
  # terminated.
  self._SetupExitHandler()

  # Schedule our jobs.
  # Every package gets a TargetState, and all of them start out waiting to
  # be fetched.
  self._state_map.update(
      (pkg, TargetState(pkg, data)) for pkg, data in deps_map.iteritems())
  self._fetch_ready.multi_put(self._state_map.itervalues())
| |
def _SetupSession(self):
  """Set up a session so we can easily terminate all children.

  Forks. The child calls os.setsid() and returns to the caller. The parent
  never returns: it forwards signals to the child, waits for it and then
  leaves through os._exit().
  """
  # When we call os.setsid(), this sets up a session / process group for this
  # process and all children. These session groups are needed so that we can
  # easily kill all children (including processes launched by emerge) before
  # we exit.
  #
  # One unfortunate side effect of os.setsid() is that it blocks CTRL-C from
  # being received. To work around this, we only call os.setsid() in a forked
  # process, so that the parent can still watch for CTRL-C. The parent will
  # just sit around, watching for signals and propagating them to the child,
  # until the child exits.
  #
  # TODO(davidjames): It would be nice if we could replace this with cgroups.
  pid = os.fork()
  if pid == 0:
    # Child: becomes the session leader and carries on with the real work.
    os.setsid()
  else:
    # Parent: from here on it only manages the child.
    proctitle.settitle('SessionManager')

    def PropagateToChildren(signum, _frame):
      # Just propagate the signals down to the child. We'll exit when the
      # child does.
      try:
        os.kill(pid, signum)
      except OSError as ex:
        # ESRCH (no such process) is ignored; anything else is re-raised.
        if ex.errno != errno.ESRCH:
          raise
    signal.signal(signal.SIGINT, PropagateToChildren)
    signal.signal(signal.SIGTERM, PropagateToChildren)

    def StopGroup(_signum, _frame):
      # When we get stopped, stop the children.
      try:
        # First the child's process group, then our own.
        os.killpg(pid, signal.SIGSTOP)
        os.kill(0, signal.SIGSTOP)
      except OSError as ex:
        if ex.errno != errno.ESRCH:
          raise
    signal.signal(signal.SIGTSTP, StopGroup)

    def ContinueGroup(_signum, _frame):
      # Launch the children again after being stopped.
      try:
        os.killpg(pid, signal.SIGCONT)
      except OSError as ex:
        if ex.errno != errno.ESRCH:
          raise
    signal.signal(signal.SIGCONT, ContinueGroup)

    # Loop until the children exit. We exit with os._exit to be sure we
    # don't run any finalizers (those will be run by the child process.)
    # pylint: disable=W0212
    while True:
      try:
        # Wait for the process to exit. When it does, exit with the return
        # value of the subprocess.
        os._exit(process_util.GetExitStatus(os.waitpid(pid, 0)[1]))
      except OSError as ex:
        # An interrupted wait (EINTR) is simply retried.
        if ex.errno == errno.EINTR:
          continue
        traceback.print_exc()
        os._exit(1)
      except BaseException:
        # Never let an exception escape and run the caller's code in the
        # parent; report it and exit.
        traceback.print_exc()
        os._exit(1)
| |
| def _SetupExitHandler(self): |
| |
| def ExitHandler(signum, _frame): |
| # Set KILLED flag. |
| KILLED.set() |
| |
| # Kill our signal handlers so we don't get called recursively |
| signal.signal(signal.SIGINT, KillHandler) |
| signal.signal(signal.SIGTERM, KillHandler) |
| |
| # Print our current job status |
| for job in self._build_jobs.itervalues(): |
| if job: |
| self._print_queue.put(JobPrinter(job, unlink=True)) |
| |
| # Notify the user that we are exiting |
| self._Print("Exiting on signal %s" % signum) |
| self._print_queue.put(None) |
| self._print_worker.join() |
| |
| # Kill child threads, then exit. |
| os.killpg(0, signal.SIGKILL) |
| sys.exit(1) |
| |
| # Print out job status when we are killed |
| signal.signal(signal.SIGINT, ExitHandler) |
| signal.signal(signal.SIGTERM, ExitHandler) |
| |
| def _ScheduleUnpack(self, pkg_state): |
| self._unpack_jobs[pkg_state.target] = None |
| self._unpack_queue.put(pkg_state) |
| |
| def _Schedule(self, pkg_state): |
| # We maintain a tree of all deps, if this doesn't need |
| # to be installed just free up its children and continue. |
| # It is possible to reinstall deps of deps, without reinstalling |
| # first level deps, like so: |
| # virtual/target-os (merge) -> eselect (nomerge) -> python (merge) |
| this_pkg = pkg_state.info |
| target = pkg_state.target |
| if pkg_state.info is not None: |
| if this_pkg["action"] == "nomerge": |
| self._Finish(target) |
| elif target not in self._build_jobs: |
| # Kick off the build if it's marked to be built. |
| self._build_jobs[target] = None |
| self._build_queue.put(pkg_state) |
| return True |
| |
| def _ScheduleLoop(self, unpack_only=False): |
| if unpack_only: |
| ready_queue = self._unpack_ready |
| jobs_queue = self._unpack_jobs |
| procs = self._unpack_procs |
| else: |
| ready_queue = self._build_ready |
| jobs_queue = self._build_jobs |
| procs = self._build_procs |
| |
| # If the current load exceeds our desired load average, don't schedule |
| # more than one job. |
| if self._load_avg and os.getloadavg()[0] > self._load_avg: |
| needed_jobs = 1 |
| else: |
| needed_jobs = procs |
| |
| # Schedule more jobs. |
| while ready_queue and len(jobs_queue) < needed_jobs: |
| state = ready_queue.get() |
| if unpack_only: |
| self._ScheduleUnpack(state) |
| else: |
| if state.target not in self._failed_count: |
| self._Schedule(state) |
| |
def _Print(self, line):
  """Queue |line| so that the print worker prints it as a single line."""
  request = LinePrinter(line)
  self._print_queue.put(request)
| |
def _Status(self):
  """Report on running build jobs, or print an overall status line.

  With --show-output, interim output of a build job is printed every
  minute. Otherwise a "still building" notification is printed every 2
  minutes and the full output only once a job has been quiet for 60
  minutes. When none of the running jobs produced a message, one summary
  line with queue counts, time and load is printed instead.
  """
  now = time.time()

  if self._show_output:
    dump_after, remind_after = 60, 0
  else:
    dump_after, remind_after = 60 * 60, 60 * 2

  reported = False
  for job in self._build_jobs.itervalues():
    if not job:
      continue
    last_activity = max(job.start_timestamp, job.last_output_timestamp)
    if last_activity + dump_after < now:
      self._print_queue.put(JobPrinter(job))
      job.last_output_timestamp = now
      reported = True
    elif (remind_after and
          job.last_notify_timestamp + remind_after < now):
      running_for = now - job.start_timestamp
      job.last_notify_timestamp = now
      self._Print("Still building %s (%dm%.1fs). Logs in %s" %
                  (job.pkgname, running_for / 60, running_for % 60,
                   job.filename))
      reported = True

  # Only fall back to the general status line when nothing was printed
  # above.
  if reported:
    return

  elapsed = now - GLOBAL_START
  fetching, fetch_waiting = len(self._fetch_jobs), len(self._fetch_ready)
  unpacking, unpack_waiting = len(self._unpack_jobs), len(self._unpack_ready)
  building, build_waiting = len(self._build_jobs), len(self._build_ready)
  retrying = len(self._retry_queue)
  pending = max(0, len(self._deps_map) - fetching - building)

  pieces = ["Pending %s/%s, " % (pending, self._total_jobs)]
  if fetching or fetch_waiting:
    pieces.append("Fetching %s/%s, " % (fetching, fetch_waiting + fetching))
  if unpacking or unpack_waiting:
    pieces.append("Unpacking %s/%s, " %
                  (unpacking, unpack_waiting + unpacking))
  if building or build_waiting or retrying:
    pieces.append("Building %s/%s, " % (building, build_waiting + building))
  if retrying:
    pieces.append("Retrying %s, " % (retrying,))
  load = " ".join(str(x) for x in os.getloadavg())
  pieces.append("[Time %s | Elapsed %dm%.1fs | Load %s]" % (
      time.strftime('%H:%M:%S', time.localtime(now)), elapsed / 60,
      elapsed % 60, load))
  self._Print("".join(pieces))
| |
| def _Finish(self, target): |
| """Mark a target as completed and unblock dependencies.""" |
| this_pkg = self._deps_map[target] |
| if this_pkg["needs"] and this_pkg["nodeps"]: |
| # We got installed, but our deps have not been installed yet. Dependent |
| # packages should only be installed when our needs have been fully met. |
| this_pkg["action"] = "nomerge" |
| else: |
| for dep in this_pkg["provides"]: |
| dep_pkg = self._deps_map[dep] |
| state = self._state_map[dep] |
| del dep_pkg["needs"][target] |
| state.update_score() |
| if not state.prefetched: |
| if dep in self._fetch_ready: |
| # If it's not currently being fetched, update the prioritization |
| self._fetch_ready.sort() |
| elif not dep_pkg["needs"]: |
| if dep_pkg["nodeps"] and dep_pkg["action"] == "nomerge": |
| self._Finish(dep) |
| else: |
| self._build_ready.put(self._state_map[dep]) |
| self._deps_map.pop(target) |
| |
| def _Retry(self): |
| while self._retry_queue: |
| state = self._retry_queue.pop(0) |
| if self._Schedule(state): |
| self._Print("Retrying emerge of %s." % state.target) |
| break |
| |
| def _Shutdown(self): |
| # Tell emerge workers to exit. They all exit when 'None' is pushed |
| # to the queue. |
| |
| # Shutdown the workers first; then jobs (which is how they feed things back) |
| # then finally the print queue. |
| |
| def _stop(queue, pool): |
| if pool is None: |
| return |
| try: |
| queue.put(None) |
| pool.close() |
| pool.join() |
| finally: |
| pool.terminate() |
| |
| _stop(self._fetch_queue, self._fetch_pool) |
| self._fetch_queue = self._fetch_pool = None |
| |
| _stop(self._build_queue, self._build_pool) |
| self._build_queue = self._build_pool = None |
| |
| if self._unpack_only: |
| _stop(self._unpack_queue, self._unpack_pool) |
| self._unpack_queue = self._unpack_pool = None |
| |
| if self._job_queue is not None: |
| self._job_queue.close() |
| self._job_queue = None |
| |
| # Now that our workers are finished, we can kill the print queue. |
| if self._print_worker is not None: |
| try: |
| self._print_queue.put(None) |
| self._print_queue.close() |
| self._print_worker.join() |
| finally: |
| self._print_worker.terminate() |
| self._print_queue = self._print_worker = None |
| |
def Run(self):
  """Run through the scheduled ebuilds.

  Keep running so long as we have uninstalled packages in the
  dependency graph to merge.

  Each pass of the main loop takes one EmergeJobState from the job queue
  and handles it according to its kind (fetch, unpack or build) and to
  whether it announces the start or the end of a job. If nothing is running
  or queued while packages remain, failed packages are retried; if there is
  nothing to retry either, the process exits with status 1.
  """
  if not self._deps_map:
    return

  # Start the fetchers.
  for _ in xrange(min(self._fetch_procs, len(self._fetch_ready))):
    state = self._fetch_ready.get()
    self._fetch_jobs[state.target] = None
    self._fetch_queue.put(state)

  # Print an update, then get going.
  self._Status()

  while self._deps_map:
    # Check here that we are actually waiting for something.
    if (self._build_queue.empty() and
        self._job_queue.empty() and
        not self._fetch_jobs and
        not self._fetch_ready and
        not self._unpack_jobs and
        not self._unpack_ready and
        not self._build_jobs and
        not self._build_ready and
        self._deps_map):
      # If we have failed on a package, retry it now.
      if self._retry_queue:
        self._Retry()
      else:
        # Tell the user why we're exiting.
        if self._failed_count:
          print('Packages failed:\n\t%s' %
                '\n\t'.join(self._failed_count.iterkeys()))
          # Optionally append the failed package names to the file named by
          # $PARALLEL_EMERGE_STATUS_FILE.
          status_file = os.environ.get("PARALLEL_EMERGE_STATUS_FILE")
          if status_file:
            failed_pkgs = set(portage.versions.cpv_getkey(x)
                              for x in self._failed_count.iterkeys())
            with open(status_file, "a") as f:
              f.write("%s\n" % " ".join(failed_pkgs))
        else:
          print("Deadlock! Circular dependencies!")
        sys.exit(1)

    # Wait up to 12 * 5 seconds for a job message. The for/else only reaches
    # its else branch when every attempt timed out.
    for _ in xrange(12):
      try:
        job = self._job_queue.get(timeout=5)
        break
      except Queue.Empty:
        # Check if any more jobs can be scheduled.
        self._ScheduleLoop()
    else:
      # Print an update every 60 seconds.
      self._Status()
      continue

    target = job.target

    if job.fetch_only:
      if not job.done:
        # The fetch has started; remember the live job.
        self._fetch_jobs[job.target] = job
      else:
        state = self._state_map[job.target]
        state.prefetched = True
        state.fetched_successfully = (job.retcode == 0)
        del self._fetch_jobs[job.target]
        self._Print("Fetched %s in %2.2fs"
                    % (target, time.time() - job.start_timestamp))

        # Show the log on request or on failure (the printer removes the
        # file afterwards); otherwise just remove it.
        if self._show_output or job.retcode != 0:
          self._print_queue.put(JobPrinter(job, unlink=True))
        else:
          os.unlink(job.filename)
        # Failure or not, let build work with it next.
        if not self._deps_map[job.target]["needs"]:
          self._build_ready.put(state)
          self._ScheduleLoop()

        if self._unpack_only and job.retcode == 0:
          self._unpack_ready.put(state)
          self._ScheduleLoop(unpack_only=True)

        # Keep the fetchers busy with the next package, if there is one.
        if self._fetch_ready:
          state = self._fetch_ready.get()
          self._fetch_queue.put(state)
          self._fetch_jobs[state.target] = None
        else:
          # Minor optimization; shut down fetchers early since we know
          # the queue is empty.
          self._fetch_queue.put(None)
      continue

    if job.unpack_only:
      if not job.done:
        # The unpack has started; remember the live job.
        self._unpack_jobs[target] = job
      else:
        del self._unpack_jobs[target]
        self._Print("Unpacked %s in %2.2fs"
                    % (target, time.time() - job.start_timestamp))
        if self._show_output or job.retcode != 0:
          self._print_queue.put(JobPrinter(job, unlink=True))
        else:
          os.unlink(job.filename)
        # Hand the next waiting package to the unpack workers.
        if self._unpack_ready:
          state = self._unpack_ready.get()
          self._unpack_queue.put(state)
          self._unpack_jobs[state.target] = None
      continue

    # Everything below deals with build jobs.
    if not job.done:
      self._build_jobs[target] = job
      self._Print("Started %s (logged in %s)" % (target, job.filename))
      continue

    # Print output of job
    if self._show_output or job.retcode != 0:
      self._print_queue.put(JobPrinter(job, unlink=True))
    else:
      os.unlink(job.filename)
    del self._build_jobs[target]

    seconds = time.time() - job.start_timestamp
    details = "%s (in %dm%.1fs)" % (target, seconds / 60, seconds % 60)

    # Complain if necessary.
    if job.retcode != 0:
      # Handle job failure.
      failed_count = self._failed_count.get(target, 0)
      if failed_count >= self._max_retries:
        # If this job has failed and can't be retried, give up.
        # NOTE(review): the failure is not recorded in _failed_count on this
        # path. If the very first failure already hits the limit (e.g.
        # max_retries == 0), the exit path above would presumably print
        # "Deadlock!" rather than the failed package -- TODO confirm.
        self._Print("Failed %s. Your build has failed." % details)
      else:
        # Queue up this build to try again after a long while.
        self._retry_queue.append(self._state_map[target])
        self._failed_count[target] = failed_count + 1
        self._Print("Failed %s, retrying later." % details)
    else:
      self._Print("Completed %s" % details)

      # Mark as completed and unblock waiting ebuilds.
      self._Finish(target)

      if target in self._failed_count and self._retry_queue:
        # If we have successfully retried a failed package, and there
        # are more failed packages, try the next one. We will only have
        # one retrying package actively running at a time.
        self._Retry()


    # Schedule pending jobs and print an update.
    self._ScheduleLoop()
    self._Status()

  # If packages were retried, output a warning.
  if self._failed_count:
    self._Print("")
    self._Print("WARNING: The following packages failed once or more,")
    self._Print("but succeeded upon retry. This might indicate incorrect")
    self._Print("dependencies.")
    for pkg in self._failed_count.iterkeys():
      self._Print(" %s" % pkg)
    self._Print("@@@STEP_WARNINGS@@@")
    self._Print("")

  # Announce completion. Nothing is stopped here; the workers and the print
  # worker are only shut down by _Shutdown().
  self._Print("Merge complete")
| |
| |
def main(argv):
  """Run real_main() and tidy up multiprocessing leftovers afterwards.

  Args:
    argv: Command-line arguments, handed unchanged to real_main().

  Returns:
    Whatever real_main() returns.
  """
  try:
    return real_main(argv)
  finally:
    # multiprocessing does not clean up after itself
    # (http://bugs.python.org/issue4106), so do it by hand.
    #
    # First, force a garbage collection pass *before* the VM begins its
    # shutdown sequence.
    gc.collect()
    # Second, look for the queue feeder threads and try to reap each one,
    # waiting at most one second per thread.
    for thread in threading.enumerate():
      # Skip anything that is not a feeder thread. A thread whose ident is
      # None was never started, so there is nothing to join.
      if thread.name != 'QueueFeederThread' or thread.ident is None:
        continue
      thread.join(1)
| |
| |
def real_main(argv):
  """Compute the dependency graph for the requested packages and merge them.

  If the graph includes an upgrade of sys-apps/portage for ROOT "/", portage
  is merged on its own first and this script then re-executes itself (through
  sudo) to merge the remaining packages.

  Args:
    argv: Command-line arguments. A copy is given to
      DepGraphGenerator.Initialize() and reused when the script re-executes
      itself after a portage upgrade.

  Returns:
    0 once the queued merges have run; 1 if no packages were named or if
    superuser access is required but missing; otherwise the return value of
    emerge_main() (when an emerge action was requested, or when the
    stand-alone portage upgrade fails). Does not return when the portage
    upgrade succeeds, because the process is replaced via os.execvp().
  """
  parallel_emerge_args = argv[:]
  deps = DepGraphGenerator()
  deps.Initialize(parallel_emerge_args)
  emerge = deps.emerge

  if emerge.action is not None:
    argv = deps.ParseParallelEmergeArgs(argv)
    return emerge_main(argv)
  elif not emerge.cmdline_packages:
    Usage()
    return 1

  # Unless we're in pretend mode, there's not much point running without
  # root access. We need to be able to install packages.
  #
  # NOTE: Even if you're running --pretend, it's a good idea to run
  #       parallel_emerge with root access so that portage can write to the
  #       dependency cache. This is important for performance.
  if "--pretend" not in emerge.opts and portage.data.secpass < 2:
    print("parallel_emerge: superuser access is required.")
    return 1

  if "--quiet" not in emerge.opts:
    cmdline_packages = " ".join(emerge.cmdline_packages)
    print("Starting fast-emerge.")
    print(" Building package %s on %s" % (cmdline_packages,
                                          deps.sysroot or "root"))

  deps_tree, deps_info = deps.GenDependencyTree()

  # You want me to be verbose? I'll give you two trees! Twice as much value.
  if "--tree" in emerge.opts and "--verbose" in emerge.opts:
    deps.PrintTree(deps_tree)

  deps_graph = deps.GenDependencyGraph(deps_tree, deps_info)

  # OK, time to print out our progress so far.
  deps.PrintInstallPlan(deps_graph)
  if "--tree" in emerge.opts:
    PrintDepsMap(deps_graph)

  # Are we upgrading portage? If so, and there are more packages to merge,
  # schedule a restart of parallel_emerge to merge the rest. This ensures that
  # we pick up all updates to portage settings before merging any more
  # packages.
  portage_upgrade = False
  root = emerge.settings["ROOT"]
  # pylint: disable=W0212
  if root == "/":
    final_db = emerge.depgraph._dynamic_config._filtered_trees[root]['graph_db']
    for db_pkg in final_db.match_pkgs("sys-apps/portage"):
      portage_pkg = deps_graph.get(db_pkg.cpv)
      if portage_pkg:
        portage_upgrade = True
        if "--quiet" not in emerge.opts:
          print("Upgrading portage first, then restarting...")

  # Upgrade Portage first, then the rest of the packages.
  #
  # In order to grant the child permission to run setsid, we need to run sudo
  # again. We preserve SUDO_USER here in case an ebuild depends on it.
  if portage_upgrade:
    # Calculate what arguments to use when re-invoking. This is done before
    # emerge_main() runs, exactly as the re-exec command line is needed later.
    args = ["sudo", "-E", "SUDO_USER=%s" % os.environ.get("SUDO_USER", "")]
    args += [os.path.abspath(sys.argv[0])] + parallel_emerge_args
    args += ["--exclude=sys-apps/portage"]

    # First upgrade Portage, forwarding only the options listed below.
    passthrough_args = ("--quiet", "--pretend", "--verbose")
    emerge_args = [k for k in emerge.opts if k in passthrough_args]
    ret = emerge_main(emerge_args + ["portage"])
    if ret != 0:
      return ret

    # os.execvp() replaces the process immediately and does not flush open
    # file objects, so anything still sitting in Python's stdio buffers
    # (e.g. when stdout is a pipe or a log file) would be lost. Flush first.
    sys.stdout.flush()
    sys.stderr.flush()

    # Now upgrade the rest.
    os.execvp(args[0], args)

  # Attempt to solve crbug.com/433482
  # The file descriptor error appears only when getting userpriv_groups
  # (lazily generated). Loading userpriv_groups here will reduce the number of
  # calls from few hundreds to one.
  portage.data._get_global('userpriv_groups')

  # Run the queued emerges.
  scheduler = EmergeQueue(deps_graph, emerge, deps.package_db, deps.show_output,
                          deps.unpack_only, deps.max_retries)
  try:
    scheduler.Run()
  finally:
    # Always shut the scheduler down, even if Run() raised.
    # pylint: disable=W0212
    scheduler._Shutdown()
  # Drop our reference to the scheduler before cleaning up logs.
  scheduler = None

  clean_logs(emerge.settings)

  print("Done")
  return 0