| #!/usr/bin/python2.6 |
| # Copyright (c) 2010 The Chromium OS Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| """Program to run emerge in parallel, for significant speedup. |
| |
| Usage: |
| ./parallel_emerge [--board=BOARD] [--workon=PKGS] [--no-workon-deps] |
| [--rebuild] [emerge args] package |
| |
| Basic operation: |
| Runs 'emerge -p --debug' to display dependencies, and stores a |
| dependency graph. All non-blocked packages are launched in parallel, |
| as 'emerge --nodeps package' with any blocked packages being emerged |
| immediately upon deps being met. |
| |
| For this to work effectively, /usr/lib/portage/pym/portage/locks.py |
| must be stubbed out, preventing portage from slowing itself with |
| unnecessary locking, as this script ensures that emerge is run in such |
| a way that common resources are never in conflict. This is controlled |
| by an environment variable PORTAGE_LOCKS set in parallel emerge |
| subprocesses. |
| |
| Parallel Emerge unlocks two things during operation, here's what you |
| must do to keep this safe: |
| * Storage dir containing binary packages. - Don't emerge new |
| packages while installing the existing ones. |
| * Portage database - You must not examine deps while modifying the |
| database. Therefore you may only parallelize "-p" read only access, |
| or "--nodeps" write only access. |
| Caveats: |
| * Some ebuild packages have incorrectly specified deps, and running |
| them in parallel is more likely to bring out these failures. |
| * Some ebuilds (especially the build part) have complex dependencies |
| that are not captured well by this script (it may be necessary to |
| install an old package to build, but then install a newer version |
| of the same package for a runtime dep). |
| """ |
| |
| import copy |
| import multiprocessing |
| import os |
| import Queue |
| import shlex |
| import sys |
| import tempfile |
| import time |
| import urllib2 |
| |
| # If PORTAGE_USERNAME isn't specified, scrape it from the $HOME variable. On |
| # Chromium OS, the default "portage" user doesn't have the necessary |
| # permissions. It'd be easier if we could default to $USERNAME, but $USERNAME |
| # is "root" here because we get called through sudo. |
| # |
| # We need to set this before importing any portage modules, because portage |
| # looks up "PORTAGE_USERNAME" at import time. |
| # |
| # NOTE: .bashrc sets PORTAGE_USERNAME = $USERNAME, so most people won't |
| # encounter this case unless they have an old chroot or blow away the |
| # environment by running sudo without the -E specifier. |
if "PORTAGE_USERNAME" not in os.environ:
  # Fall back to the user name embedded in a /home/<user>/... home directory.
  # HOME can be missing altogether (e.g. a scrubbed sudo environment) and a
  # bare "/home/" has no user component; in both cases we leave the variable
  # unset instead of crashing or exporting an empty user name.
  homedir = os.environ.get("HOME", "")
  if homedir.startswith("/home/"):
    username = homedir.split("/")[2]
    if username:
      os.environ["PORTAGE_USERNAME"] = username
| |
| # Portage doesn't expose dependency trees in its public API, so we have to |
| # make use of some private APIs here. These modules are found under |
| # /usr/lib/portage/pym/. |
| # |
| # TODO(davidjames): Update Portage to expose public APIs for these features. |
| from _emerge.actions import adjust_configs |
| from _emerge.actions import load_emerge_config |
| from _emerge.create_depgraph_params import create_depgraph_params |
| from _emerge.depgraph import backtrack_depgraph |
| from _emerge.main import emerge_main |
| from _emerge.main import parse_opts |
| from _emerge.Package import Package |
| from _emerge.Scheduler import Scheduler |
| from _emerge.stdout_spinner import stdout_spinner |
| import portage |
| import portage.debug |
| |
| |
def Usage():
  """Print the command-line help text to stdout, then exit.

  Raises:
    SystemExit: always, with exit status 1.
  """
  help_lines = (
      "Usage:",
      " ./parallel_emerge [--board=BOARD] [--workon=PKGS] [--no-workon-deps]",
      " [--rebuild] [emerge args] package",
      "",
      "Packages specified as workon packages are always built from source.",
      "Unless --no-workon-deps is specified, packages that depend on these",
      "packages are also built from source.",
      "",
      "The --workon argument is mainly useful when you want to build and",
      "install packages that you are working on unconditionally, but do not want",
      "to have to rev the package to indicate you want to build it from",
      "source. The build_packages script will automatically supply the",
      "workon argument to emerge, ensuring that packages selected using",
      "cros-workon are rebuilt.",
      "",
      "The --rebuild option rebuilds packages whenever their dependencies",
      "are changed. This ensures that your build is correct.",
  )
  for line in help_lines:
    # A single parenthesised argument prints the same text under the Python 2
    # print statement; an empty string yields the blank separator lines.
    print(line)
  sys.exit(1)
| |
| |
# These are dependencies that are not specified in the package,
# but will prevent the package from installing.
#
# Each key is a substring of the name of the package that has the hidden
# requirement, and its value is a substring of the name of the package it
# needs. AddSecretDeps() (inside GenDependencyGraph) adds a "secret" edge to
# the dependency graph for every entry whose two sides both match a package
# in the graph. Empty by default.
secret_deps = {}

# Global start time: the wall-clock time (seconds since the epoch, as returned
# by time.time()) at which this module was loaded.
GLOBAL_START = time.time()
| |
| |
class EmergeData(object):
  """Plain record bundling the emerge state we hand around as one unit.

  The fields are used for calculating dependencies and installing packages.
  Every field starts out as None and is filled in later by its owner.

  Attributes:
    action: The action the user requested. None when the user is installing
      packages; otherwise the action name, which maps exactly to the long-form
      name of the associated emerge option (e.g. "unmerge" for
      "parallel_emerge --unmerge package").
    cmdline_packages: The list of packages the user passed on the
      command-line.
    depgraph: The emerge dependency graph. It contains all the packages
      involved in this merge, along with their versions.
    opts: A dict of the options passed to emerge, already cleaned up by
      parse_opts so that the emerge code can inspect it easily. As one such
      shortcut, "--usepkg=n" (the default) is simply left out of the dict, so
      the presence of "--usepkg" means it is enabled. Not every option is
      cleaned up this way (--with-bdeps is passed through as-is); see
      parse_opts in _emerge.main for the full list.
    mtimedb: A dictionary used by portage to maintain global state. It is
      loaded from disk when portage starts up and saved whenever
      mtimedb.commit() is called. It records global updates (i.e., what
      version of portage we have) and what portage is currently doing, which
      is what makes --resume possible. parallel_emerge does not record what
      it is doing there, so --resume is not supported.
    root_config: The portage configuration for our current root: the portage
      settings plus the three portage trees for that root (see "trees").
    scheduler_graph: Used by emerge to calculate what packages to install. We
      don't actually install any deps, so it isn't really used, but it is
      passed to the Scheduler object anyway.
    settings: Portage settings for our current session. Most of them are set
      in make.conf inside our current install root.
    spinner: The object that spews progress to stdout. We maintain our own
      spinner, so the portage one is put in "silent" mode.
    trees: The portage trees, one group per root; the group for the current
      root is trees[settings["ROOT"]]. Each root has three trees:
        - vartree: A database of the currently-installed packages.
        - porttree: A database of ebuilds, that can be used to build packages.
        - bintree: A database of binary packages.
  """

  __slots__ = ["action", "cmdline_packages", "depgraph", "mtimedb", "opts",
               "root_config", "scheduler_graph", "settings", "spinner",
               "trees"]

  def __init__(self):
    # Nothing is known yet: blank out every declared field.
    for field in EmergeData.__slots__:
      setattr(self, field, None)
| |
| |
| class DepGraphGenerator(object): |
| """Grab dependency information about packages from portage. |
| |
| Typical usage: |
| deps = DepGraphGenerator() |
| deps.Initialize(sys.argv[1:]) |
| deps_tree, deps_info = deps.GenDependencyTree() |
| deps_graph = deps.GenDependencyGraph(deps_tree, deps_info) |
| deps.PrintTree(deps_tree) |
| PrintDepsMap(deps_graph) |
| """ |
| |
| __slots__ = ["board", "emerge", "mandatory_source", "no_workon_deps", |
| "package_db", "rebuild"] |
| |
def __init__(self):
  """Start with default options and empty portage state."""
  # Options scraped from the command line by ParseParallelEmergeArgs().
  self.board, self.no_workon_deps, self.rebuild = None, False, False
  self.mandatory_source = set()

  # Maps package identifiers to portage Package objects; filled in by
  # GenDependencyTree().
  self.package_db = dict()

  # Emerge state; populated by Initialize().
  self.emerge = EmergeData()
| |
| def ParseParallelEmergeArgs(self, argv): |
| """Read the parallel emerge arguments from the command-line. |
| |
| We need to be compatible with emerge arg format. We scrape arguments that |
| are specific to parallel_emerge, and pass through the rest directly to |
| emerge. |
| Args: |
| argv: arguments list |
| Returns: |
| Arguments that don't belong to parallel_emerge |
| """ |
| emerge_args = [] |
| for arg in argv: |
| # Specifically match arguments that are specific to parallel_emerge, and |
| # pass through the rest. |
| if arg.startswith("--board="): |
| self.board = arg.replace("--board=", "") |
| elif arg.startswith("--workon="): |
| workon_str = arg.replace("--workon=", "") |
| package_list = shlex.split(" ".join(shlex.split(workon_str))) |
| self.mandatory_source.update(package_list) |
| elif arg == "--no-workon-deps": |
| self.no_workon_deps = True |
| elif arg == "--rebuild": |
| self.rebuild = True |
| else: |
| # Not one of our options, so pass through to emerge. |
| emerge_args.append(arg) |
| |
| if self.rebuild: |
| if self.no_workon_deps: |
| print "--rebuild is not compatible with --no-workon-deps" |
| sys.exit(1) |
| |
| return emerge_args |
| |
def Initialize(self, args):
  """Initializer. Parses arguments and sets up portage state.

  Environment variables are exported first, because the emerge config is only
  loaded from disk once they are in place. The resulting action, options,
  settings, trees, mtimedb, command-line packages and root config are stored
  on self.emerge.

  Args:
    args: Command-line arguments; a mix of parallel_emerge options and
      emerge options.

  Raises:
    SystemExit: if an option that parallel_emerge does not support is given
      (ParseParallelEmergeArgs may also exit on incompatible options).
    KeyError: if CHROMEOS_ROOT is not already set and HOME is missing.
    IOError: if the board's toolchain.conf cannot be opened.
  """

  # Parse and strip out args that are just intended for parallel_emerge.
  emerge_args = self.ParseParallelEmergeArgs(args)

  # Setup various environment variables based on our current board. These
  # variables are normally setup inside emerge-${BOARD}, but since we don't
  # call that script, we have to set it up here. These variables serve to
  # point our tools at /build/BOARD and to setup cross compiles to the
  # appropriate board as configured in toolchain.conf.
  if self.board:
    board_root = "/build/" + self.board
    os.environ["PORTAGE_CONFIGROOT"] = board_root
    os.environ["PORTAGE_SYSROOT"] = board_root
    os.environ["SYSROOT"] = board_root
    scripts_dir = os.path.dirname(os.path.realpath(__file__))
    toolchain_path = "%s/../overlays/overlay-%s/toolchain.conf"
    # Strip the variant out of the board name to look for the toolchain. This
    # is similar to what setup_board does.
    board_no_variant = self.board.split('_')[0]
    # The first line of toolchain.conf is used as CHOST. The with-block
    # closes the file even if reading it fails.
    with open(toolchain_path % (scripts_dir, board_no_variant)) as f:
      os.environ["CHOST"] = f.readline().strip()

  # Although CHROMEOS_ROOT isn't specific to boards, it's normally setup
  # inside emerge-${BOARD}, so we set it up here for compatibility. It
  # will be going away soon as we migrate to CROS_WORKON_SRCROOT.
  #
  # HOME is only consulted when CHROMEOS_ROOT is missing, so an explicit
  # CHROMEOS_ROOT keeps working in an environment without HOME.
  if "CHROMEOS_ROOT" not in os.environ:
    os.environ["CHROMEOS_ROOT"] = os.environ["HOME"] + "/trunk"

  # Modify the environment to disable locking.
  os.environ["PORTAGE_LOCKS"] = "false"
  os.environ["UNMERGE_DELAY"] = "0"

  # Parse the emerge options.
  action, opts, cmdline_packages = parse_opts(emerge_args)

  # If we're installing to the board, we want the --root-deps option so that
  # portage will install the build dependencies to that location as well.
  if self.board:
    opts.setdefault("--root-deps", True)

  # Set environment variables based on options. Portage normally sets these
  # environment variables in emerge_main, but we can't use that function,
  # because it also does a bunch of other stuff that we don't want.
  # TODO(davidjames): Patch portage to move this logic into a function we can
  # reuse here.
  if "--debug" in opts:
    os.environ["PORTAGE_DEBUG"] = "1"
  if "--config-root" in opts:
    os.environ["PORTAGE_CONFIGROOT"] = opts["--config-root"]
  if "--root" in opts:
    os.environ["ROOT"] = opts["--root"]
  if "--accept-properties" in opts:
    os.environ["ACCEPT_PROPERTIES"] = opts["--accept-properties"]

  # Now that we've setup the necessary environment variables, we can load the
  # emerge config from disk.
  settings, trees, mtimedb = load_emerge_config()

  # Check whether our portage tree is out of date. Typically, this happens
  # when you're setting up a new portage tree, such as in setup_board and
  # make_chroot. In that case, portage applies a bunch of global updates
  # here. Once the updates are finished, we need to commit any changes
  # that the global update made to our mtimedb, and reload the config.
  #
  # Portage normally handles this logic in emerge_main, but again, we can't
  # use that function here.
  if portage._global_updates(trees, mtimedb["updates"]):
    mtimedb.commit()
    settings, trees, mtimedb = load_emerge_config(trees=trees)

  # Setup implied options. Portage normally handles this logic in
  # emerge_main.
  if "--buildpkgonly" in opts or "buildpkg" in settings.features:
    opts.setdefault("--buildpkg", True)
  if "--getbinpkgonly" in opts:
    opts.setdefault("--usepkgonly", True)
    opts.setdefault("--getbinpkg", True)
  if "getbinpkg" in settings.features:
    # Per emerge_main, FEATURES=getbinpkg overrides --getbinpkg=n
    opts["--getbinpkg"] = True
  if "--getbinpkg" in opts or "--usepkgonly" in opts:
    opts.setdefault("--usepkg", True)
  if "--fetch-all-uri" in opts:
    opts.setdefault("--fetchonly", True)
  if "--skipfirst" in opts:
    opts.setdefault("--resume", True)
  if "--buildpkgonly" in opts:
    # --buildpkgonly will not merge anything, so it overrides all binary
    # package options.
    for opt in ("--getbinpkg", "--getbinpkgonly",
                "--usepkg", "--usepkgonly"):
      opts.pop(opt, None)
  if (settings.get("PORTAGE_DEBUG", "") == "1" and
      "python-trace" in settings.features):
    portage.debug.set_trace(True)

  # Complain about unsupported options
  for opt in ("--ask", "--ask-enter-invalid", "--complete-graph",
              "--resume", "--skipfirst"):
    if opt in opts:
      print("%s is not supported by parallel_emerge" % opt)
      sys.exit(1)

  # Make emerge specific adjustments to the config (e.g. colors!)
  adjust_configs(opts, trees)

  # Save our configuration so far in the emerge object
  emerge = self.emerge
  emerge.action, emerge.opts = action, opts
  emerge.settings, emerge.trees, emerge.mtimedb = settings, trees, mtimedb
  emerge.cmdline_packages = cmdline_packages
  root = settings["ROOT"]
  emerge.root_config = trees[root]["root_config"]
| |
def GenDependencyTree(self):
  """Get dependency tree info from emerge.

  Asks portage to compute the dependency graph for the command-line packages
  (plus any --workon packages), stores that graph on self.emerge.depgraph,
  and records every package of the install plan in self.package_db.

  TODO(): Update cros_extract_deps to also use this code.
  Returns:
    A (deps_tree, deps_info) tuple.
    deps_tree maps each package identifier to a dict with its "action" and
    its direct "deps"; each dep entry in turn holds "action", "deptype" and
    an empty "deps" dict.
    deps_info maps each package in portage's install plan to a dict with
    "idx" (its position in that plan) and "optional" (whether the operation
    may be skipped).
  Raises:
    SystemExit: if portage cannot build a dependency graph for the request.
  """
  start = time.time()

  # Setup emerge options.
  #
  # We treat dependency info a bit differently than emerge itself. Unless
  # you're using --usepkgonly, we disable --getbinpkg and --usepkg here so
  # that emerge will look at the dependencies of the source ebuilds rather
  # than the binary dependencies. This helps ensure that we have the option
  # of merging a package from source, if we want to switch to it with
  # --workon and the dependencies have changed.
  emerge = self.emerge
  # Work on a copy so that the options stored on self.emerge stay untouched.
  emerge_opts = emerge.opts.copy()
  emerge_opts.pop("--getbinpkg", None)
  if "--usepkgonly" not in emerge_opts:
    emerge_opts.pop("--usepkg", None)
  if self.mandatory_source or self.rebuild:
    # Enable --emptytree so that we get the full tree, which we need for
    # dependency analysis. By default, with this option, emerge optimizes
    # the graph by removing uninstall instructions from the graph. By
    # specifying --tree as well, we tell emerge that it's not safe to remove
    # uninstall instructions because we're planning on analyzing the output.
    emerge_opts["--tree"] = True
    emerge_opts["--emptytree"] = True

  # Create a list of packages to merge
  packages = set(emerge.cmdline_packages[:])
  if self.mandatory_source:
    packages.update(self.mandatory_source)

  # Tell emerge to be quiet. We print plenty of info ourselves so we don't
  # need any extra output from portage.
  portage.util.noiselimit = -1

  # My favorite feature: The silent spinner. It doesn't spin. Ever.
  # I'd disable the colors by default too, but they look kind of cool.
  emerge.spinner = stdout_spinner()
  emerge.spinner.update = emerge.spinner.update_quiet

  if "--quiet" not in emerge.opts:
    print "Calculating deps..."

  # Ask portage to build a dependency graph. with the options we specified
  # above.
  params = create_depgraph_params(emerge_opts, emerge.action)
  success, depgraph, _ = backtrack_depgraph(
      emerge.settings, emerge.trees, emerge_opts, params, emerge.action,
      packages, emerge.spinner)
  emerge.depgraph = depgraph

  # Is it impossible to honor the user's request? Bail!
  if not success:
    depgraph.display_problems()
    sys.exit(1)

  # Build our own tree from the emerge digraph.
  deps_tree = {}
  digraph = depgraph._dynamic_config.digraph
  for node, node_deps in digraph.nodes.items():
    # Calculate dependency packages that need to be installed first. Each
    # child on the digraph is a dependency. The "operation" field specifies
    # what we're doing (e.g. merge, uninstall, etc.). The "priorities" array
    # contains the type of dependency (e.g. build, runtime, runtime_post,
    # etc.)
    #
    # Emerge itself actually treats some dependencies as "soft" dependencies
    # and sometimes ignores them. We don't do that -- we honor all
    # dependencies unless we're forced to prune them because they're cyclic.
    #
    # Portage refers to the identifiers for packages as a CPV. This acronym
    # stands for Component/Path/Version.
    #
    # Here's an example CPV: chromeos-base/power_manager-0.0.1-r1
    # Split up, this CPV would be:
    #   C -- Component: chromeos-base
    #   P -- Path:      power_manager
    #   V -- Version:   0.0.1-r1
    #
    # We just refer to CPVs as packages here because it's easier.
    deps = {}
    # node_deps[0] maps each child node to its list of priorities; only the
    # last priority is kept as the dependency type.
    for child, priorities in node_deps[0].items():
      deps[str(child.cpv)] = dict(action=str(child.operation),
                                  deptype=str(priorities[-1]),
                                  deps={})

    # We've built our list of deps, so we can add our package to the tree.
    if isinstance(node, Package):
      deps_tree[str(node.cpv)] = dict(action=str(node.operation),
                                      deps=deps)

  # Look at the user's options here (not emerge_opts), because emerge_opts
  # may have had --emptytree forced on above.
  emptytree = "--emptytree" in emerge.opts

  # Ask portage for its install plan, so that we can only throw out
  # dependencies that portage throws out. Also, keep track of the old
  # versions of packages that we're either upgrading or replacing.
  #
  # The "vardb" is the database of installed packages.
  vardb = emerge.trees[emerge.settings["ROOT"]]["vartree"].dbapi
  deps_info = {}
  for pkg in depgraph.altlist():
    if isinstance(pkg, Package):
      # If we're not in emptytree mode, and we're going to replace a package
      # that is already installed, then this operation is possibly optional.
      # ("--selective" mode is handled later, in RemoveInstalledPackages())
      optional = False
      if not emptytree and vardb.cpv_exists(pkg.cpv):
        optional = True

      # Add the package to our database.
      self.package_db[str(pkg.cpv)] = pkg

      # Save off info about the package
      # ("idx" is the package's position in the install plan so far.)
      deps_info[str(pkg.cpv)] = {"idx": len(deps_info),
                                 "optional": optional}

  # Delete the --tree option, because we don't really want to display a
  # tree. We just wanted to get emerge to leave uninstall instructions on
  # the graph. Later, when we display the graph, we'll want standard-looking
  # output, so removing the --tree option is important.
  depgraph._frozen_config.myopts.pop("--tree", None)

  seconds = time.time() - start
  if "--quiet" not in emerge.opts:
    print "Deps calculated in %dm%.1fs" % (seconds / 60, seconds % 60)

  return deps_tree, deps_info
| |
| def PrintTree(self, deps, depth=""): |
| """Print the deps we have seen in the emerge output. |
| |
| Args: |
| deps: Dependency tree structure. |
| depth: Allows printing the tree recursively, with indentation. |
| """ |
| for entry in sorted(deps): |
| action = deps[entry]["action"] |
| print "%s %s (%s)" % (depth, entry, action) |
| self.PrintTree(deps[entry]["deps"], depth=depth + " ") |
| |
| def GenDependencyGraph(self, deps_tree, deps_info): |
| """Generate a doubly linked dependency graph. |
| |
| Args: |
| deps_tree: Dependency tree structure. |
| deps_info: More details on the dependencies. |
| Returns: |
| Deps graph in the form of a dict of packages, with each package |
| specifying a "needs" list and "provides" list. |
| """ |
| emerge = self.emerge |
| root = emerge.settings["ROOT"] |
| |
| # It's useful to know what packages will actually end up on the |
| # system at some point. Packages in final_db are either already |
| # installed, or will be installed by the time we're done. |
| final_db = emerge.depgraph._dynamic_config.mydbapi[root] |
| |
| # final_pkgs is a set of the packages we found in the final_db. These |
| # packages are either already installed, or will be installed by the time |
| # we're done. It's populated in BuildFinalPackageSet() |
| final_pkgs = set() |
| |
| # deps_map is the actual dependency graph. |
| # |
| # Each package specifies a "needs" list and a "provides" list. The "needs" |
| # list indicates which packages we depend on. The "provides" list |
| # indicates the reverse dependencies -- what packages need us. |
| # |
| # We also provide some other information in the dependency graph: |
| # - action: What we're planning on doing with this package. Generally, |
| # "merge", "nomerge", or "uninstall" |
| # - mandatory_source: |
| # If true, indicates that this package must be compiled from source. |
| # We set this for "workon" packages, and for packages where the |
| # binaries are known to be out of date. |
| # - mandatory: |
| # If true, indicates that this package must be installed. We don't care |
| # whether it's binary or source, unless the mandatory_source flag is |
| # also set. |
| # |
| deps_map = {} |
| |
def ReverseTree(packages):
  """Fold a dependency tree into the doubly linked deps_map digraph.

  Every package in the tree gets a deps_map entry (existing entries are
  reused). Each dependency other than a "runtime_post" one is then recorded
  in both directions: in the package's "needs" dict (with its type) and in
  the dependency's "provides" set. Nothing is returned; deps_map is updated
  in place and is not yet sanitized.

  Args:
    packages: Tree(s) of dependencies.
  """
  for name, entry in packages.items():
    children = entry["deps"]

    # Reuse the node if another branch of the tree already created it.
    node = deps_map.setdefault(
        name,
        {"needs": {}, "provides": set(), "action": entry["action"],
         "mandatory_source": False, "mandatory": False})

    # Make sure every dependency has a node before we link to it.
    ReverseTree(children)

    for child, child_entry in children.items():
      kind = child_entry["deptype"]
      if kind == "runtime_post":
        continue
      deps_map[child]["provides"].add(name)
      node["needs"][child] = kind
| |
def BuildFinalPackageSet():
  """Add every deps_map package that final_db can match to final_pkgs.

  A package that is installed, or will get installed, has matches in
  final_db; the identifier of each match is recorded.
  """
  for name in deps_map:
    final_pkgs.update(str(hit.cpv) for hit in final_db.match_pkgs(name))
| |
def FindCycles():
  """Find cycles in the dependency tree.

  Walks the "needs" edges of deps_map depth-first, starting from every
  package in turn.

  Returns:
    Dict of packages involved in cyclic dependencies. Each package maps to a
    dict with "pkgs" (the set of all packages sharing a cycle with it) and
    "cycles" (a list of the cycles it is involved in, each cycle being a list
    of packages that starts and ends with the same package).
  """
  found = {}
  # Packages on the current depth-first path, outermost first.
  path = []
  # Packages whose whole subtree has already been explored.
  finished = set()

  def Visit(node):
    """Explore node's dependencies, recording every cycle through the path."""
    if node in finished:
      return
    path.append(node)
    for dep in deps_map[node]["needs"]:
      if dep not in path:
        Visit(dep)
        continue
      # dep is already on the path, so the path from dep to here, plus the
      # edge back to dep, forms a cycle.
      loop = path[path.index(dep):] + [dep]
      for member in loop:
        record = found.setdefault(member, {})
        record.setdefault("pkgs", set()).update(loop)
        record.setdefault("cycles", []).append(loop)
    path.pop()
    finished.add(node)

  for start in deps_map:
    Visit(start)
  return found
| |
def RemoveInstalledPackages():
  """Remove installed packages, propagating dependencies.

  Drops from deps_map every package that is not on the install list
  (deps_info) or that is still marked optional there. Each removed package's
  dependencies are handed to the packages that depended on it, so ordering
  constraints that ran through the removed package are kept.
  """

  # If we're not in selective mode, the packages on the command line are
  # not optional.
  if "--selective" in emerge.opts:
    selective = emerge.opts["--selective"] != "n"
  else:
    selective = "--noreplace" in emerge.opts or "--update" in emerge.opts
  if not selective:
    for pkg in emerge.cmdline_packages:
      for db_pkg in final_db.match_pkgs(pkg):
        # NOTE(review): deps_info is keyed by str(pkg.cpv) elsewhere; this
        # assumes db_pkg.cpv compares equal to that string and that every
        # match is on the install list -- confirm.
        deps_info[db_pkg.cpv]["optional"] = False

  # Schedule packages that aren't on the install list for removal
  rm_pkgs = set(deps_map.keys()) - set(deps_info.keys())

  # Schedule optional packages for removal
  for pkg, info in deps_info.items():
    if info["optional"]:
      rm_pkgs.add(pkg)

  # Remove the packages we don't want, simplifying the graph and making
  # it easier for us to crack cycles.
  for pkg in sorted(rm_pkgs):
    this_pkg = deps_map[pkg]
    needs = this_pkg["needs"]
    provides = this_pkg["provides"]
    # Everything that pkg depended on now provides for pkg's dependents
    # directly; drop the edge to pkg itself and any self-edge this creates.
    for dep in needs:
      dep_provides = deps_map[dep]["provides"]
      dep_provides.update(provides)
      dep_provides.discard(pkg)
      dep_provides.discard(dep)
    # Everything that depended on pkg inherits pkg's dependencies, again
    # without the edge to pkg and without a self-edge.
    for target in provides:
      target_needs = deps_map[target]["needs"]
      target_needs.update(needs)
      target_needs.pop(pkg, None)
      target_needs.pop(target, None)
    del deps_map[pkg]
| |
def SanitizeTree(cycles):
  """Remove circular dependencies.

  We only prune circular dependencies that go against the emerge ordering.
  This has a nice property: we're guaranteed to merge dependencies in the
  same order that portage does.

  Because we don't treat any dependencies as "soft" unless they're killed
  by a cycle, we pay attention to a larger number of dependencies when
  merging. This hurts performance a bit, but helps reliability.

  Each broken edge is printed together with the cycle it belonged to.

  Args:
    cycles: Dict of packages involved in cyclic dependencies, mapping each
      package to a list of the cycles the package is involved in. Produced
      by FindCycles().
  """
  # Only look at cyclic packages that are still present in deps_map.
  for basedep in set(cycles).intersection(deps_map):
    this_pkg = deps_map[basedep]
    # Candidates are the dependents of basedep that share a cycle with it.
    for dep in this_pkg["provides"].intersection(cycles[basedep]["pkgs"]):
      # The edge "dep needs basedep" goes against the emerge ordering when
      # basedep does not come before dep in the install plan.
      if deps_info[basedep]["idx"] >= deps_info[dep]["idx"]:
        for mycycle in cycles[basedep]["cycles"]:
          if dep in mycycle:
            print "Breaking %s -> %s in cycle:" % (dep, basedep)
            for i in range(len(mycycle) - 1):
              needs = deps_map[mycycle[i]]["needs"]
              # Edges removed earlier are reported as "deleted".
              deptype = needs.get(mycycle[i+1], "deleted")
              print " %s -> %s (%s)" % (mycycle[i], mycycle[i+1], deptype)
            # Cut the edge in both directions, then stop scanning cycles for
            # this dep.
            del deps_map[dep]["needs"][basedep]
            this_pkg["provides"].remove(dep)
            break
| |
def AddSecretDeps():
  """Add the extra dependencies listed in secret_deps to the graph.

  For debugging dependency problems. Each secret_deps entry pairs a name
  fragment of a package with a name fragment of the package it needs. When
  both fragments match a package in deps_map (the last match wins), a
  dependency of type "secret" is recorded in both directions.
  """
  for bad, needed in secret_deps.items():
    bad_pkg = needed_pkg = None
    for name in deps_map:
      if bad in name:
        bad_pkg = name
      if needed in name:
        needed_pkg = name
    if not (bad_pkg and needed_pkg):
      continue
    deps_map[needed_pkg]["provides"].add(bad_pkg)
    deps_map[bad_pkg]["needs"][needed_pkg] = "secret"
| |
def MergeChildren(pkg, merge_type):
  """Merge this package and all packages it provides.

  Sets the merge_type flag on pkg and, recursively, on every package that
  depends on it, marks each as non-optional, and turns a "nomerge" action
  into "merge".

  Args:
    pkg: Package identifier.
    merge_type: Name of the deps_map flag to set ("mandatory" or
      "mandatory_source").
  Returns:
    An empty set when pkg is already flagged or is not in final_pkgs;
    otherwise None.
  """
  node = deps_map[pkg]
  already_flagged = node[merge_type]
  if already_flagged or pkg not in final_pkgs:
    return set()

  # This package has to be installed, and so does everything built on it.
  deps_info[pkg]["optional"] = False
  node[merge_type] = True
  for dependent in node["provides"]:
    MergeChildren(dependent, merge_type)

  if node["action"] == "nomerge":
    node["action"] = "merge"
| |
def RemotePackageDatabase():
  """Grab the latest binary package database from the prebuilt server.

  We need to know the modification times of the prebuilt packages so that we
  know when it is OK to use these packages and when we should rebuild them
  instead.

  The index is fetched from PORTAGE_BINHOST + "/Packages". Each "MTIME: "
  line is attributed to the most recent "CPV: " line before it.

  Returns:
    A dict mapping package identifiers to modification times.
  """
  url = self.emerge.settings["PORTAGE_BINHOST"] + "/Packages"
  cpv_prefix, mtime_prefix = "CPV: ", "MTIME: "

  prebuilt_pkgs = {}
  pkg = None
  f = urllib2.urlopen(url)
  try:
    for line in f:
      if line.startswith(cpv_prefix):
        pkg = line[len(cpv_prefix):].rstrip()
      elif line.startswith(mtime_prefix) and pkg is not None:
        # int() ignores the trailing newline, so a final line without one is
        # parsed correctly too. An MTIME seen before any CPV has no package
        # to belong to and is skipped.
        prebuilt_pkgs[pkg] = int(line[len(mtime_prefix):])
  finally:
    # Release the connection even if the index turns out to be malformed.
    f.close()

  return prebuilt_pkgs
| |
def LocalPackageDatabase():
  """Get the modification times of the packages in the local database.

  We need to know the modification times of the local packages so that we
  know when they need to be rebuilt.

  The index is read from /build/BOARD/packages/Packages when a board is set,
  and from /var/lib/portage/pkgs/Packages otherwise. Each "MTIME: " line is
  attributed to the most recent "CPV: " line before it.

  Returns:
    A dict mapping package identifiers to modification times.
  Raises:
    IOError: if the index file cannot be opened.
  """
  if self.board:
    path = "/build/%s/packages/Packages" % self.board
  else:
    path = "/var/lib/portage/pkgs/Packages"
  cpv_prefix, mtime_prefix = "CPV: ", "MTIME: "

  local_pkgs = {}
  pkg = None
  # The with-block closes the index promptly, even if parsing fails.
  with open(path) as f:
    for line in f:
      if line.startswith(cpv_prefix):
        pkg = line[len(cpv_prefix):].rstrip()
      elif line.startswith(mtime_prefix) and pkg is not None:
        # int() ignores the trailing newline, so a final line without one is
        # parsed correctly too. An MTIME seen before any CPV has no package
        # to belong to and is skipped.
        local_pkgs[pkg] = int(line[len(mtime_prefix):])

  return local_pkgs
| |
| def AutoRebuildDeps(local_pkgs, remote_pkgs, cycles): |
| """Recursively rebuild packages when necessary using modification times. |
| |
| If you've modified a package, it's a good idea to rebuild all the packages |
| that depend on it from source. This function looks for any packages which |
| depend on packages that have been modified and ensures that they get |
| rebuilt. |
| |
| Args: |
| local_pkgs: Modification times from the local database. |
| remote_pkgs: Modification times from the prebuilt server. |
| cycles: Dictionary returned from FindCycles() |
| |
| Returns: |
| The set of packages we marked as needing to be merged. |
| """ |
| |
def PrebuiltsReady(pkg, pkg_db, cache):
  """Check whether the prebuilts are ready for pkg and all deps.

  Args:
    pkg: The specified package.
    pkg_db: The package DB to use.
    cache: A dict, where the results are stored. Results found there are
      returned without being recomputed.

  Returns:
    True iff pkg and everything it needs (transitively, following the
    "needs" edges of deps_map) is present in pkg_db.
  """
  if pkg not in cache:
    # Record the package's own status before descending, so that a
    # dependency that loops back to pkg finds an answer in the cache.
    cache[pkg] = pkg in pkg_db
    if cache[pkg]:
      for dep in deps_map[pkg]["needs"]:
        if not PrebuiltsReady(dep, pkg_db, cache):
          cache[pkg] = False
          break
  return cache[pkg]
| |
| def LastModifiedWithDeps(pkg, pkg_db, cache): |
| """Calculate the last modified time of a package and its dependencies. |
| |
| This function looks at all the packages needed by the specified package |
| and checks the most recent modification time of all of those packages. |
| If the dependencies of a package were modified more recently than the |
| package itself, then we know the package needs to be rebuilt. |
| |
| Args: |
| pkg: The specified package. |
| pkg_db: The package DB to use. |
| cache: A dict, where the last modified times are stored. |
| |
| Returns: |
| The last modified time of the specified package and its dependencies. |
| """ |
| if pkg in cache: |
| return cache[pkg] |
| |
| cache[pkg] = pkg_db.get(pkg, 0) |
| for dep in deps_map[pkg]["needs"]: |
| t = LastModifiedWithDeps(dep, pkg_db, cache) |
| cache[pkg] = max(cache[pkg], t) |
| return cache[pkg] |
| |
| # For every package that's getting updated in our local cache (binary |
| # or source), make sure we also update the children. If a package is |
| # built from source, all children must also be built from source. |
| local_ready_cache, remote_ready_cache = {}, {} |
| local_mtime_cache, remote_mtime_cache = {}, {} |
| for pkg in final_pkgs: |
| # If all the necessary local packages are ready, and their |
| # modification times are in sync, we don't need to do anything here. |
| local_mtime = LastModifiedWithDeps(pkg, local_pkgs, local_mtime_cache) |
| local_ready = PrebuiltsReady(pkg, local_pkgs, local_ready_cache) |
| if (not local_ready or local_pkgs.get(pkg, 0) < local_mtime and |
| pkg not in cycles): |
| # OK, at least one package is missing from the local cache or is |
| # outdated. This means we're going to have to install the package |
| # and all dependencies. |
| # |
| # If all the necessary remote packages are ready, and they're at |
| # least as new as our local packages, we can install them. |
| # Otherwise, we need to build from source. |
| remote_mtime = LastModifiedWithDeps(pkg, remote_pkgs, |
| remote_mtime_cache) |
| remote_ready = PrebuiltsReady(pkg, remote_pkgs, remote_ready_cache) |
| if remote_ready and (local_mtime <= remote_mtime or pkg in cycles): |
| MergeChildren(pkg, "mandatory") |
| else: |
| MergeChildren(pkg, "mandatory_source") |
| |
def UsePrebuiltPackages():
    """Update packages that can use prebuilts to do so.

    For every package in deps_map that is being merged and is not forced to
    build from source, look the package up in the binary package database.
    When a binary exists, a binary Package object is stored in
    self.package_db for it; packages without a binary are left untouched.

    Returns:
      A dict that is created empty and never filled in by this function.
    """
    started_at = time.time()

    # The bintree is the database of binary packages. By default, it's
    # empty.
    bintree = emerge.trees[root]["bintree"]
    bindb = bintree.dbapi
    root_config = emerge.root_config
    prebuilt_pkgs = {}

    # Populate the DB with packages
    bintree.populate("--getbinpkg" in emerge.opts,
                     "--getbinpkgonly" in emerge.opts)

    for pkg, info in deps_map.iteritems():
        # Only packages that are merged and may come from a binary qualify.
        if (not info or info["mandatory_source"] or
                info["action"] != "merge"):
            continue

        db_keys = list(bindb._aux_cache_keys)
        try:
            db_vals = bindb.aux_get(pkg, db_keys + ["MTIME"])
        except KeyError:
            # No binary package
            continue

        # MTIME was requested last, so it is the final value; take it off
        # so that the remaining values line up with db_keys again.
        mtime = int(db_vals.pop() or 0)
        self.package_db[pkg] = Package(
            built=True, cpv=pkg, installed=False,
            metadata=zip(db_keys, db_vals), onlydeps=False, mtime=mtime,
            operation="merge", root_config=root_config,
            type_name="binary")

    elapsed = time.time() - started_at
    if "--quiet" not in emerge.opts:
        print("Prebuilt DB populated in %dm%.1fs" % (elapsed / 60,
                                                     elapsed % 60))

    return prebuilt_pkgs
| |
def AddRemainingPackages():
    """Fill in packages that don't have entries in the package db.

    Every package we are installing needs an entry in the package db.
    This function should only be called after we have removed the
    packages that are not being merged from our deps_map.

    Packages still missing from self.package_db get an "ebuild" package
    object. Finding a missing package whose action is not "merge" is
    treated as fatal: a message is printed and the program exits with
    status 1.
    """
    for pkg in deps_map:
        if pkg in self.package_db:
            continue

        # We should only fill in packages that are being merged. If
        # there's any other packages here, something funny is going on.
        if deps_map[pkg]["action"] != "merge":
            print("Missing entry for %s in package db" % pkg)
            sys.exit(1)

        self.package_db[pkg] = emerge.depgraph._pkg(pkg, "ebuild",
                                                    emerge.root_config)
| |
| ReverseTree(deps_tree) |
| BuildFinalPackageSet() |
| AddSecretDeps() |
| |
| if self.no_workon_deps: |
| for pkg in self.mandatory_source.copy(): |
| for db_pkg in final_db.match_pkgs(pkg): |
| deps_map[str(db_pkg.cpv)]["mandatory_source"] = True |
| else: |
| for pkg in self.mandatory_source.copy(): |
| for db_pkg in final_db.match_pkgs(pkg): |
| MergeChildren(str(db_pkg.cpv), "mandatory_source") |
| |
| cycles = FindCycles() |
| if self.rebuild: |
| local_pkgs = LocalPackageDatabase() |
| remote_pkgs = RemotePackageDatabase() |
| AutoRebuildDeps(local_pkgs, remote_pkgs, cycles) |
| |
| # We need to remove installed packages so that we can use the dependency |
| # ordering of the install process to show us what cycles to crack. Once |
| # we've done that, we also need to recalculate our list of cycles so that |
| # we don't include the installed packages in our cycles. |
| RemoveInstalledPackages() |
| cycles = FindCycles() |
| SanitizeTree(cycles) |
| if deps_map: |
| if "--usepkg" in emerge.opts: |
| UsePrebuiltPackages() |
| AddRemainingPackages() |
| return deps_map |
| |
def PrintInstallPlan(self, deps_map):
    """Print an emerge-style install plan.

    The install plan lists what packages we're installing, in order.
    It's useful for understanding what parallel_emerge is doing.

    The order is worked out on a private deep copy of the graph, so the
    caller's deps_map is left untouched. The resulting list of package
    objects (looked up in self.package_db) is handed to the depgraph's
    display() method.

    Args:
      deps_map: The dependency graph.
    """

    def _CollectFrom(node, graph, ordered):
        """Append node, then everything that becomes unblocked by it."""
        ordered.append(node)
        for child in graph[node]["provides"]:
            pending = graph[child]["needs"]
            # This node is now "installed", so the child stops waiting on it.
            del pending[node]
            if not pending:
                _CollectFrom(child, graph, ordered)
        return ordered

    graph = copy.deepcopy(deps_map)
    planned = set()
    install_plan = []
    for root_pkg, info in graph.items():
        # Start only from packages with nothing left to wait for, and skip
        # the ones an earlier walk has already placed in the plan.
        if info["needs"] or root_pkg in planned:
            continue
        for name in _CollectFrom(root_pkg, graph, []):
            planned.add(name)
            install_plan.append(self.package_db[name])

    self.emerge.depgraph.display(install_plan)
| |
| |
def PrintDepsMap(deps_map):
    """Print the dependency graph, listing each package's prerequisites.

    Args:
      deps_map: The dependency graph. Each value must provide an "action"
        entry and an iterable "needs" entry.
    """
    for pkg, info in deps_map.items():
        print("%s: (%s) needs" % (pkg, info["action"]))
        prerequisites = info["needs"]
        for needed in prerequisites:
            print(" %s" % (needed))
        if not prerequisites:
            print(" no dependencies")
| |
| |
def EmergeWorker(task_queue, done_queue, emerge, package_db):
    """This worker emerges any packages given to it on the task_queue.

    It expects package identifiers to be passed to it via task_queue. When
    the package is merged, it pushes (target, retval, outputstr) into the
    done_queue. retval is 0 on success; outputstr carries the captured
    stdout/stderr of the merge and is only filled in when retval is
    non-zero.

    An exception raised while merging is reported as a failure (retval 1,
    with the traceback appended to the captured output) instead of killing
    the worker. A worker that dies without posting a result would leave the
    parent waiting on that package forever.

    The function loops forever; it only stops when the process is torn down
    or task_queue.get() raises.

    Args:
      task_queue: The queue of tasks for this worker to do.
      done_queue: The queue of results from the worker.
      emerge: An EmergeData() object.
      package_db: A dict, mapping package ids to portage Package objects.
    """
    import traceback

    settings, trees, mtimedb = emerge.settings, emerge.trees, emerge.mtimedb
    opts, spinner = emerge.opts, emerge.spinner
    # Every package handed to this worker is merged with --nodeps.
    opts["--nodeps"] = True
    while True:
        target = task_queue.get()
        print("Emerging %s" % (target,))
        db_pkg = package_db[target]
        db_pkg.root_config = emerge.root_config
        install_list = [db_pkg]
        retval = 0
        outputstr = ""
        if "--pretend" not in opts:
            # Capture everything the merge prints so that it can be shown
            # as one block if the merge fails.
            output = tempfile.TemporaryFile()
            save_stdout = sys.stdout
            save_stderr = sys.stderr
            try:
                try:
                    sys.stdout = output
                    sys.stderr = output
                    scheduler = Scheduler(settings, trees, mtimedb, opts,
                                          spinner, install_list, [],
                                          emerge.scheduler_graph)
                    retval = scheduler.merge()
                except Exception:
                    traceback.print_exc(file=output)
                    retval = 1
                finally:
                    sys.stdout = save_stdout
                    sys.stderr = save_stderr
                if retval is None:
                    retval = 0
                if retval != 0:
                    output.seek(0)
                    outputstr = output.read()
            finally:
                # Release the temp file now rather than whenever the object
                # happens to be collected.
                output.close()

        done_queue.put((target, retval, outputstr))
| |
| |
class EmergeQueue(object):
    """Class to schedule emerge jobs according to a dependency graph.

    Packages whose prerequisites have all been met are handed to a pool of
    EmergeWorker processes through a task queue. Results are read back from
    a second queue, and every success unblocks the packages that were
    waiting on it. A package that fails is retried once, after everything
    else that can run has finished; a second failure ends the build.
    """

    def __init__(self, deps_map, emerge, package_db):
        """Start the worker pool and queue every package that is ready.

        Exits the program with status 0 when "--pretend" is in emerge.opts.

        Args:
          deps_map: The dependency graph. Each value provides "action",
            "needs" and "provides" entries. Entries are removed from this
            dict as packages finish.
          emerge: An EmergeData() object.
          package_db: A dict, mapping package ids to portage Package
            objects.
        """
        # Store the dependency graph.
        self._deps_map = deps_map
        # Initialize the running queue to empty
        self._jobs = set()
        # List of total package installs represented in deps_map.
        install_jobs = [x for x in deps_map
                        if deps_map[x]["action"] == "merge"]
        self._total_jobs = len(install_jobs)

        if "--pretend" in emerge.opts:
            print("Skipping merge because of --pretend mode.")
            sys.exit(0)

        # Setup scheduler graph object. This is used by the child processes
        # to help schedule jobs.
        emerge.scheduler_graph = emerge.depgraph.schedulerGraph()

        procs = min(self._total_jobs,
                    emerge.opts.get("--jobs", multiprocessing.cpu_count()))
        self._emerge_queue = multiprocessing.Queue()
        self._done_queue = multiprocessing.Queue()
        args = (self._emerge_queue, self._done_queue, emerge, package_db)
        self._pool = multiprocessing.Pool(procs, EmergeWorker, args)

        # Initialize the failed queue to empty.
        self._retry_queue = []
        # Maps each package that has failed to the output of its last
        # failed attempt.
        self._failed = {}

        # Print an update before we launch the merges.
        self._Status()

        # Decide what is ready *before* scheduling anything. Scheduling a
        # "nomerge" package finishes it on the spot, which empties the
        # "needs" of its dependents and schedules them; checking "needs"
        # while walking the graph would then schedule those dependents a
        # second time.
        ready = [target for target, info in deps_map.items()
                 if not info["needs"]]
        for target in ready:
            self._Schedule(target)

    def _Schedule(self, target):
        """Queue target for merging, or finish it at once if "nomerge"."""
        # We maintain a tree of all deps, if this doesn't need
        # to be installed just free up its children and continue.
        # It is possible to reinstall deps of deps, without reinstalling
        # first level deps, like so:
        # chromeos (merge) -> eselect (nomerge) -> python (merge)
        if self._deps_map[target]["action"] == "nomerge":
            self._Finish(target)
        else:
            # Kick off the build if it's marked to be built.
            self._jobs.add(target)
            self._emerge_queue.put(target)

    def _LoadAvg(self):
        """Return the first three fields of /proc/loadavg as one string."""
        with open("/proc/loadavg", "r") as loadavg:
            loads = loadavg.readline().split()[:3]
        return " ".join(loads)

    def _Status(self):
        """Print status.

        Pending is the number of packages left in the graph, Ready the
        number waiting in the task queue, and Running the scheduled jobs
        that are no longer waiting in that queue.
        """
        seconds = time.time() - GLOBAL_START
        line = ("Pending %s, Ready %s, Running %s, Retrying %s, Total %s "
                "[Time %dm%.1fs Load %s]")
        qsize = self._emerge_queue.qsize()
        print(line % (len(self._deps_map), qsize, len(self._jobs) - qsize,
                      len(self._retry_queue), self._total_jobs,
                      seconds / 60, seconds % 60, self._LoadAvg()))

    def _Finish(self, target):
        """Mark a target as completed and unblock dependencies."""
        for dep in self._deps_map[target]["provides"]:
            del self._deps_map[dep]["needs"][target]
            if not self._deps_map[dep]["needs"]:
                self._Schedule(dep)
        self._deps_map.pop(target)

    def _Retry(self):
        """Reschedule the oldest package waiting for a retry, if any."""
        if self._retry_queue:
            target = self._retry_queue.pop(0)
            self._Schedule(target)
            print("Retrying emerge of %s." % target)

    def Run(self):
        """Run through the scheduled ebuilds.

        Keep running so long as we have uninstalled packages in the
        dependency graph to merge. Exits the program with status 1 when a
        package has failed with no retry left, or when nothing can make
        progress because of circular dependencies.
        """
        while self._deps_map:
            # Check here that we are actually waiting for something.
            if (self._emerge_queue.empty() and
                    self._done_queue.empty() and
                    not self._jobs):
                # If we have failed on a package, retry it now.
                if self._retry_queue:
                    self._Retry()
                # If we have failed a package twice, just give up.
                elif self._failed:
                    for failure, output in self._failed.items():
                        print("Package failed: %s" % failure)
                        print(output)
                    PrintDepsMap(self._deps_map)
                    print("Packages failed: %s" % ", ".join(self._failed))
                    sys.exit(1)
                # If we have dependency cycles.
                else:
                    print("Deadlock! Circular dependencies!")
                    PrintDepsMap(self._deps_map)
                    sys.exit(1)

            try:
                target, retcode, output = self._done_queue.get(timeout=5)
            except Queue.Empty:
                # Print an update.
                self._Status()
                continue

            self._jobs.discard(target)

            if retcode != 0:
                # Show what went wrong, then handle the job failure.
                print(output)
                if target in self._failed:
                    # If this job has failed previously, give up.
                    print("Failed %s. Your build has failed." % target)
                else:
                    # Queue up this build to try again after a long while.
                    self._retry_queue.append(target)
                    print("Failed %s, retrying later." % target)
                # Keep the output so the final failure report can show it.
                self._failed[target] = output
            else:
                if target in self._failed and self._retry_queue:
                    # If we have successfully retried a failed package, and
                    # there are more failed packages, try the next one. We
                    # will only have one retrying package actively running
                    # at a time.
                    self._Retry()

                print("Completed %s" % target)
                # Mark as completed and unblock waiting ebuilds.
                self._Finish(target)

            # Print an update.
            self._Status()
| |
| |
def main():
    """Entry point: work out the dependency graph and run the merges.

    Command lines that carry an explicit emerge action are handed to
    emerge_main() instead. Otherwise the dependency tree and graph are
    generated, the install plan is printed, the packages are merged through
    an EmergeQueue, and finally (unless --oneshot or --pretend was given)
    the packages named on the command line are added to the "selected" set.
    """
    deps = DepGraphGenerator()
    deps.Initialize(sys.argv[1:])
    emerge = deps.emerge

    if emerge.action is not None:
        sys.argv = deps.ParseParallelEmergeArgs(sys.argv)
        sys.exit(emerge_main())
    if not emerge.cmdline_packages:
        Usage()
        sys.exit(1)

    # Unless we're in pretend mode, there's not much point running without
    # root access. We need to be able to install packages.
    #
    # NOTE: Even if you're running --pretend, it's a good idea to run
    #       parallel_emerge with root access so that portage can write to
    #       the dependency cache. This is important for performance.
    if "--pretend" not in emerge.opts and portage.secpass < 2:
        print("parallel_emerge: superuser access is required.")
        sys.exit(1)

    if "--quiet" not in emerge.opts:
        requested = " ".join(emerge.cmdline_packages)
        print("Starting fast-emerge.")
        print(" Building package %s on %s" % (requested,
                                              deps.board or "root"))

    deps_tree, deps_info = deps.GenDependencyTree()

    # You want me to be verbose? I'll give you two trees! Twice as much
    # value.
    if "--tree" in emerge.opts and "--verbose" in emerge.opts:
        deps.PrintTree(deps_tree)

    deps_graph = deps.GenDependencyGraph(deps_tree, deps_info)

    # OK, time to print out our progress so far.
    deps.PrintInstallPlan(deps_graph)
    if "--tree" in emerge.opts:
        PrintDepsMap(deps_graph)

    # Run the queued emerges.
    merge_queue = EmergeQueue(deps_graph, emerge, deps.package_db)
    merge_queue.Run()

    # Update world.
    if not ("--oneshot" in emerge.opts or "--pretend" in emerge.opts):
        selected_set = emerge.root_config.sets["selected"]
        additions = []
        root = emerge.settings["ROOT"]
        final_db = emerge.depgraph._dynamic_config.mydbapi[root]
        for requested_pkg in emerge.cmdline_packages:
            for match in final_db.match_pkgs(requested_pkg):
                print("Adding %s to world" % match.cp)
                additions.append(match.cp)
        if additions:
            selected_set.update(additions)

    print("Done")


if __name__ == "__main__":
    main()