blob: 49cf7b2f4d1e29caafc786bcf301ecab9c14b239 [file] [log] [blame]
#!/usr/bin/python2.6
# Copyright (c) 2010 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Program to run emerge in parallel, for significant speedup.
Usage:
./parallel_emerge [--board=BOARD] [--workon=PKGS] [--no-workon-deps]
    [emerge args] package
Basic operation:
Runs 'emerge -p --debug' to display dependencies, and stores a
dependency graph. All non-blocked packages are launched in parallel,
as 'emerge --nodeps package' with any blocked packages being emerged
immediately upon deps being met.
For this to work effectively, /usr/lib/portage/pym/portage/locks.py
must be stubbed out, preventing portage from slowing itself with
unnecessary locking, as this script ensures that emerge is run in such
a way that common resources are never in conflict. This is controlled
by an environment variable PORTAGE_LOCKS set in parallel emerge
subprocesses.
Parallel Emerge unlocks two things during operation, here's what you
must do to keep this safe:
* Storage dir containing binary packages. - Don't emerge new
packages while installing the existing ones.
* Portage database - You must not examine deps while modifying the
database. Therefore you may only parallelize "-p" read only access,
or "--nodeps" write only access.
Caveats:
* Some ebuild packages have incorrectly specified deps, and running
them in parallel is more likely to bring out these failures.
* Some ebuilds (especially the build part) have complex dependencies
that are not captured well by this script (it may be necessary to
install an old package to build, but then install a newer version
of the same package for a runtime dep).
"""
import copy
import multiprocessing
import os
import Queue
import shlex
import sys
import tempfile
import time
import urllib2
# If PORTAGE_USERNAME isn't specified, scrape it from the $HOME variable. On
# Chromium OS, the default "portage" user doesn't have the necessary
# permissions. It'd be easier if we could default to $USERNAME, but $USERNAME
# is "root" here because we get called through sudo.
#
# We need to set this before importing any portage modules, because portage
# looks up "PORTAGE_USERNAME" at import time.
#
# NOTE: .bashrc sets PORTAGE_USERNAME = $USERNAME, so most people won't
# encounter this case unless they have an old chroot or blow away the
# environment by running sudo without the -E specifier.
if "PORTAGE_USERNAME" not in os.environ:
  # Derive the Portage user from a /home/<user>/... style home directory.
  homedir = os.environ["HOME"]
  if homedir.startswith("/home/"):
    os.environ["PORTAGE_USERNAME"] = homedir[len("/home/"):].split("/")[0]
# Portage doesn't expose dependency trees in its public API, so we have to
# make use of some private APIs here. These modules are found under
# /usr/lib/portage/pym/.
#
# TODO(davidjames): Update Portage to expose public APIs for these features.
from _emerge.actions import adjust_configs
from _emerge.actions import load_emerge_config
from _emerge.create_depgraph_params import create_depgraph_params
from _emerge.depgraph import backtrack_depgraph
from _emerge.main import emerge_main
from _emerge.main import parse_opts
from _emerge.Package import Package
from _emerge.Scheduler import Scheduler
from _emerge.stdout_spinner import stdout_spinner
import portage
import portage.debug
def Usage():
"""Print usage."""
print "Usage:"
print " ./parallel_emerge [--board=BOARD] [--workon=PKGS] [--no-workon-deps]"
print " [--rebuild] [emerge args] package"
print
print "Packages specified as workon packages are always built from source."
print "Unless --no-workon-deps is specified, packages that depend on these"
print "packages are also built from source."
print
print "The --workon argument is mainly useful when you want to build and"
print "install packages that you are working on unconditionally, but do not"
print "to have to rev the package to indicate you want to build it from"
print "source. The build_packages script will automatically supply the"
print "workon argument to emerge, ensuring that packages selected using"
print "cros-workon are rebuilt."
print
print "The --rebuild option rebuilds packages whenever their dependencies"
print "are changed. This ensures that your build is correct."
sys.exit(1)
# These are dependencies that are not specified in the package,
# but will prevent the package from installing. Consumed by
# AddSecretDeps() below as a debugging aid: each key/value pair is a
# (dependent, dependency) substring pair matched against package names.
secret_deps = {}

# Global start time, recorded at import so elapsed time can be reported.
GLOBAL_START = time.time()
class EmergeData(object):
  """This simple struct holds various emerge variables.

  This struct helps us easily pass emerge variables around as a unit.
  These variables are used for calculating dependencies and installing
  packages.

  Attributes:
    action: The action the user requested, e.g. "unmerge" for
      "parallel_emerge --unmerge package". None when the user is simply
      installing packages; otherwise it matches the long-form name of the
      associated emerge option.
    cmdline_packages: The list of packages passed on the command line.
    depgraph: The emerge dependency graph, holding every package involved
      in this merge along with its version.
    opts: A dict of options passed to emerge, as cleaned up by parse_opts.
      Note that parse_opts drops some disabled flags entirely (e.g.
      "--usepkg=n" simply removes "--usepkg" from the dict) while passing
      others (e.g. --with-bdeps) through as-is; see _emerge.main.parse_opts
      for the full list of cleanups.
    mtimedb: Portage's global state dictionary, loaded from disk at startup
      and written back via mtimedb.commit(). It records global updates and
      the current operation (which is how emerge implements --resume).
      parallel_emerge never records its progress here, so --resume is not
      supported.
    root_config: The portage configuration for the current root, holding
      the settings and the three portage trees for that root.
    scheduler_graph: Graph emerge uses to plan installs. We do not install
      deps ourselves, but the Scheduler object still requires it.
    settings: Portage settings for this session, mostly taken from
      make.conf inside the current install root.
    spinner: Portage's progress spinner. We keep our own output, so this
      is switched to silent mode.
    trees: The portage trees, indexed by root (self.settings["ROOT"]).
      Each root holds three trees: vartree (installed packages), porttree
      (ebuilds from which packages can be built) and bintree (binary
      packages).
  """
  __slots__ = ["action", "cmdline_packages", "depgraph", "mtimedb", "opts",
               "root_config", "scheduler_graph", "settings", "spinner",
               "trees"]

  def __init__(self):
    # All fields start out unset; Initialize() and friends fill them in.
    for attr in self.__slots__:
      setattr(self, attr, None)
class DepGraphGenerator(object):
  """Grab dependency information about packages from portage.

  Typical usage:
    deps = DepGraphGenerator()
    deps.Initialize(sys.argv[1:])
    deps_tree, deps_info = deps.GenDependencyTree()
    deps_graph = deps.GenDependencyGraph(deps_tree, deps_info)
    deps.PrintTree(deps_tree)
    PrintDepsMap(deps_graph)
  """
  __slots__ = ["board", "emerge", "mandatory_source", "no_workon_deps",
               "package_db", "rebuild"]

  def __init__(self):
    # Target board name (from --board), or None when building for the host.
    self.board = None
    # Shared emerge state (options, trees, depgraph, ...); see EmergeData.
    self.emerge = EmergeData()
    # Package atoms from --workon that must be built from source.
    self.mandatory_source = set()
    # True when --no-workon-deps was given: don't force source rebuilds of
    # packages that depend on the workon packages.
    self.no_workon_deps = False
    # Maps package CPV strings to portage Package objects.
    self.package_db = {}
    # True when --rebuild was given: rebuild packages whose deps changed.
    self.rebuild = False
def ParseParallelEmergeArgs(self, argv):
"""Read the parallel emerge arguments from the command-line.
We need to be compatible with emerge arg format. We scrape arguments that
are specific to parallel_emerge, and pass through the rest directly to
emerge.
Args:
argv: arguments list
Returns:
Arguments that don't belong to parallel_emerge
"""
emerge_args = []
for arg in argv:
# Specifically match arguments that are specific to parallel_emerge, and
# pass through the rest.
if arg.startswith("--board="):
self.board = arg.replace("--board=", "")
elif arg.startswith("--workon="):
workon_str = arg.replace("--workon=", "")
package_list = shlex.split(" ".join(shlex.split(workon_str)))
self.mandatory_source.update(package_list)
elif arg == "--no-workon-deps":
self.no_workon_deps = True
elif arg == "--rebuild":
self.rebuild = True
else:
# Not one of our options, so pass through to emerge.
emerge_args.append(arg)
if self.rebuild:
if self.no_workon_deps:
print "--rebuild is not compatible with --no-workon-deps"
sys.exit(1)
return emerge_args
def Initialize(self, args):
"""Initializer. Parses arguments and sets up portage state."""
# Parse and strip out args that are just intended for parallel_emerge.
emerge_args = self.ParseParallelEmergeArgs(args)
# Setup various environment variables based on our current board. These
# variables are normally setup inside emerge-${BOARD}, but since we don't
# call that script, we have to set it up here. These variables serve to
# point our tools at /build/BOARD and to setup cross compiles to the
# appropriate board as configured in toolchain.conf.
if self.board:
os.environ["PORTAGE_CONFIGROOT"] = "/build/" + self.board
os.environ["PORTAGE_SYSROOT"] = "/build/" + self.board
os.environ["SYSROOT"] = "/build/" + self.board
scripts_dir = os.path.dirname(os.path.realpath(__file__))
toolchain_path = "%s/../overlays/overlay-%s/toolchain.conf"
# Strip the variant out of the board name to look for the toolchain. This
# is similar to what setup_board does.
board_no_variant = self.board.split('_')[0]
f = open(toolchain_path % (scripts_dir, board_no_variant))
os.environ["CHOST"] = f.readline().strip()
f.close()
# Although CHROMEOS_ROOT isn't specific to boards, it's normally setup
# inside emerge-${BOARD}, so we set it up here for compatibility. It
# will be going away soon as we migrate to CROS_WORKON_SRCROOT.
os.environ.setdefault("CHROMEOS_ROOT", os.environ["HOME"] + "/trunk")
# Modify the environment to disable locking.
os.environ["PORTAGE_LOCKS"] = "false"
os.environ["UNMERGE_DELAY"] = "0"
# Parse the emerge options.
action, opts, cmdline_packages = parse_opts(emerge_args)
# If we're installing to the board, we want the --root-deps option so that
# portage will install the build dependencies to that location as well.
if self.board:
opts.setdefault("--root-deps", True)
# Set environment variables based on options. Portage normally sets these
# environment variables in emerge_main, but we can't use that function,
# because it also does a bunch of other stuff that we don't want.
# TODO(davidjames): Patch portage to move this logic into a function we can
# reuse here.
if "--debug" in opts:
os.environ["PORTAGE_DEBUG"] = "1"
if "--config-root" in opts:
os.environ["PORTAGE_CONFIGROOT"] = opts["--config-root"]
if "--root" in opts:
os.environ["ROOT"] = opts["--root"]
if "--accept-properties" in opts:
os.environ["ACCEPT_PROPERTIES"] = opts["--accept-properties"]
# Now that we've setup the necessary environment variables, we can load the
# emerge config from disk.
settings, trees, mtimedb = load_emerge_config()
# Check whether our portage tree is out of date. Typically, this happens
# when you're setting up a new portage tree, such as in setup_board and
# make_chroot. In that case, portage applies a bunch of global updates
# here. Once the updates are finished, we need to commit any changes
# that the global update made to our mtimedb, and reload the config.
#
# Portage normally handles this logic in emerge_main, but again, we can't
# use that function here.
if portage._global_updates(trees, mtimedb["updates"]):
mtimedb.commit()
settings, trees, mtimedb = load_emerge_config(trees=trees)
# Setup implied options. Portage normally handles this logic in
# emerge_main.
if "--buildpkgonly" in opts or "buildpkg" in settings.features:
opts.setdefault("--buildpkg", True)
if "--getbinpkgonly" in opts:
opts.setdefault("--usepkgonly", True)
opts.setdefault("--getbinpkg", True)
if "getbinpkg" in settings.features:
# Per emerge_main, FEATURES=getbinpkg overrides --getbinpkg=n
opts["--getbinpkg"] = True
if "--getbinpkg" in opts or "--usepkgonly" in opts:
opts.setdefault("--usepkg", True)
if "--fetch-all-uri" in opts:
opts.setdefault("--fetchonly", True)
if "--skipfirst" in opts:
opts.setdefault("--resume", True)
if "--buildpkgonly" in opts:
# --buildpkgonly will not merge anything, so it overrides all binary
# package options.
for opt in ("--getbinpkg", "--getbinpkgonly",
"--usepkg", "--usepkgonly"):
opts.pop(opt, None)
if (settings.get("PORTAGE_DEBUG", "") == "1" and
"python-trace" in settings.features):
portage.debug.set_trace(True)
# Complain about unsupported options
for opt in ("--ask", "--ask-enter-invalid", "--complete-graph",
"--resume", "--skipfirst"):
if opt in opts:
print "%s is not supported by parallel_emerge" % opt
sys.exit(1)
# Make emerge specific adjustments to the config (e.g. colors!)
adjust_configs(opts, trees)
# Save our configuration so far in the emerge object
emerge = self.emerge
emerge.action, emerge.opts = action, opts
emerge.settings, emerge.trees, emerge.mtimedb = settings, trees, mtimedb
emerge.cmdline_packages = cmdline_packages
root = settings["ROOT"]
emerge.root_config = trees[root]["root_config"]
  def GenDependencyTree(self):
    """Get dependency tree info from emerge.

    TODO(): Update cros_extract_deps to also use this code.

    Returns:
      A (deps_tree, deps_info) pair. deps_tree maps each package CPV to a
      dict with its "action" and its "deps" (themselves keyed by CPV);
      deps_info maps each CPV to its position in emerge's install plan
      ("idx") and whether the merge is "optional".
    """
    start = time.time()
    # Setup emerge options.
    #
    # We treat dependency info a bit differently than emerge itself. Unless
    # you're using --usepkgonly, we disable --getbinpkg and --usepkg here so
    # that emerge will look at the dependencies of the source ebuilds rather
    # than the binary dependencies. This helps ensure that we have the option
    # of merging a package from source, if we want to switch to it with
    # --workon and the dependencies have changed.
    emerge = self.emerge
    emerge_opts = emerge.opts.copy()
    emerge_opts.pop("--getbinpkg", None)
    if "--usepkgonly" not in emerge_opts:
      emerge_opts.pop("--usepkg", None)
    if self.mandatory_source or self.rebuild:
      # Enable --emptytree so that we get the full tree, which we need for
      # dependency analysis. By default, with this option, emerge optimizes
      # the graph by removing uninstall instructions from the graph. By
      # specifying --tree as well, we tell emerge that it's not safe to remove
      # uninstall instructions because we're planning on analyzing the output.
      emerge_opts["--tree"] = True
      emerge_opts["--emptytree"] = True
    # Create a list of packages to merge: the command-line packages plus
    # any --workon packages.
    packages = set(emerge.cmdline_packages[:])
    if self.mandatory_source:
      packages.update(self.mandatory_source)
    # Tell emerge to be quiet. We print plenty of info ourselves so we don't
    # need any extra output from portage.
    portage.util.noiselimit = -1
    # My favorite feature: The silent spinner. It doesn't spin. Ever.
    # I'd disable the colors by default too, but they look kind of cool.
    emerge.spinner = stdout_spinner()
    emerge.spinner.update = emerge.spinner.update_quiet
    if "--quiet" not in emerge.opts:
      print "Calculating deps..."
    # Ask portage to build a dependency graph, with the options we specified
    # above.
    params = create_depgraph_params(emerge_opts, emerge.action)
    success, depgraph, _ = backtrack_depgraph(
        emerge.settings, emerge.trees, emerge_opts, params, emerge.action,
        packages, emerge.spinner)
    emerge.depgraph = depgraph
    # Is it impossible to honor the user's request? Bail!
    if not success:
      depgraph.display_problems()
      sys.exit(1)
    # Build our own tree from the emerge digraph.
    deps_tree = {}
    digraph = depgraph._dynamic_config.digraph
    for node, node_deps in digraph.nodes.items():
      # Calculate dependency packages that need to be installed first. Each
      # child on the digraph is a dependency. The "operation" field specifies
      # what we're doing (e.g. merge, uninstall, etc.). The "priorities" array
      # contains the type of dependency (e.g. build, runtime, runtime_post,
      # etc.)
      #
      # Emerge itself actually treats some dependencies as "soft" dependencies
      # and sometimes ignores them. We don't do that -- we honor all
      # dependencies unless we're forced to prune them because they're cyclic.
      #
      # Portage refers to the identifiers for packages as a CPV. This acronym
      # stands for Category/Package-Version.
      #
      # Here's an example CPV: chromeos-base/power_manager-0.0.1-r1
      # Split up, this CPV would be:
      # C -- Category: chromeos-base
      # P -- Package: power_manager
      # V -- Version: 0.0.1-r1
      #
      # We just refer to CPVs as packages here because it's easier.
      deps = {}
      for child, priorities in node_deps[0].items():
        # Only the last (highest) priority is kept as the dep type.
        deps[str(child.cpv)] = dict(action=str(child.operation),
                                    deptype=str(priorities[-1]),
                                    deps={})
      # We've built our list of deps, so we can add our package to the tree.
      # Non-Package nodes (e.g. set arguments) are skipped.
      if isinstance(node, Package):
        deps_tree[str(node.cpv)] = dict(action=str(node.operation),
                                        deps=deps)
    emptytree = "--emptytree" in emerge.opts
    # Ask portage for its install plan, so that we can only throw out
    # dependencies that portage throws out. Also, keep track of the old
    # versions of packages that we're either upgrading or replacing.
    #
    # The "vardb" is the database of installed packages.
    vardb = emerge.trees[emerge.settings["ROOT"]]["vartree"].dbapi
    deps_info = {}
    for pkg in depgraph.altlist():
      if isinstance(pkg, Package):
        # If we're not in emptytree mode, and we're going to replace a package
        # that is already installed, then this operation is possibly optional.
        # ("--selective" mode is handled later, in RemoveInstalledPackages())
        optional = False
        if not emptytree and vardb.cpv_exists(pkg.cpv):
          optional = True
        # Add the package to our database.
        self.package_db[str(pkg.cpv)] = pkg
        # Save off info about the package; "idx" records emerge's install
        # order, used later by SanitizeTree() to decide which cycle edges
        # to break.
        deps_info[str(pkg.cpv)] = {"idx": len(deps_info),
                                   "optional": optional}
    # Delete the --tree option, because we don't really want to display a
    # tree. We just wanted to get emerge to leave uninstall instructions on
    # the graph. Later, when we display the graph, we'll want standard-looking
    # output, so removing the --tree option is important.
    depgraph._frozen_config.myopts.pop("--tree", None)
    seconds = time.time() - start
    if "--quiet" not in emerge.opts:
      print "Deps calculated in %dm%.1fs" % (seconds / 60, seconds % 60)
    return deps_tree, deps_info
def PrintTree(self, deps, depth=""):
"""Print the deps we have seen in the emerge output.
Args:
deps: Dependency tree structure.
depth: Allows printing the tree recursively, with indentation.
"""
for entry in sorted(deps):
action = deps[entry]["action"]
print "%s %s (%s)" % (depth, entry, action)
self.PrintTree(deps[entry]["deps"], depth=depth + " ")
def GenDependencyGraph(self, deps_tree, deps_info):
"""Generate a doubly linked dependency graph.
Args:
deps_tree: Dependency tree structure.
deps_info: More details on the dependencies.
Returns:
Deps graph in the form of a dict of packages, with each package
specifying a "needs" list and "provides" list.
"""
emerge = self.emerge
root = emerge.settings["ROOT"]
# It's useful to know what packages will actually end up on the
# system at some point. Packages in final_db are either already
# installed, or will be installed by the time we're done.
final_db = emerge.depgraph._dynamic_config.mydbapi[root]
# final_pkgs is a set of the packages we found in the final_db. These
# packages are either already installed, or will be installed by the time
# we're done. It's populated in BuildFinalPackageSet()
final_pkgs = set()
# deps_map is the actual dependency graph.
#
# Each package specifies a "needs" list and a "provides" list. The "needs"
# list indicates which packages we depend on. The "provides" list
# indicates the reverse dependencies -- what packages need us.
#
# We also provide some other information in the dependency graph:
# - action: What we're planning on doing with this package. Generally,
# "merge", "nomerge", or "uninstall"
# - mandatory_source:
# If true, indicates that this package must be compiled from source.
# We set this for "workon" packages, and for packages where the
# binaries are known to be out of date.
# - mandatory:
# If true, indicates that this package must be installed. We don't care
# whether it's binary or source, unless the mandatory_source flag is
# also set.
#
deps_map = {}
def ReverseTree(packages):
"""Convert tree to digraph.
Take the tree of package -> requirements and reverse it to a digraph of
buildable packages -> packages they unblock.
Args:
packages: Tree(s) of dependencies.
Returns:
Unsanitized digraph.
"""
for pkg in packages:
# Create an entry for the package
action = packages[pkg]["action"]
default_pkg = {"needs": {}, "provides": set(), "action": action,
"mandatory_source": False, "mandatory": False}
this_pkg = deps_map.setdefault(pkg, default_pkg)
# Create entries for dependencies of this package first.
ReverseTree(packages[pkg]["deps"])
# Add dependencies to this package.
for dep, dep_item in packages[pkg]["deps"].iteritems():
dep_pkg = deps_map[dep]
dep_type = dep_item["deptype"]
if dep_type != "runtime_post":
dep_pkg["provides"].add(pkg)
this_pkg["needs"][dep] = dep_type
def BuildFinalPackageSet():
# If this package is installed, or will get installed, add it to
# final_pkgs
for pkg in deps_map:
for match in final_db.match_pkgs(pkg):
final_pkgs.add(str(match.cpv))
def FindCycles():
"""Find cycles in the dependency tree.
Returns:
Dict of packages involved in cyclic dependencies, mapping each package
to a list of the cycles the package is involved in.
"""
def FindCyclesAtNode(pkg, cycles, unresolved, resolved):
"""Find cycles in cyclic dependencies starting at specified package.
Args:
pkg: Package identifier.
cycles: Set of cycles so far.
unresolved: Nodes that have been visited but are not fully processed.
resolved: Nodes that have been visited and are fully processed.
Returns:
Whether a cycle was found.
"""
if pkg in resolved:
return
unresolved.append(pkg)
for dep in deps_map[pkg]["needs"]:
if dep in unresolved:
idx = unresolved.index(dep)
mycycle = unresolved[idx:] + [dep]
for cycle_pkg in mycycle:
info = cycles.setdefault(cycle_pkg, {})
info.setdefault("pkgs", set()).update(mycycle)
info.setdefault("cycles", []).append(mycycle)
else:
FindCyclesAtNode(dep, cycles, unresolved, resolved)
unresolved.pop()
resolved.add(pkg)
cycles, unresolved, resolved = {}, [], set()
for pkg in deps_map:
FindCyclesAtNode(pkg, cycles, unresolved, resolved)
return cycles
def RemoveInstalledPackages():
"""Remove installed packages, propagating dependencies."""
# If we're not in selective mode, the packages on the command line are
# not optional.
if "--selective" in emerge.opts:
selective = emerge.opts["--selective"] != "n"
else:
selective = "--noreplace" in emerge.opts or "--update" in emerge.opts
if not selective:
for pkg in emerge.cmdline_packages:
for db_pkg in final_db.match_pkgs(pkg):
deps_info[db_pkg.cpv]["optional"] = False
# Schedule packages that aren't on the install list for removal
rm_pkgs = set(deps_map.keys()) - set(deps_info.keys())
# Schedule optional packages for removal
for pkg, info in deps_info.items():
if info["optional"]:
rm_pkgs.add(pkg)
# Remove the packages we don't want, simplifying the graph and making
# it easier for us to crack cycles.
for pkg in sorted(rm_pkgs):
this_pkg = deps_map[pkg]
needs = this_pkg["needs"]
provides = this_pkg["provides"]
for dep in needs:
dep_provides = deps_map[dep]["provides"]
dep_provides.update(provides)
dep_provides.discard(pkg)
dep_provides.discard(dep)
for target in provides:
target_needs = deps_map[target]["needs"]
target_needs.update(needs)
target_needs.pop(pkg, None)
target_needs.pop(target, None)
del deps_map[pkg]
def SanitizeTree(cycles):
"""Remove circular dependencies.
We only prune circular dependencies that go against the emerge ordering.
This has a nice property: we're guaranteed to merge dependencies in the
same order that portage does.
Because we don't treat any dependencies as "soft" unless they're killed
by a cycle, we pay attention to a larger number of dependencies when
merging. This hurts performance a bit, but helps reliability.
Args:
cycles: Dict of packages involved in cyclic dependencies, mapping each
package to a list of the cycles the package is involved in. Produced
by FindCycles().
"""
for basedep in set(cycles).intersection(deps_map):
this_pkg = deps_map[basedep]
for dep in this_pkg["provides"].intersection(cycles[basedep]["pkgs"]):
if deps_info[basedep]["idx"] >= deps_info[dep]["idx"]:
for mycycle in cycles[basedep]["cycles"]:
if dep in mycycle:
print "Breaking %s -> %s in cycle:" % (dep, basedep)
for i in range(len(mycycle) - 1):
needs = deps_map[mycycle[i]]["needs"]
deptype = needs.get(mycycle[i+1], "deleted")
print " %s -> %s (%s)" % (mycycle[i], mycycle[i+1], deptype)
del deps_map[dep]["needs"][basedep]
this_pkg["provides"].remove(dep)
break
def AddSecretDeps():
"""Find these tagged packages and add extra dependencies.
For debugging dependency problems.
"""
for bad in secret_deps:
needed = secret_deps[bad]
bad_pkg = None
needed_pkg = None
for dep in deps_map:
if dep.find(bad) != -1:
bad_pkg = dep
if dep.find(needed) != -1:
needed_pkg = dep
if bad_pkg and needed_pkg:
deps_map[needed_pkg]["provides"].add(bad_pkg)
deps_map[bad_pkg]["needs"][needed_pkg] = "secret"
def MergeChildren(pkg, merge_type):
"""Merge this package and all packages it provides."""
this_pkg = deps_map[pkg]
if this_pkg[merge_type] or pkg not in final_pkgs:
return set()
# Mark this package as non-optional
deps_info[pkg]["optional"] = False
this_pkg[merge_type] = True
for w in this_pkg["provides"]:
MergeChildren(w, merge_type)
if this_pkg["action"] == "nomerge":
this_pkg["action"] = "merge"
def RemotePackageDatabase():
"""Grab the latest binary package database from the prebuilt server.
We need to know the modification times of the prebuilt packages so that we
know when it is OK to use these packages and when we should rebuild them
instead.
Returns:
A dict mapping package identifiers to modification times.
"""
url = self.emerge.settings["PORTAGE_BINHOST"] + "/Packages"
prebuilt_pkgs = {}
f = urllib2.urlopen(url)
for line in f:
if line.startswith("CPV: "):
pkg = line.replace("CPV: ", "").rstrip()
elif line.startswith("MTIME: "):
prebuilt_pkgs[pkg] = int(line[:-1].replace("MTIME: ", ""))
f.close()
return prebuilt_pkgs
def LocalPackageDatabase():
"""Get the modification times of the packages in the local database.
We need to know the modification times of the local packages so that we
know when they need to be rebuilt.
Returns:
A dict mapping package identifiers to modification times.
"""
if self.board:
path = "/build/%s/packages/Packages" % self.board
else:
path = "/var/lib/portage/pkgs/Packages"
local_pkgs = {}
for line in file(path):
if line.startswith("CPV: "):
pkg = line.replace("CPV: ", "").rstrip()
elif line.startswith("MTIME: "):
local_pkgs[pkg] = int(line[:-1].replace("MTIME: ", ""))
return local_pkgs
def AutoRebuildDeps(local_pkgs, remote_pkgs, cycles):
"""Recursively rebuild packages when necessary using modification times.
If you've modified a package, it's a good idea to rebuild all the packages
that depend on it from source. This function looks for any packages which
depend on packages that have been modified and ensures that they get
rebuilt.
Args:
local_pkgs: Modification times from the local database.
remote_pkgs: Modification times from the prebuilt server.
cycles: Dictionary returned from FindCycles()
Returns:
The set of packages we marked as needing to be merged.
"""
def PrebuiltsReady(pkg, pkg_db, cache):
"""Check whether the prebuilts are ready for pkg and all deps.
Args:
pkg: The specified package.
pkg_db: The package DB to use.
cache: A dict, where the results are stored.
Returns:
True iff the prebuilts are ready for pkg and all deps.
"""
if pkg in cache:
return cache[pkg]
if pkg not in pkg_db:
cache[pkg] = False
else:
cache[pkg] = True
for dep in deps_map[pkg]["needs"]:
if not PrebuiltsReady(dep, pkg_db, cache):
cache[pkg] = False
break
return cache[pkg]
def LastModifiedWithDeps(pkg, pkg_db, cache):
"""Calculate the last modified time of a package and its dependencies.
This function looks at all the packages needed by the specified package
and checks the most recent modification time of all of those packages.
If the dependencies of a package were modified more recently than the
package itself, then we know the package needs to be rebuilt.
Args:
pkg: The specified package.
pkg_db: The package DB to use.
cache: A dict, where the last modified times are stored.
Returns:
The last modified time of the specified package and its dependencies.
"""
if pkg in cache:
return cache[pkg]
cache[pkg] = pkg_db.get(pkg, 0)
for dep in deps_map[pkg]["needs"]:
t = LastModifiedWithDeps(dep, pkg_db, cache)
cache[pkg] = max(cache[pkg], t)
return cache[pkg]
# For every package that's getting updated in our local cache (binary
# or source), make sure we also update the children. If a package is
# built from source, all children must also be built from source.
local_ready_cache, remote_ready_cache = {}, {}
local_mtime_cache, remote_mtime_cache = {}, {}
for pkg in final_pkgs:
# If all the necessary local packages are ready, and their
# modification times are in sync, we don't need to do anything here.
local_mtime = LastModifiedWithDeps(pkg, local_pkgs, local_mtime_cache)
local_ready = PrebuiltsReady(pkg, local_pkgs, local_ready_cache)
if (not local_ready or local_pkgs.get(pkg, 0) < local_mtime and
pkg not in cycles):
# OK, at least one package is missing from the local cache or is
# outdated. This means we're going to have to install the package
# and all dependencies.
#
# If all the necessary remote packages are ready, and they're at
# least as new as our local packages, we can install them.
# Otherwise, we need to build from source.
remote_mtime = LastModifiedWithDeps(pkg, remote_pkgs,
remote_mtime_cache)
remote_ready = PrebuiltsReady(pkg, remote_pkgs, remote_ready_cache)
if remote_ready and (local_mtime <= remote_mtime or pkg in cycles):
MergeChildren(pkg, "mandatory")
else:
MergeChildren(pkg, "mandatory_source")
def UsePrebuiltPackages():
"""Update packages that can use prebuilts to do so."""
start = time.time()
# The bintree is the database of binary packages. By default, it's
# empty.
bintree = emerge.trees[root]["bintree"]
bindb = bintree.dbapi
root_config = emerge.root_config
prebuilt_pkgs = {}
# Populate the DB with packages
bintree.populate("--getbinpkg" in emerge.opts,
"--getbinpkgonly" in emerge.opts)
# Update packages that can use prebuilts to do so.
for pkg, info in deps_map.iteritems():
if info and not info["mandatory_source"] and info["action"] == "merge":
db_keys = list(bindb._aux_cache_keys)
try:
db_vals = bindb.aux_get(pkg, db_keys + ["MTIME"])
except KeyError:
# No binary package
continue
mtime = int(db_vals.pop() or 0)
metadata = zip(db_keys, db_vals)
db_pkg = Package(built=True, cpv=pkg, installed=False,
metadata=metadata, onlydeps=False, mtime=mtime,
operation="merge", root_config=root_config,
type_name="binary")
self.package_db[pkg] = db_pkg
seconds = time.time() - start
if "--quiet" not in emerge.opts:
print "Prebuilt DB populated in %dm%.1fs" % (seconds / 60, seconds % 60)
return prebuilt_pkgs
def AddRemainingPackages():
"""Fill in packages that don't have entries in the package db.
Every package we are installing needs an entry in the package db.
This function should only be called after we have removed the
packages that are not being merged from our deps_map.
"""
for pkg in deps_map:
if pkg not in self.package_db:
if deps_map[pkg]["action"] != "merge":
# We should only fill in packages that are being merged. If
# there's any other packages here, something funny is going on.
print "Missing entry for %s in package db" % pkg
sys.exit(1)
db_pkg = emerge.depgraph._pkg(pkg, "ebuild", emerge.root_config)
self.package_db[pkg] = db_pkg
# Turn the raw dependency tree into the deps_map graph with per-node
# "needs"/"provides" bookkeeping, then apply the extra dependency fixups.
ReverseTree(deps_tree)
BuildFinalPackageSet()
AddSecretDeps()
# Apply workon handling: packages in self.mandatory_source must be built
# from source. With --no-workon-deps only the workon packages themselves
# are marked; otherwise the marking propagates to their children too.
if self.no_workon_deps:
  for pkg in self.mandatory_source.copy():
    for db_pkg in final_db.match_pkgs(pkg):
      deps_map[str(db_pkg.cpv)]["mandatory_source"] = True
else:
  for pkg in self.mandatory_source.copy():
    for db_pkg in final_db.match_pkgs(pkg):
      MergeChildren(str(db_pkg.cpv), "mandatory_source")
cycles = FindCycles()
if self.rebuild:
  # Compare local and remote prebuilt databases so AutoRebuildDeps can
  # decide which packages must be rebuilt vs. installed from binaries.
  local_pkgs = LocalPackageDatabase()
  remote_pkgs = RemotePackageDatabase()
  AutoRebuildDeps(local_pkgs, remote_pkgs, cycles)
# We need to remove installed packages so that we can use the dependency
# ordering of the install process to show us what cycles to crack. Once
# we've done that, we also need to recalculate our list of cycles so that
# we don't include the installed packages in our cycles.
RemoveInstalledPackages()
cycles = FindCycles()
SanitizeTree(cycles)
if deps_map:
  if "--usepkg" in emerge.opts:
    # Swap in prebuilt binary packages where they are usable.
    UsePrebuiltPackages()
  AddRemainingPackages()
return deps_map
def PrintInstallPlan(self, deps_map):
  """Print an emerge-style install plan.

  The install plan lists what packages we're installing, in order.
  It's useful for understanding what parallel_emerge is doing.

  Args:
    deps_map: The dependency graph.
  """

  def ExpandReadyNodes(node, graph):
    # Emit this node, then release each package it provides for; any
    # dependent whose prerequisites are now all satisfied is expanded
    # recursively, preserving install order.
    ordered = [node]
    for dependent in graph[node]["provides"]:
      del graph[dependent]["needs"][node]
      if not graph[dependent]["needs"]:
        ordered += ExpandReadyNodes(dependent, graph)
    return ordered

  # Work on a scratch copy so the caller's graph is left untouched.
  scratch = copy.deepcopy(deps_map)
  install_plan = []
  emitted = set()
  for target, info in scratch.iteritems():
    if not info["needs"] and target not in emitted:
      for item in ExpandReadyNodes(target, scratch):
        emitted.add(item)
        install_plan.append(self.package_db[item])
  self.emerge.depgraph.display(install_plan)
def PrintDepsMap(deps_map):
"""Print dependency graph, for each package list it's prerequisites."""
for i in deps_map:
print "%s: (%s) needs" % (i, deps_map[i]["action"])
needs = deps_map[i]["needs"]
for j in needs:
print " %s" % (j)
if not needs:
print " no dependencies"
def EmergeWorker(task_queue, done_queue, emerge, package_db):
  """This worker emerges any packages given to it on the task_queue.

  Args:
    task_queue: The queue of tasks for this worker to do.
    done_queue: The queue of results from the worker.
    emerge: An EmergeData() object.
    package_db: A dict, mapping package ids to portage Package objects.

  It expects package identifiers to be passed to it via task_queue. When
  the package is merged, it pushes (target, retval, outputstr) into the
  done_queue.
  """
  settings, trees, mtimedb = emerge.settings, emerge.trees, emerge.mtimedb
  opts, spinner = emerge.opts, emerge.spinner
  # Dependencies are handled by the parent scheduler; each worker merges
  # exactly the package it is handed, with no dep recalculation.
  opts["--nodeps"] = True
  # Loop forever pulling targets; the process is torn down with the
  # worker pool, there is no explicit shutdown message.
  while True:
    target = task_queue.get()
    print "Emerging", target
    db_pkg = package_db[target]
    db_pkg.root_config = emerge.root_config
    install_list = [db_pkg]
    # Capture the merge's output in a temp file so parallel workers don't
    # interleave output; it is only surfaced on failure.
    output = tempfile.TemporaryFile()
    outputstr = ""
    if "--pretend" in opts:
      retval = 0
    else:
      save_stdout = sys.stdout
      save_stderr = sys.stderr
      try:
        # Redirect both streams into the temp file while portage runs.
        sys.stdout = output
        sys.stderr = output
        scheduler = Scheduler(settings, trees, mtimedb, opts, spinner,
                              install_list, [], emerge.scheduler_graph)
        retval = scheduler.merge()
      finally:
        # Always restore the real streams, even if merge() raises.
        sys.stdout = save_stdout
        sys.stderr = save_stderr
      # scheduler.merge() may return None on success; normalize to 0.
      if retval is None:
        retval = 0
    if retval != 0:
      # Read back the captured output only when the merge failed.
      output.seek(0)
      outputstr = output.read()
    done_queue.put((target, retval, outputstr))
class EmergeQueue(object):
"""Class to schedule emerge jobs according to a dependency graph."""
def __init__(self, deps_map, emerge, package_db):
# Store the dependency graph.
self._deps_map = deps_map
# Initialize the running queue to empty
self._jobs = set()
# List of total package installs represented in deps_map.
install_jobs = [x for x in deps_map if deps_map[x]["action"] == "merge"]
self._total_jobs = len(install_jobs)
if "--pretend" in emerge.opts:
print "Skipping merge because of --pretend mode."
sys.exit(0)
# Setup scheduler graph object. This is used by the child processes
# to help schedule jobs.
emerge.scheduler_graph = emerge.depgraph.schedulerGraph()
procs = min(self._total_jobs,
emerge.opts.get("--jobs", multiprocessing.cpu_count()))
self._emerge_queue = multiprocessing.Queue()
self._done_queue = multiprocessing.Queue()
args = (self._emerge_queue, self._done_queue, emerge, package_db)
self._pool = multiprocessing.Pool(procs, EmergeWorker, args)
# Initialize the failed queue to empty.
self._retry_queue = []
self._failed = {}
# Print an update before we launch the merges.
self._Status()
for target, info in deps_map.items():
if not info["needs"]:
self._Schedule(target)
def _Schedule(self, target):
# We maintain a tree of all deps, if this doesn't need
# to be installed just free up it's children and continue.
# It is possible to reinstall deps of deps, without reinstalling
# first level deps, like so:
# chromeos (merge) -> eselect (nomerge) -> python (merge)
if self._deps_map[target]["action"] == "nomerge":
self._Finish(target)
else:
# Kick off the build if it's marked to be built.
self._jobs.add(target)
self._emerge_queue.put(target)
def _LoadAvg(self):
loads = open("/proc/loadavg", "r").readline().split()[:3]
return " ".join(loads)
def _Status(self):
"""Print status."""
seconds = time.time() - GLOBAL_START
line = ("Pending %s, Ready %s, Running %s, Retrying %s, Total %s "
"[Time %dm%.1fs Load %s]")
qsize = self._emerge_queue.qsize()
print line % (len(self._deps_map), qsize, len(self._jobs) - qsize,
len(self._retry_queue), self._total_jobs,
seconds / 60, seconds % 60, self._LoadAvg())
def _Finish(self, target):
"""Mark a target as completed and unblock dependecies."""
for dep in self._deps_map[target]["provides"]:
del self._deps_map[dep]["needs"][target]
if not self._deps_map[dep]["needs"]:
self._Schedule(dep)
self._deps_map.pop(target)
def _Retry(self):
if self._retry_queue:
target = self._retry_queue.pop(0)
self._Schedule(target)
print "Retrying emerge of %s." % target
def Run(self):
"""Run through the scheduled ebuilds.
Keep running so long as we have uninstalled packages in the
dependency graph to merge.
"""
while self._deps_map:
# Check here that we are actually waiting for something.
if (self._emerge_queue.empty() and
self._done_queue.empty() and
not self._jobs and
self._deps_map):
# If we have failed on a package, retry it now.
if self._retry_queue:
self._Retry()
# If we have failed a package twice, just give up.
elif self._failed:
for failure, output in self._failed.items():
print "Package failed: %s" % failure
print output
PrintDepsMap(self._deps_map)
print "Packages failed: %s" % " ,".join(self._failed.keys())
sys.exit(1)
# If we have dependency cycles.
else:
print "Deadlock! Circular dependencies!"
PrintDepsMap(self._deps_map)
sys.exit(1)
try:
target, retcode, output = self._done_queue.get(timeout=5)
except Queue.Empty:
# Print an update.
self._Status()
continue
self._jobs.discard(target)
# Print if necessary.
if retcode != 0:
print output
if retcode != 0:
# Handle job failure.
if target in self._failed:
# If this job has failed previously, give up.
print "Failed %s. Your build has failed." % target
else:
# Queue up this build to try again after a long while.
self._retry_queue.append(target)
self._failed[target] = 1
print "Failed %s, retrying later." % target
else:
if target in self._failed and self._retry_queue:
# If we have successfully retried a failed package, and there
# are more failed packages, try the next one. We will only have
# one retrying package actively running at a time.
self._Retry()
print "Completed %s" % target
# Mark as completed and unblock waiting ebuilds.
self._Finish(target)
# Print an update.
self._Status()
def main():
deps = DepGraphGenerator()
deps.Initialize(sys.argv[1:])
emerge = deps.emerge
if emerge.action is not None:
sys.argv = deps.ParseParallelEmergeArgs(sys.argv)
sys.exit(emerge_main())
elif not emerge.cmdline_packages:
Usage()
sys.exit(1)
# Unless we're in pretend mode, there's not much point running without
# root access. We need to be able to install packages.
#
# NOTE: Even if you're running --pretend, it's a good idea to run
# parallel_emerge with root access so that portage can write to the
# dependency cache. This is important for performance.
if "--pretend" not in emerge.opts and portage.secpass < 2:
print "parallel_emerge: superuser access is required."
sys.exit(1)
if "--quiet" not in emerge.opts:
cmdline_packages = " ".join(emerge.cmdline_packages)
print "Starting fast-emerge."
print " Building package %s on %s" % (cmdline_packages,
deps.board or "root")
deps_tree, deps_info = deps.GenDependencyTree()
# You want me to be verbose? I'll give you two trees! Twice as much value.
if "--tree" in emerge.opts and "--verbose" in emerge.opts:
deps.PrintTree(deps_tree)
deps_graph = deps.GenDependencyGraph(deps_tree, deps_info)
# OK, time to print out our progress so far.
deps.PrintInstallPlan(deps_graph)
if "--tree" in emerge.opts:
PrintDepsMap(deps_graph)
# Run the queued emerges.
scheduler = EmergeQueue(deps_graph, emerge, deps.package_db)
scheduler.Run()
# Update world.
if ("--oneshot" not in emerge.opts and
"--pretend" not in emerge.opts):
world_set = emerge.root_config.sets["selected"]
new_world_pkgs = []
root = emerge.settings["ROOT"]
final_db = emerge.depgraph._dynamic_config.mydbapi[root]
for pkg in emerge.cmdline_packages:
for db_pkg in final_db.match_pkgs(pkg):
print "Adding %s to world" % db_pkg.cp
new_world_pkgs.append(db_pkg.cp)
if new_world_pkgs:
world_set.update(new_world_pkgs)
print "Done"
# Standard script entry point guard.
if __name__ == "__main__":
  main()