#!/usr/bin/python2.6
# Copyright (c) 2010 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Program to run emerge in parallel, for significant speedup.
Usage:
./parallel_emerge [--board=BOARD] [--workon=PKGS] [--no-workon-deps]
[--force-remote-binary=PKGS] [emerge args] package
Basic operation:
Runs 'emerge -p --debug' to display dependencies, and stores a
dependency graph. All non-blocked packages are launched in parallel,
as 'emerge --nodeps package' with any blocked packages being emerged
immediately upon deps being met.
For this to work effectively, /usr/lib/portage/pym/portage/locks.py
must be stubbed out, preventing portage from slowing itself with
unnecessary locking, as this script ensures that emerge is run in such
a way that common resources are never in conflict. This is controlled
by an environment variable PORTAGE_LOCKS set in parallel emerge
subprocesses.
Parallel Emerge unlocks two things during operation; here's what you
must do to keep this safe:
* Storage dir containing binary packages. - Don't emerge new
packages while installing the existing ones.
* Portage database - You must not examine deps while modifying the
database. Therefore you may only parallelize "-p" read only access,
or "--nodeps" write only access.
Caveats:
* Some ebuild packages have incorrectly specified deps, and running
them in parallel is more likely to bring out these failures.
* Some ebuilds (especially the build part) have complex dependencies
that are not captured well by this script (it may be necessary to
install an old package to build, but then install a newer version
of the same package for a runtime dep).
"""
import codecs
import copy
import errno
import multiprocessing
import os
import Queue
import shlex
import signal
import sys
import tempfile
import time
import traceback
import urllib2
import urlparse
# If PORTAGE_USERNAME isn't specified, scrape it from the $HOME variable. On
# Chromium OS, the default "portage" user doesn't have the necessary
# permissions. It'd be easier if we could default to $USERNAME, but $USERNAME
# is "root" here because we get called through sudo.
#
# We need to set this before importing any portage modules, because portage
# looks up "PORTAGE_USERNAME" at import time.
#
# NOTE: .bashrc sets PORTAGE_USERNAME = $USERNAME, so most people won't
# encounter this case unless they have an old chroot or blow away the
# environment by running sudo without the -E specifier.
if "PORTAGE_USERNAME" not in os.environ:
homedir = os.environ.get("HOME")
if homedir:
os.environ["PORTAGE_USERNAME"] = os.path.basename(homedir)
# Portage doesn't expose dependency trees in its public API, so we have to
# make use of some private APIs here. These modules are found under
# /usr/lib/portage/pym/.
#
# TODO(davidjames): Update Portage to expose public APIs for these features.
from _emerge.actions import adjust_configs
from _emerge.actions import load_emerge_config
from _emerge.create_depgraph_params import create_depgraph_params
from _emerge.depgraph import depgraph as emerge_depgraph
from _emerge.depgraph import _frozen_depgraph_config
from _emerge.main import emerge_main
from _emerge.main import parse_opts
from _emerge.Package import Package
from _emerge.Scheduler import Scheduler
from _emerge.SetArg import SetArg
from _emerge.stdout_spinner import stdout_spinner
import portage
import portage.debug
import portage.versions
new_portage = not portage.VERSION.startswith("2.1.7.")
if new_portage:
from portage._global_updates import _global_updates
else:
from portage import _global_updates
def Usage():
"""Print usage."""
print "Usage:"
print " ./parallel_emerge [--board=BOARD] [--workon=PKGS] [--no-workon-deps]"
print " [--rebuild] [emerge args] package"
print
print "Packages specified as workon packages are always built from source."
print "Unless --no-workon-deps is specified, packages that depend on these"
print "packages are also built from source."
print
print "The --workon argument is mainly useful when you want to build and"
print "install packages that you are working on unconditionally, but do not"
print "to have to rev the package to indicate you want to build it from"
print "source. The build_packages script will automatically supply the"
print "workon argument to emerge, ensuring that packages selected using"
print "cros-workon are rebuilt."
print
print "The --rebuild option rebuilds packages whenever their dependencies"
print "are changed. This ensures that your build is correct."
sys.exit(1)
# These are dependencies that are not specified in the package,
# but will prevent the package from installing.
secret_deps = {}
# Global start time
GLOBAL_START = time.time()
class EmergeData(object):
"""This simple struct holds various emerge variables.
This struct helps us easily pass emerge variables around as a unit.
These variables are used for calculating dependencies and installing
packages.
"""
__slots__ = ["action", "cmdline_packages", "depgraph", "mtimedb", "opts",
"root_config", "scheduler_graph", "settings", "spinner",
"trees"]
def __init__(self):
# The action the user requested. If the user is installing packages, this
# is None. If the user is doing anything other than installing packages,
# this will contain the action name, which will map exactly to the
# long-form name of the associated emerge option.
#
# Example: If you call parallel_emerge --unmerge package, the action name
# will be "unmerge"
self.action = None
# The list of packages the user passed on the command-line.
self.cmdline_packages = None
# The emerge dependency graph. It'll contain all the packages involved in
# this merge, along with their versions.
self.depgraph = None
# A dict of the options passed to emerge. This dict has been cleaned up
# a bit by parse_opts, so that it's a bit easier for the emerge code to
# look at the options.
#
# Emerge takes a few shortcuts in its cleanup process to make parsing of
# the options dict easier. For example, if you pass in "--usepkg=n", the
# "--usepkg" flag is just left out of the dictionary altogether. Because
# --usepkg=n is the default, this makes parsing easier, because emerge
# can just assume that if "--usepkg" is in the dictionary, it's enabled.
#
# These cleanup processes aren't applied to all options. For example, the
# --with-bdeps flag is passed in as-is. For a full list of the cleanups
# applied by emerge, see the parse_opts function in the _emerge.main
# package.
self.opts = None
# A dictionary used by portage to maintain global state. This state is
# loaded from disk when portage starts up, and saved to disk whenever we
# call mtimedb.commit().
#
# This database contains information about global updates (i.e., what
# version of portage we have) and what we're currently doing. Portage
# saves what it is currently doing in this database so that it can be
# resumed when you call it with the --resume option.
#
# parallel_emerge does not save what it is currently doing in the mtimedb,
# so we do not support the --resume option.
self.mtimedb = None
# The portage configuration for our current root. This contains the portage
# settings (see below) and the three portage trees for our current root.
# (The three portage trees are explained below, in the documentation for
# the "trees" member.)
self.root_config = None
# The scheduler graph is used by emerge to calculate what packages to
# install. We don't actually install any deps, so this isn't really used,
# but we pass it in to the Scheduler object anyway.
self.scheduler_graph = None
# Portage settings for our current session. Most of these settings are set
# in make.conf inside our current install root.
self.settings = None
# The spinner, which spews stuff to stdout to indicate that portage is
# doing something. We maintain our own spinner, so we set the portage
# spinner to "silent" mode.
self.spinner = None
# The portage trees. There are separate portage trees for each root. To get
# the portage tree for the current root, you can look in self.trees[root],
# where root = self.settings["ROOT"].
#
# In each root, there are three trees: vartree, porttree, and bintree.
# - vartree: A database of the currently-installed packages.
# - porttree: A database of ebuilds, that can be used to build packages.
# - bintree: A database of binary packages.
self.trees = None
class DepGraphGenerator(object):
"""Grab dependency information about packages from portage.
Typical usage:
deps = DepGraphGenerator()
deps.Initialize(sys.argv[1:])
deps_tree, deps_info = deps.GenDependencyTree()
deps_graph = deps.GenDependencyGraph(deps_tree, deps_info)
deps.PrintTree(deps_tree)
PrintDepsMap(deps_graph)
"""
__slots__ = ["board", "emerge", "mandatory_source", "no_workon_deps",
"nomerge", "package_db", "rebuild", "show_output",
"force_remote_binary", "forced_remote_binary_packages"]
def __init__(self):
self.board = None
self.emerge = EmergeData()
self.mandatory_source = set()
self.no_workon_deps = False
self.nomerge = set()
self.package_db = {}
self.rebuild = False
self.show_output = False
self.force_remote_binary = set()
self.forced_remote_binary_packages = set()
def ParseParallelEmergeArgs(self, argv):
"""Read the parallel emerge arguments from the command-line.
We need to be compatible with emerge arg format. We scrape arguments that
are specific to parallel_emerge, and pass through the rest directly to
emerge.
Args:
argv: arguments list
Returns:
Arguments that don't belong to parallel_emerge
"""
emerge_args = []
for arg in argv:
# Specifically match arguments that are specific to parallel_emerge, and
# pass through the rest.
if arg.startswith("--board="):
self.board = arg.replace("--board=", "")
elif arg.startswith("--workon="):
workon_str = arg.replace("--workon=", "")
package_list = shlex.split(" ".join(shlex.split(workon_str)))
self.mandatory_source.update(package_list)
elif arg.startswith("--force-remote-binary="):
force_remote_binary = arg.replace("--force-remote-binary=", "")
force_remote_binary = \
shlex.split(" ".join(shlex.split(force_remote_binary)))
self.force_remote_binary.update(force_remote_binary)
elif arg.startswith("--nomerge="):
nomerge_str = arg.replace("--nomerge=", "")
package_list = shlex.split(" ".join(shlex.split(nomerge_str)))
self.nomerge.update(package_list)
elif arg == "--no-workon-deps":
self.no_workon_deps = True
elif arg == "--rebuild":
self.rebuild = True
elif arg == "--show-output":
self.show_output = True
else:
# Not one of our options, so pass through to emerge.
emerge_args.append(arg)
if self.rebuild:
if self.no_workon_deps:
print "--rebuild is not compatible with --no-workon-deps"
sys.exit(1)
return emerge_args
def Initialize(self, args):
"""Initializer. Parses arguments and sets up portage state."""
# Parse and strip out args that are just intended for parallel_emerge.
emerge_args = self.ParseParallelEmergeArgs(args)
# Setup various environment variables based on our current board. These
# variables are normally setup inside emerge-${BOARD}, but since we don't
# call that script, we have to set it up here. These variables serve to
# point our tools at /build/BOARD and to setup cross compiles to the
# appropriate board as configured in toolchain.conf.
if self.board:
os.environ["PORTAGE_CONFIGROOT"] = "/build/" + self.board
os.environ["PORTAGE_SYSROOT"] = "/build/" + self.board
os.environ["SYSROOT"] = "/build/" + self.board
srcroot = "%s/../../src" % os.path.dirname(os.path.realpath(__file__))
# Strip the variant out of the board name to look for the toolchain. This
# is similar to what setup_board does.
board_no_variant = self.board.split('_')[0]
public_toolchain_path = ("%s/overlays/overlay-%s/toolchain.conf" %
(srcroot, board_no_variant))
private_toolchain_path = (
"%s/private-overlays/overlay-%s-private/toolchain.conf" %
(srcroot, board_no_variant))
if os.path.isfile(public_toolchain_path):
toolchain_path = public_toolchain_path
elif os.path.isfile(private_toolchain_path):
toolchain_path = private_toolchain_path
else:
print "Not able to locate toolchain.conf in board overlays"
sys.exit(1)
f = open(toolchain_path)
os.environ["CHOST"] = f.readline().strip()
f.close()
# Although CHROMEOS_ROOT isn't specific to boards, it's normally setup
# inside emerge-${BOARD}, so we set it up here for compatibility. It
# will be going away soon as we migrate to CROS_WORKON_SRCROOT.
os.environ.setdefault("CHROMEOS_ROOT", os.environ["HOME"] + "/trunk")
# Turn off interactive delays
os.environ["EBEEP_IGNORE"] = "1"
os.environ["EPAUSE_IGNORE"] = "1"
os.environ["UNMERGE_DELAY"] = "0"
# Parse the emerge options.
action, opts, cmdline_packages = parse_opts(emerge_args)
# If we're installing to the board, we want the --root-deps option so that
# portage will install the build dependencies to that location as well.
if self.board:
opts.setdefault("--root-deps", True)
# Set environment variables based on options. Portage normally sets these
# environment variables in emerge_main, but we can't use that function,
# because it also does a bunch of other stuff that we don't want.
# TODO(davidjames): Patch portage to move this logic into a function we can
# reuse here.
if "--debug" in opts:
os.environ["PORTAGE_DEBUG"] = "1"
if "--config-root" in opts:
os.environ["PORTAGE_CONFIGROOT"] = opts["--config-root"]
if "--root" in opts:
os.environ["ROOT"] = opts["--root"]
if "--accept-properties" in opts:
os.environ["ACCEPT_PROPERTIES"] = opts["--accept-properties"]
# Portage has two flags for doing collision protection: collision-protect
# and protect-owned. The protect-owned feature is enabled by default and
# is quite useful: it checks to make sure that we don't have multiple
# packages that own the same file. The collision-protect feature is more
# strict, and less useful: it fails if it finds a conflicting file, even
# if that file was created by an earlier ebuild that failed to install.
#
# We want to disable collision-protect here because we don't handle
# failures during the merge step very well. Sometimes we leave old files
# lying around and they cause problems, so for now we disable the flag.
# TODO(davidjames): Look for a better solution.
features = os.environ.get("FEATURES", "") + " -collision-protect"
# If we're installing packages to the board, and we're not using the
# official flag, we can enable the following optimizations:
# 1) Don't lock during install step. This allows multiple packages to be
# installed at once. This is safe because our board packages do not
# muck with each other during the post-install step.
# 2) Don't update the environment until the end of the build. This is
# safe because board packages don't need to run during the build --
# they're cross-compiled, so our CPU architecture doesn't support them
# anyway.
if self.board and os.environ.get("CHROMEOS_OFFICIAL") != "1":
os.environ.setdefault("PORTAGE_LOCKS", "false")
features = features + " no-env-update"
os.environ["FEATURES"] = features
# Now that we've setup the necessary environment variables, we can load the
# emerge config from disk.
settings, trees, mtimedb = load_emerge_config()
# Check whether our portage tree is out of date. Typically, this happens
# when you're setting up a new portage tree, such as in setup_board and
# make_chroot. In that case, portage applies a bunch of global updates
# here. Once the updates are finished, we need to commit any changes
# that the global update made to our mtimedb, and reload the config.
#
# Portage normally handles this logic in emerge_main, but again, we can't
# use that function here.
if _global_updates(trees, mtimedb["updates"]):
mtimedb.commit()
settings, trees, mtimedb = load_emerge_config(trees=trees)
# Setup implied options. Portage normally handles this logic in
# emerge_main.
if "--buildpkgonly" in opts or "buildpkg" in settings.features:
opts.setdefault("--buildpkg", True)
if "--getbinpkgonly" in opts:
opts.setdefault("--usepkgonly", True)
opts.setdefault("--getbinpkg", True)
if "getbinpkg" in settings.features:
# Per emerge_main, FEATURES=getbinpkg overrides --getbinpkg=n
opts["--getbinpkg"] = True
if "--getbinpkg" in opts or "--usepkgonly" in opts:
opts.setdefault("--usepkg", True)
if "--fetch-all-uri" in opts:
opts.setdefault("--fetchonly", True)
if "--skipfirst" in opts:
opts.setdefault("--resume", True)
if "--buildpkgonly" in opts:
# --buildpkgonly will not merge anything, so it overrides all binary
# package options.
for opt in ("--getbinpkg", "--getbinpkgonly",
"--usepkg", "--usepkgonly"):
opts.pop(opt, None)
if (settings.get("PORTAGE_DEBUG", "") == "1" and
"python-trace" in settings.features):
portage.debug.set_trace(True)
# Complain about unsupported options
for opt in ("--ask", "--ask-enter-invalid", "--complete-graph",
"--resume", "--skipfirst"):
if opt in opts:
print "%s is not supported by parallel_emerge" % opt
sys.exit(1)
# Make emerge specific adjustments to the config (e.g. colors!)
adjust_configs(opts, trees)
# Save our configuration so far in the emerge object
emerge = self.emerge
emerge.action, emerge.opts = action, opts
emerge.settings, emerge.trees, emerge.mtimedb = settings, trees, mtimedb
emerge.cmdline_packages = cmdline_packages
root = settings["ROOT"]
emerge.root_config = trees[root]["root_config"]
if new_portage and "--usepkg" in opts:
emerge.trees[root]["bintree"].populate("--getbinpkg" in opts)
def CheckUseFlags(self, pkgsettings, cur_pkg, new_pkg):
"""Are the use flags in cur_pkg up to date?
Return True if use flags are up to date; return False otherwise."""
# cur_use: The set of flags that were enabled when the package was
# first installed.
# cur_iuse: The set of flags that affected the specified package
# when it was first installed.
#
# The intersection of cur_use and cur_iuse provides the set of
# flags that were enabled and affected the specified package.
cur_use = cur_pkg.use.enabled
cur_iuse = cur_pkg.iuse.all
# Check whether this package is already installed with the right use
# flags.
#
# now_use: The set of flags (special and non-special) that are now
# enabled for the specified package.
# now_iuse: The set of non-special flags that affect the specified
# package.
now_use = new_pkg.use.enabled
now_iuse = new_pkg.iuse.all
# Tell portage we want to lookup the flags for the specified package
# in package.use.{mask,force}
pkgsettings.setcpv(new_pkg.cpv)
# Grab the set of flags that are requested for the given package.
# This includes flags that don't affect the package, and includes
# all sources of flags (e.g. USE environment variable, make.conf,
# make.defaults, package.use.{mask,force}, etc.).
#
# This is used by portage in the _reinstall_for_flags function below.
forced_flags = set(pkgsettings.useforce).union(pkgsettings.usemask)
depgraph = self.emerge.depgraph
flags = depgraph._reinstall_for_flags(forced_flags, cur_use,
cur_iuse, now_use, now_iuse)
return not flags
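# A sketch of the check above, with hypothetical flags: if a package was
# installed with cur_use = {"alsa"} and cur_iuse = {"alsa", "debug"}, and
# the new configuration has now_use = {"alsa", "debug"} with the same
# now_iuse, then "debug" changed for a flag the package honors, so
# _reinstall_for_flags reports it and CheckUseFlags returns False.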
def CreateDepgraph(self, emerge, packages):
"""Create an emerge depgraph object."""
# Setup emerge options.
emerge_opts = emerge.opts.copy()
# Enable --emptytree so that we get the full tree, which we need for
# dependency analysis. By default, with this option, emerge optimizes
# the graph by removing uninstall instructions from the graph. By
# specifying --tree as well, we tell emerge that it's not safe to remove
# uninstall instructions because we're planning on analyzing the output.
emerge_opts["--tree"] = True
emerge_opts["--emptytree"] = True
# Set up parameters.
params = create_depgraph_params(emerge_opts, emerge.action)
frozen_config = _frozen_depgraph_config(emerge.settings, emerge.trees,
emerge_opts, emerge.spinner)
backtrack_max = emerge_opts.get('--backtrack', 5)
backtrack_parameters = {}
allow_backtracking = backtrack_max > 0
# Try up to backtrack_max times to create a working depgraph. Each time we
# run into a conflict, mask the offending package and try again.
# TODO(davidjames): When Portage supports --force-remote-binary directly,
# switch back to using the backtrack_depgraph function.
for i in range(backtrack_max + 2):
# Create a depgraph object.
depgraph = emerge_depgraph(emerge.settings, emerge.trees, emerge_opts,
params, emerge.spinner, frozen_config=frozen_config,
allow_backtracking=allow_backtracking,
**backtrack_parameters)
if i == 0:
for cpv in self.forced_remote_binary_packages:
# If --force-remote-binary was specified, we want to use this package
# regardless of its use flags. Unfortunately, Portage doesn't support
# ignoring use flags for just one package. To convince Portage to
# install the package, we trick Portage into thinking the package has
# the right use flags.
# TODO(davidjames): Update Portage to support --force-remote-binary
# directly, so that this hack isn't necessary.
pkg = depgraph._pkg(cpv, "binary", emerge.root_config)
pkgsettings = frozen_config.pkgsettings[pkg.root]
pkgsettings.setcpv(pkg)
pkg.use.enabled = pkgsettings["PORTAGE_USE"].split()
# Select the packages we want.
success, favorites = depgraph.select_files(packages)
if success:
break
elif depgraph.need_restart() and i < backtrack_max:
# Looks like we found some packages that can't be installed due to
# conflicts. Try again, masking out the conflicting packages.
if new_portage:
backtrack_parameters = depgraph.get_backtrack_parameters()
else:
backtrack_parameters = {
'runtime_pkg_mask': depgraph.get_runtime_pkg_mask()
}
elif allow_backtracking and i > 0:
# Looks like we can't solve the graph. Stop backtracking and report an
# error message.
backtrack_parameters.pop('runtime_pkg_mask', None)
allow_backtracking = False
else:
break
# Delete the --tree option, because we don't really want to display a
# tree. We just wanted to get emerge to leave uninstall instructions on
# the graph. Later, when we display the graph, we'll want standard-looking
# output, so removing the --tree option is important.
frozen_config.myopts.pop("--tree", None)
emerge.depgraph = depgraph
# Is it impossible to honor the user's request? Bail!
if not success:
depgraph.display_problems()
sys.exit(1)
def GenDependencyTree(self, remote_pkgs):
"""Get dependency tree info from emerge.
TODO(): Update cros_extract_deps to also use this code.
Args:
remote_pkgs: Modification times from the prebuilt server.
Returns:
A tuple of (dependency tree, dependency info).
"""
start = time.time()
emerge = self.emerge
# Create a list of packages to merge
packages = set(emerge.cmdline_packages[:])
if self.mandatory_source:
packages.update(self.mandatory_source)
if self.force_remote_binary:
forced_pkgs = {}
for pkg in remote_pkgs:
category, pkgname, _, _ = portage.catpkgsplit(pkg)
full_pkgname = "%s/%s" % (category, pkgname)
if (pkgname in self.force_remote_binary or
full_pkgname in self.force_remote_binary):
forced_pkgs.setdefault(full_pkgname, []).append(pkg)
# Add forced binary packages to the dependency list. This is necessary
# to ensure that the install plan contains the right package.
#
# Putting the forced binary package at the beginning of the list is an
# optimization that helps avoid unnecessary backtracking (e.g., if
# Portage first selects the wrong version, and then backtracks later, it
# takes a bit longer and uses up an unnecessary backtrack iteration.)
packages = list(packages)
for pkgs in forced_pkgs.values():
forced_package = portage.versions.best(pkgs)
packages.insert(0, "=%s" % forced_package)
self.forced_remote_binary_packages.add(forced_package)
# Tell emerge to be quiet. We print plenty of info ourselves so we don't
# need any extra output from portage.
portage.util.noiselimit = -1
# My favorite feature: The silent spinner. It doesn't spin. Ever.
# I'd disable the colors by default too, but they look kind of cool.
emerge.spinner = stdout_spinner()
emerge.spinner.update = emerge.spinner.update_quiet
if "--quiet" not in emerge.opts:
print "Calculating deps..."
self.CreateDepgraph(emerge, packages)
depgraph = emerge.depgraph
# Build our own tree from the emerge digraph.
deps_tree = {}
digraph = depgraph._dynamic_config.digraph
for node, node_deps in digraph.nodes.items():
# Calculate dependency packages that need to be installed first. Each
# child on the digraph is a dependency. The "operation" field specifies
# what we're doing (e.g. merge, uninstall, etc.). The "priorities" array
# contains the type of dependency (e.g. build, runtime, runtime_post,
# etc.)
#
# Emerge itself actually treats some dependencies as "soft" dependencies
# and sometimes ignores them. We don't do that -- we honor all
# dependencies unless we're forced to prune them because they're cyclic.
#
# Portage refers to the identifiers for packages as a CPV. This acronym
# stands for Category/Package/Version.
#
# Here's an example CPV: chromeos-base/power_manager-0.0.1-r1
# Split up, this CPV would be:
# C -- Category: chromeos-base
# P -- Package: power_manager
# V -- Version: 0.0.1-r1
#
# We just refer to CPVs as packages here because it's easier.
deps = {}
for child, priorities in node_deps[0].items():
if isinstance(child, SetArg): continue
deps[str(child.cpv)] = dict(action=str(child.operation),
deptype=str(priorities[-1]),
deps={})
# We've built our list of deps, so we can add our package to the tree.
if isinstance(node, Package):
deps_tree[str(node.cpv)] = dict(action=str(node.operation),
deps=deps)
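# A sketch of the resulting deps_tree structure, using hypothetical package
# names (the deptype string shown is illustrative):
# deps_tree = {
#     "sys-apps/foo-1.0": {
#         "action": "merge",
#         "deps": {"sys-libs/bar-2.1-r1": {"action": "merge",
#                                          "deptype": "buildtime",
#                                          "deps": {}}}}}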
emptytree = "--emptytree" in emerge.opts
# Ask portage for its install plan, so that we can only throw out
# dependencies that portage throws out. Also, keep track of the old
# versions of packages that we're either upgrading or replacing.
#
# The "vardb" is the database of installed packages.
root = emerge.settings["ROOT"]
frozen_config = depgraph._frozen_config
vardb = frozen_config.trees[root]["vartree"].dbapi
pkgsettings = frozen_config.pkgsettings[root]
deps_info = {}
for pkg in depgraph.altlist():
if isinstance(pkg, Package):
# If we're not using --force-remote-binary, check what flags are being
# used by the real package.
if pkg.operation != "uninstall" and "--usepkgonly" not in emerge.opts:
try:
pkg = emerge.depgraph._pkg(pkg.cpv, "ebuild", emerge.root_config)
except portage.exception.PackageNotFound:
# This is a --force-remote-binary package.
pass
self.package_db[pkg.cpv] = pkg
# If we're not in emptytree mode, and we're going to replace a package
# that is already installed, then this operation is possibly optional.
# ("--selective" mode is handled later, in RemoveInstalledPackages())
optional = False
if pkg.operation != "uninstall" and not emptytree:
for vardb_pkg in vardb.match_pkgs(pkg.cpv):
if self.CheckUseFlags(pkgsettings, vardb_pkg, pkg):
optional = True
break
# Save off info about the package
deps_info[str(pkg.cpv)] = {"idx": len(deps_info),
"optional": optional}
seconds = time.time() - start
if "--quiet" not in emerge.opts:
print "Deps calculated in %dm%.1fs" % (seconds / 60, seconds % 60)
return deps_tree, deps_info
def PrintTree(self, deps, depth=""):
"""Print the deps we have seen in the emerge output.
Args:
deps: Dependency tree structure.
depth: Allows printing the tree recursively, with indentation.
"""
for entry in sorted(deps):
action = deps[entry]["action"]
print "%s %s (%s)" % (depth, entry, action)
self.PrintTree(deps[entry]["deps"], depth=depth + " ")
def RemotePackageDatabase(self):
"""Grab the latest binary package database from the prebuilt server.
We need to know the modification times of the prebuilt packages so that we
know when it is OK to use these packages and when we should rebuild them
instead.
Returns:
A dict mapping package identifiers to modification times.
"""
root = self.emerge.settings["ROOT"]
bindb = self.emerge.trees[root]["bintree"].dbapi
prebuilt_pkgs = {}
for pkg in bindb.cpv_all():
prebuilt_pkgs[pkg] = bindb.aux_get(pkg, ["BUILD_TIME"])[0]
return prebuilt_pkgs
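# A sketch of the returned dict, with a hypothetical package name (the
# values are BUILD_TIME strings from the binary package metadata):
# {"chromeos-base/foo-1.0-r1": "1288123456"}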
def GenDependencyGraph(self, deps_tree, deps_info, remote_pkgs):
"""Generate a doubly linked dependency graph.
Args:
deps_tree: Dependency tree structure.
deps_info: More details on the dependencies.
remote_pkgs: Modification times from the prebuilt server.
Returns:
Deps graph in the form of a dict of packages, with each package
specifying a "needs" list and "provides" list.
"""
emerge = self.emerge
root = emerge.settings["ROOT"]
# It's useful to know what packages will actually end up on the
# system at some point. Packages in final_db are either already
# installed, or will be installed by the time we're done.
final_db = emerge.depgraph._dynamic_config.mydbapi[root]
# final_pkgs is a set of the packages we found in the final_db. These
# packages are either already installed, or will be installed by the time
# we're done. It's populated in BuildFinalPackageSet()
final_pkgs = set()
# These packages take a really long time to build, so, for expediency, we
# are blacklisting them from the automatic rebuilds that would otherwise be
# triggered when one of their dependencies is recompiled.
rebuild_blacklist = set()
for pkg in ("chromeos-base/chromeos-chrome", "media-plugins/o3d",
"dev-java/icedtea"):
for match in final_db.match_pkgs(pkg):
rebuild_blacklist.add(str(match.cpv))
# deps_map is the actual dependency graph.
#
# Each package specifies a "needs" list and a "provides" list. The "needs"
# list indicates which packages we depend on. The "provides" list
# indicates the reverse dependencies -- what packages need us.
#
# We also provide some other information in the dependency graph:
# - action: What we're planning on doing with this package. Generally,
# "merge", "nomerge", or "uninstall"
# - mandatory_source:
# If true, indicates that this package must be compiled from source.
# We set this for "workon" packages, and for packages where the
# binaries are known to be out of date.
# - mandatory:
# If true, indicates that this package must be installed. We don't care
# whether it's binary or source, unless the mandatory_source flag is
# also set.
# - force_remote_binary:
# If true, indicates that we want to update to the latest remote prebuilt
# of this package. Packages that depend on this package should be built
# from source.
#
deps_map = {}
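# A sketch of a single deps_map entry, using hypothetical package names:
# deps_map["chromeos-base/foo-0.0.1-r1"] = {
#     "needs": {"sys-libs/bar-2.1-r1": "buildtime"},
#     "provides": set(["chromeos-base/baz-1.0"]),
#     "action": "merge",
#     "mandatory_source": False,
#     "mandatory": False,
#     "force_remote_binary": False,
# }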
def ReverseTree(packages):
"""Convert tree to digraph.
Take the tree of package -> requirements and reverse it to a digraph of
buildable packages -> packages they unblock.
Args:
packages: Tree(s) of dependencies.
Side effect:
Populates deps_map with the unsanitized digraph.
"""
for pkg in packages:
# Create an entry for the package
action = packages[pkg]["action"]
default_pkg = {"needs": {}, "provides": set(), "action": action,
"mandatory_source": False, "mandatory": False,
"force_remote_binary": False}
this_pkg = deps_map.setdefault(pkg, default_pkg)
# Create entries for dependencies of this package first.
ReverseTree(packages[pkg]["deps"])
# Add dependencies to this package.
for dep, dep_item in packages[pkg]["deps"].iteritems():
dep_pkg = deps_map[dep]
dep_type = dep_item["deptype"]
if dep_type != "runtime_post":
dep_pkg["provides"].add(pkg)
this_pkg["needs"][dep] = dep_type
def BuildFinalPackageSet():
# If this package is installed, or will get installed, add it to
# final_pkgs
for pkg in deps_map:
for match in final_db.match_pkgs(pkg):
if match.cpv in deps_info:
final_pkgs.add(str(match.cpv))
def FindCycles():
"""Find cycles in the dependency tree.
Returns:
A dict mapping cyclic packages to a dict of the deps that cause
cycles. For each dep that causes cycles, it returns an example
traversal of the graph that shows the cycle.
"""
def FindCyclesAtNode(pkg, cycles, unresolved, resolved):
"""Find cycles in cyclic dependencies starting at specified package.
Args:
pkg: Package identifier.
cycles: A dict mapping cyclic packages to a dict of the deps that
cause cycles. For each dep that causes cycles, it returns an
example traversal of the graph that shows the cycle.
unresolved: Nodes that have been visited but are not fully processed.
resolved: Nodes that have been visited and are fully processed.
"""
pkg_cycles = cycles.get(pkg)
if pkg in resolved and not pkg_cycles:
# If we already looked at this package, and found no cyclic
# dependencies, we can stop now.
return
unresolved.append(pkg)
for dep in deps_map[pkg]["needs"]:
if dep in unresolved:
idx = unresolved.index(dep)
mycycle = unresolved[idx:] + [dep]
for i in range(len(mycycle) - 1):
pkg1, pkg2 = mycycle[i], mycycle[i+1]
cycles.setdefault(pkg1, {}).setdefault(pkg2, mycycle)
elif not pkg_cycles or dep not in pkg_cycles:
# Looks like we haven't seen this edge before.
FindCyclesAtNode(dep, cycles, unresolved, resolved)
unresolved.pop()
resolved.add(pkg)
cycles, unresolved, resolved = {}, [], set()
for pkg in deps_map:
FindCyclesAtNode(pkg, cycles, unresolved, resolved)
return cycles
def RemoveInstalledPackages():
"""Remove installed packages, propagating dependencies."""
# If we're in non-selective mode, the packages specified on the command
# line are generally mandatory.
#
# There are a few exceptions to this rule:
# 1. If the package isn't getting installed because it's in
# package.provided, it's not mandatory.
# 2. If the package isn't getting installed because we're in --onlydeps
# mode, it's not mandatory either.
if "--selective" in emerge.opts:
selective = emerge.opts["--selective"] != "n"
else:
selective = ("--noreplace" in emerge.opts or
"--update" in emerge.opts or
"--newuse" in emerge.opts or
"--reinstall" in emerge.opts)
onlydeps = "--onlydeps" in emerge.opts
if not selective:
for pkg in emerge.cmdline_packages:
# If the package specified on the command-line is in our install
# list, mark it as non-optional.
found = False
for db_pkg in final_db.match_pkgs(pkg):
this_pkg = deps_info.get(db_pkg.cpv)
if this_pkg:
found = True
this_pkg["optional"] = False
# We didn't find the package in our final db. If we're not in
# --onlydeps mode, this likely means that the package was specified
# in package.provided.
if not found and not onlydeps and "--verbose" in emerge.opts:
print "Skipping %s (is it in package.provided?)" % pkg
# Schedule packages that aren't on the install list for removal
rm_pkgs = set(deps_map.keys()) - set(deps_info.keys())
# Schedule optional packages for removal
for pkg, info in deps_info.items():
if info["optional"]:
rm_pkgs.add(pkg)
# Schedule nomerge packages for removal
for pkg in self.nomerge:
for db_pkg in final_db.match_pkgs(pkg):
if db_pkg.cpv in deps_map:
rm_pkgs.add(str(db_pkg.cpv))
# Remove the packages we don't want, simplifying the graph and making
# it easier for us to crack cycles.
for pkg in sorted(rm_pkgs):
this_pkg = deps_map[pkg]
needs = this_pkg["needs"]
provides = this_pkg["provides"]
for dep in needs:
dep_provides = deps_map[dep]["provides"]
dep_provides.update(provides)
dep_provides.discard(pkg)
dep_provides.discard(dep)
for target in provides:
target_needs = deps_map[target]["needs"]
target_needs.update(needs)
target_needs.pop(pkg, None)
target_needs.pop(target, None)
del deps_map[pkg]
def PrintCycleBreak(basedep, dep, mycycle):
"""Print details about a cycle that we are planning on breaking.
We are breaking a cycle where dep needs basedep. mycycle is an
example cycle which contains dep -> basedep."""
# If it's an optional dependency, there's no need to spam the user with
# warning messages.
needs = deps_map[dep]["needs"]
depinfo = needs.get(basedep, "deleted")
if depinfo == "optional":
return
# Notify the user that we're breaking a cycle.
print "Breaking %s -> %s (%s)" % (dep, basedep, depinfo)
# Show cycle.
for i in range(len(mycycle) - 1):
pkg1, pkg2 = mycycle[i], mycycle[i+1]
needs = deps_map[pkg1]["needs"]
depinfo = needs.get(pkg2, "deleted")
if pkg1 == dep and pkg2 == basedep:
depinfo = depinfo + ", deleting"
print " %s -> %s (%s)" % (pkg1, pkg2, depinfo)
def SanitizeTree():
"""Remove circular dependencies.
We prune all dependencies involved in cycles that go against the emerge
ordering. This has a nice property: we're guaranteed to merge
dependencies in the same order that portage does.
Because we don't treat any dependencies as "soft" unless they're killed
by a cycle, we pay attention to a larger number of dependencies when
merging. This hurts performance a bit, but helps reliability.
"""
start = time.time()
cycles = FindCycles()
while cycles:
for dep, mycycles in cycles.iteritems():
for basedep, mycycle in mycycles.iteritems():
if deps_info[basedep]["idx"] >= deps_info[dep]["idx"]:
PrintCycleBreak(basedep, dep, mycycle)
del deps_map[dep]["needs"][basedep]
deps_map[basedep]["provides"].remove(dep)
cycles = FindCycles()
seconds = time.time() - start
if "--quiet" not in emerge.opts and seconds >= 0.1:
print "Tree sanitized in %dm%.1fs" % (seconds / 60, seconds % 60)
def AddSecretDeps():
"""Find these tagged packages and add extra dependencies.
For debugging dependency problems.
"""
for bad in secret_deps:
needed = secret_deps[bad]
bad_pkg = None
needed_pkg = None
for dep in deps_map:
if dep.find(bad) != -1:
bad_pkg = dep
if dep.find(needed) != -1:
needed_pkg = dep
if bad_pkg and needed_pkg:
deps_map[needed_pkg]["provides"].add(bad_pkg)
deps_map[bad_pkg]["needs"][needed_pkg] = "secret"
def MergeChildren(pkg, merge_type):
"""Merge this package and all packages it provides."""
this_pkg = deps_map[pkg]
if (this_pkg[merge_type] or pkg not in final_pkgs):
return
# Mark this package as non-optional
deps_info[pkg]["optional"] = False
this_pkg[merge_type] = True
for w in this_pkg["provides"].difference(rebuild_blacklist):
MergeChildren(w, merge_type)
if this_pkg["action"] == "nomerge":
this_pkg["action"] = "merge"
def LocalPackageDatabase():
"""Get the modification times of the packages in the local database.
We need to know the modification times of the local packages so that we
know when they need to be rebuilt.
Returns:
A dict mapping package identifiers to modification times.
"""
vardb = emerge.trees[root]["vartree"].dbapi
local_pkgs = {}
for pkg in vardb.cpv_all():
local_pkgs[pkg] = vardb.aux_get(pkg, ["BUILD_TIME"])[0]
return local_pkgs
def AutoRebuildDeps(local_pkgs, remote_pkgs, cycles):
"""Recursively rebuild packages when necessary using modification times.
If you've modified a package, it's a good idea to rebuild all the packages
that depend on it from source. This function looks for any packages which
depend on packages that have been modified and ensures that they get
rebuilt.
Args:
local_pkgs: Modification times from the local database.
remote_pkgs: Modification times from the prebuilt server.
cycles: Dictionary returned from FindCycles()
Returns:
The set of packages we marked as needing to be merged.
"""
def PrebuiltsReady(pkg, pkg_db, cache):
"""Check whether the prebuilts are ready for pkg and all deps.
Args:
pkg: The specified package.
pkg_db: The package DB to use.
cache: A dict, where the results are stored.
Returns:
True iff the prebuilts are ready for pkg and all deps.
"""
if pkg in cache:
return cache[pkg]
if pkg not in pkg_db and pkg not in self.forced_remote_binary_packages:
cache[pkg] = False
else:
cache[pkg] = True
for dep in deps_map[pkg]["needs"]:
if not PrebuiltsReady(dep, pkg_db, cache):
cache[pkg] = False
break
return cache[pkg]
def LastModifiedWithDeps(pkg, pkg_db, cache):
"""Calculate the last modified time of a package and its dependencies.
This function looks at all the packages needed by the specified package
and checks the most recent modification time of all of those packages.
If the dependencies of a package were modified more recently than the
package itself, then we know the package needs to be rebuilt.
Args:
pkg: The specified package.
pkg_db: The package DB to use.
cache: A dict, where the last modified times are stored.
Returns:
The last modified time of the specified package and its dependencies.
"""
if pkg in cache:
return cache[pkg]
cache[pkg] = pkg_db.get(pkg, 0)
for dep in deps_map[pkg]["needs"]:
t = LastModifiedWithDeps(dep, pkg_db, cache)
cache[pkg] = max(cache[pkg], t)
return cache[pkg]
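# For example, with hypothetical packages and build times: if A was built
# at time 100 but its dependency B was built at time 200, then
# LastModifiedWithDeps("A", ...) returns 200. The check below then sees
# that A's own build time (100) is older than 200, so A and its children
# get scheduled for a rebuild or binary update.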
# For every package that's getting updated in our local cache (binary
# or source), make sure we also update the children. If a package is
# built from source, all children must also be built from source.
local_ready_cache, remote_ready_cache = {}, {}
local_mtime_cache, remote_mtime_cache = {}, {}
for pkg in final_pkgs.difference(rebuild_blacklist):
# If all the necessary local packages are ready, and their
# modification times are in sync, we don't need to do anything here.
local_mtime = LastModifiedWithDeps(pkg, local_pkgs, local_mtime_cache)
local_ready = PrebuiltsReady(pkg, local_pkgs, local_ready_cache)
if (not local_ready or (local_pkgs.get(pkg, 0) < local_mtime and
pkg not in cycles)):
# OK, at least one package is missing from the local cache or is
# outdated. This means we're going to have to install the package
# and all dependencies.
#
# If all the necessary remote packages are ready, and they're at
# least as new as our local packages, we can install them.
# Otherwise, we need to build from source.
remote_mtime = LastModifiedWithDeps(pkg, remote_pkgs,
remote_mtime_cache)
remote_ready = PrebuiltsReady(pkg, remote_pkgs, remote_ready_cache)
if remote_ready and (local_mtime <= remote_mtime or pkg in cycles):
MergeChildren(pkg, "mandatory")
else:
MergeChildren(pkg, "mandatory_source")
def UsePrebuiltPackages(remote_pkgs):
"""Update packages that can use prebuilts to do so."""
start = time.time()
# Build list of prebuilt packages.
prebuilt_pkgs = {}
for pkg, info in deps_map.iteritems():
if info and info["action"] == "merge":
if (not info["force_remote_binary"] and info["mandatory_source"] or
"--usepkgonly" not in emerge.opts and pkg not in remote_pkgs):
continue
db_pkg = emerge.depgraph._pkg(pkg, "binary", emerge.root_config)
if info["force_remote_binary"]:
# Undo our earlier hacks to the use flags so that the use flags
# display correctly.
db_pkg.use.enabled = db_pkg.metadata["USE"].split()
prebuilt_pkgs[pkg] = db_pkg
# Calculate what packages need to be rebuilt due to changes in use flags.
pkgsettings = emerge.depgraph._frozen_config.pkgsettings[root]
for pkg, db_pkg in prebuilt_pkgs.iteritems():
if not self.CheckUseFlags(pkgsettings, db_pkg, self.package_db[pkg]):
MergeChildren(pkg, "mandatory_source")
# Convert eligible packages to binaries.
for pkg, info in deps_map.iteritems():
if info and info["action"] == "merge" and pkg in prebuilt_pkgs:
if not info["mandatory_source"] or info["force_remote_binary"]:
self.package_db[pkg] = prebuilt_pkgs[pkg]
seconds = time.time() - start
if "--quiet" not in emerge.opts:
print "Prebuilt DB populated in %dm%.1fs" % (seconds / 60, seconds % 60)
return prebuilt_pkgs
ReverseTree(deps_tree)
BuildFinalPackageSet()
AddSecretDeps()
# Mark that we want to use remote binaries only for a particular package.
vardb = emerge.depgraph._frozen_config.trees[root]["vartree"].dbapi
for pkg in self.force_remote_binary:
for db_pkg in final_db.match_pkgs(pkg):
match = deps_map.get(str(db_pkg.cpv))
if match:
match["force_remote_binary"] = True
rebuild_blacklist.add(str(db_pkg.cpv))
if not vardb.match_pkgs(db_pkg.cpv):
MergeChildren(str(db_pkg.cpv), "mandatory")
if self.no_workon_deps:
for pkg in self.mandatory_source.copy():
for db_pkg in final_db.match_pkgs(pkg):
deps_map[str(db_pkg.cpv)]["mandatory_source"] = True
else:
for pkg in self.mandatory_source.copy():
for db_pkg in final_db.match_pkgs(pkg):
MergeChildren(str(db_pkg.cpv), "mandatory_source")
cycles = FindCycles()
if self.rebuild:
local_pkgs = LocalPackageDatabase()
AutoRebuildDeps(local_pkgs, remote_pkgs, cycles)
# We need to remove installed packages so that we can use the dependency
# ordering of the install process to show us what cycles to crack. Once
# we've done that, we also need to recalculate our list of cycles so that
# we don't include the installed packages in our cycles.
RemoveInstalledPackages()
SanitizeTree()
if deps_map:
if "--usepkg" in emerge.opts:
UsePrebuiltPackages(remote_pkgs)
return deps_map
def PrintInstallPlan(self, deps_map):
"""Print an emerge-style install plan.
The install plan lists what packages we're installing, in order.
It's useful for understanding what parallel_emerge is doing.
Args:
deps_map: The dependency graph.
"""
def InstallPlanAtNode(target, deps_map):
nodes = []
nodes.append(target)
for dep in deps_map[target]["provides"]:
del deps_map[dep]["needs"][target]
if not deps_map[dep]["needs"]:
nodes.extend(InstallPlanAtNode(dep, deps_map))
return nodes
deps_map = copy.deepcopy(deps_map)
install_plan = []
plan = set()
for target, info in deps_map.iteritems():
if not info["needs"] and target not in plan:
for item in InstallPlanAtNode(target, deps_map):
plan.add(item)
install_plan.append(self.package_db[item])
for pkg in plan:
del deps_map[pkg]
if deps_map:
print "Cyclic dependencies:", " ".join(deps_map)
PrintDepsMap(deps_map)
sys.exit(1)
self.emerge.depgraph.display(install_plan)
def PrintDepsMap(deps_map):
"""Print dependency graph, for each package list it's prerequisites."""
for i in sorted(deps_map):
print "%s: (%s) needs" % (i, deps_map[i]["action"])
needs = deps_map[i]["needs"]
for j in sorted(needs):
print " %s" % (j)
if not needs:
print " no dependencies"
class EmergeJobState(object):
__slots__ = ["done", "filename", "last_notify_timestamp", "last_output_seek",
"last_output_timestamp", "pkgname", "retcode", "start_timestamp",
"target"]
def __init__(self, target, pkgname, done, filename, start_timestamp,
retcode=None):
# The full name of the target we're building (e.g.
# chromeos-base/chromeos-0.0.1-r60)
self.target = target
# The short name of the target we're building (e.g. chromeos-0.0.1-r60)
self.pkgname = pkgname
# Whether the job is done. (True if the job is done; false otherwise.)
self.done = done
# The filename where output is currently stored.
self.filename = filename
# The timestamp of the last time we printed the name of the log file. We
# print this at the beginning of the job, so this starts at
# start_timestamp.
self.last_notify_timestamp = start_timestamp
# The location (in bytes) of the end of the last complete line we printed.
# This starts off at zero. We use this to jump to the right place when we
# print output from the same ebuild multiple times.
self.last_output_seek = 0
# The timestamp of the last time we printed output. Since we haven't
# printed output yet, this starts at zero.
self.last_output_timestamp = 0
# The return code of our job, if the job is actually finished.
self.retcode = retcode
# The timestamp when our job started.
self.start_timestamp = start_timestamp
def SetupWorkerSignals():
def ExitHandler(signum, frame):
# Remove our signal handlers so we don't get called recursively.
signal.signal(signal.SIGINT, signal.SIG_DFL)
signal.signal(signal.SIGTERM, signal.SIG_DFL)
# Try to exit cleanly
sys.exit(1)
# Ensure that we exit quietly and cleanly, if possible, when we receive
# SIGTERM or SIGINT signals. By default, when the user hits CTRL-C, all
# of the child processes will print details about KeyboardInterrupt
# exceptions, which isn't very helpful.
signal.signal(signal.SIGINT, ExitHandler)
signal.signal(signal.SIGTERM, ExitHandler)
def EmergeWorker(task_queue, job_queue, emerge, package_db):
"""This worker emerges any packages given to it on the task_queue.
Args:
task_queue: The queue of tasks for this worker to do.
job_queue: The queue of results from the worker.
emerge: An EmergeData() object.
package_db: A dict, mapping package ids to portage Package objects.
It expects package identifiers to be passed to it via task_queue. When
a task starts or finishes, we push an EmergeJobState object to the
job_queue; the output of each job is stored in the file named by its
"filename" member.
"""
SetupWorkerSignals()
settings, trees, mtimedb = emerge.settings, emerge.trees, emerge.mtimedb
opts, spinner = emerge.opts, emerge.spinner
opts["--nodeps"] = True
if new_portage:
# When Portage launches new processes, it goes on a rampage and closes all
# open file descriptors. Ask Portage not to do that, as it breaks us.
portage.process.get_open_fds = lambda: []
while True:
# Wait for a new item to show up on the queue. This is a blocking wait,
# so if there's nothing to do, we just sit here.
target = task_queue.get()
if not target:
# If target is None, this means that the main thread wants us to quit.
# The other workers need to exit too, so we'll push the message back on
# to the queue so they'll get it too.
task_queue.put(target)
return
db_pkg = package_db[target]
db_pkg.root_config = emerge.root_config
install_list = [db_pkg]
pkgname = db_pkg.pf
output = tempfile.NamedTemporaryFile(prefix=pkgname + "-", delete=False)
start_timestamp = time.time()
job = EmergeJobState(target, pkgname, False, output.name, start_timestamp)
job_queue.put(job)
if "--pretend" in opts:
retcode = 0
else:
save_stdout = sys.stdout
save_stderr = sys.stderr
try:
sys.stdout = output
sys.stderr = output
if new_portage:
emerge.scheduler_graph.mergelist = install_list
scheduler = Scheduler(settings, trees, mtimedb, opts, spinner,
favorites=[], graph_config=emerge.scheduler_graph)
else:
scheduler = Scheduler(settings, trees, mtimedb, opts, spinner,
install_list, [], emerge.scheduler_graph)
retcode = scheduler.merge()
except Exception:
traceback.print_exc(file=output)
retcode = 1
finally:
sys.stdout = save_stdout
sys.stderr = save_stderr
output.close()
if retcode is None:
retcode = 0
job = EmergeJobState(target, pkgname, True, output.name, start_timestamp,
retcode)
job_queue.put(job)
class LinePrinter(object):
"""Helper object to print a single line."""
def __init__(self, line):
self.line = line
def Print(self, seek_locations):
print self.line
class JobPrinter(object):
"""Helper object to print output of a job."""
def __init__(self, job, unlink=False):
"""Print output of job.
If unlink is True, unlink the job output file when done."""
self.current_time = time.time()
self.job = job
self.unlink = unlink
def Print(self, seek_locations):
job = self.job
# Calculate how long the job has been running.
seconds = self.current_time - job.start_timestamp
# Note that we've printed out the job so far.
job.last_output_timestamp = self.current_time
# Note that we're starting the job
info = "job %s (%dm%.1fs)" % (job.pkgname, seconds / 60, seconds % 60)
last_output_seek = seek_locations.get(job.filename, 0)
if last_output_seek:
print "=== Continue output for %s ===" % info
else:
print "=== Start output for %s ===" % info
# Print actual output from job
f = codecs.open(job.filename, encoding='utf-8', errors='replace')
f.seek(last_output_seek)
prefix = job.pkgname + ":"
for line in f:
# Save off our position in the file
if line and line[-1] == "\n":
last_output_seek = f.tell()
line = line[:-1]
# Print our line
print prefix, line.encode('utf-8', 'replace')
f.close()
# Save our last spot in the file so that we don't print out the same
# location twice.
seek_locations[job.filename] = last_output_seek
# Note end of output section
if job.done:
print "=== Complete: %s ===" % info
else:
print "=== Still running: %s ===" % info
if self.unlink:
os.unlink(job.filename)
def PrintWorker(queue):
"""A worker that prints stuff to the screen as requested."""
def ExitHandler(signum, frame):
# Switch to default signal handlers so that we'll die after two signals.
signal.signal(signal.SIGINT, signal.SIG_DFL)
signal.signal(signal.SIGTERM, signal.SIG_DFL)
# Don't exit on the first SIGINT / SIGTERM, because the parent worker will
# handle it and tell us when we need to exit.
signal.signal(signal.SIGINT, ExitHandler)
signal.signal(signal.SIGTERM, ExitHandler)
# seek_locations is a map indicating the position we are at in each file.
# It starts off empty, but is set by the various Print jobs as we go along
# to indicate where we left off in each file.
seek_locations = {}
while True:
try:
job = queue.get()
if job:
job.Print(seek_locations)
else:
break
except IOError as ex:
if ex.errno == errno.EINTR:
# Looks like we received a signal. Keep printing.
continue
raise
class EmergeQueue(object):
"""Class to schedule emerge jobs according to a dependency graph."""
def __init__(self, deps_map, emerge, package_db, show_output):
# Store the dependency graph.
self._deps_map = deps_map
# Initialize the running queue to empty
self._jobs = {}
# The list of packages in deps_map that will actually be installed.
install_jobs = [x for x in deps_map if deps_map[x]["action"] == "merge"]
self._total_jobs = len(install_jobs)
self._show_output = show_output
if "--pretend" in emerge.opts:
print "Skipping merge because of --pretend mode."
sys.exit(0)
# Setup scheduler graph object. This is used by the child processes
# to help schedule jobs.
emerge.scheduler_graph = emerge.depgraph.schedulerGraph()
# Calculate how many jobs we can run in parallel. We don't want to pass
# the --jobs flag over to emerge itself, because that'll tell emerge to
# hide its output, and said output is quite useful for debugging hung
# jobs.
procs = min(self._total_jobs,
emerge.opts.pop("--jobs", multiprocessing.cpu_count()))
self._emerge_queue = multiprocessing.Queue()
self._job_queue = multiprocessing.Queue()
self._print_queue = multiprocessing.Queue()
args = (self._emerge_queue, self._job_queue, emerge, package_db)
self._pool = multiprocessing.Pool(procs, EmergeWorker, args)
self._print_worker = multiprocessing.Process(target=PrintWorker,
args=[self._print_queue])
self._print_worker.start()
# Initialize the failed queue to empty.
self._retry_queue = []
self._failed = set()
# Print an update before we launch the merges.
self._Status()
# Setup an exit handler so that we print nice messages if we are
# terminated.
self._SetupExitHandler()
# Schedule our jobs.
for target, info in deps_map.items():
if not info["needs"]:
self._Schedule(target)
def _SetupExitHandler(self):
def ExitHandler(signum, frame):
# Kill our signal handlers so we don't get called recursively
signal.signal(signal.SIGINT, signal.SIG_DFL)
signal.signal(signal.SIGTERM, signal.SIG_DFL)
# Print our current job status
for target, job in self._jobs.iteritems():
if job:
self._print_queue.put(JobPrinter(job, unlink=True))
# Notify the user that we are exiting
self._Print("Exiting on signal %s" % signum)
# Kill child threads, then exit.
self._Exit()
sys.exit(1)
# Print out job status when we are killed
signal.signal(signal.SIGINT, ExitHandler)
signal.signal(signal.SIGTERM, ExitHandler)
def _Schedule(self, target):
# We maintain a tree of all deps; if this package doesn't need
# to be installed, just free up its children and continue.
# It is possible to reinstall deps of deps, without reinstalling
# first level deps, like so:
# chromeos (merge) -> eselect (nomerge) -> python (merge)
if self._deps_map[target]["action"] == "nomerge":
self._Finish(target)
else:
# Kick off the build if it's marked to be built.
self._jobs[target] = None
self._emerge_queue.put(target)
def _LoadAvg(self):
loads = open("/proc/loadavg", "r").readline().split()[:3]
return " ".join(loads)
def _Print(self, line):
"""Print a single line."""
self._print_queue.put(LinePrinter(line))
def _Status(self):
"""Print status."""
current_time = time.time()
no_output = True
# Print interim output every minute if --show-output is used. Otherwise,
# print notifications about running packages every 2 minutes, and print
# full output for jobs that have been running for 60 minutes or more.
if self._show_output:
interval = 60
notify_interval = 0
else:
interval = 60 * 60
notify_interval = 60 * 2
for target, job in self._jobs.iteritems():
if job:
last_timestamp = max(job.start_timestamp, job.last_output_timestamp)
if last_timestamp + interval < current_time:
self._print_queue.put(JobPrinter(job))
job.last_output_timestamp = current_time
no_output = False
elif (notify_interval and
job.last_notify_timestamp + notify_interval < current_time):
job_seconds = current_time - job.start_timestamp
args = (job.pkgname, job_seconds / 60, job_seconds % 60, job.filename)
info = "Still building %s (%dm%.1fs). Logs in %s" % args
job.last_notify_timestamp = current_time
self._Print(info)
no_output = False
# If we haven't printed any messages yet, print a general status message
# here.
if no_output:
seconds = current_time - GLOBAL_START
line = ("Pending %s, Ready %s, Running %s, Retrying %s, Total %s "
"[Time %dm%.1fs Load %s]")
qsize = self._emerge_queue.qsize()
self._Print(line % (len(self._deps_map), qsize, len(self._jobs) - qsize,
len(self._retry_queue), self._total_jobs,
seconds / 60, seconds % 60, self._LoadAvg()))
def _Finish(self, target):
"""Mark a target as completed and unblock dependecies."""
for dep in self._deps_map[target]["provides"]:
del self._deps_map[dep]["needs"][target]
if not self._deps_map[dep]["needs"]:
self._Schedule(dep)
self._deps_map.pop(target)
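# For example, with hypothetical packages: if B's "needs" contains A (and
# so B appears in A's "provides"), then _Finish("A") deletes A from B's
# "needs"; if that was B's last unmet dependency, B is scheduled right away.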
def _Retry(self):
if self._retry_queue:
target = self._retry_queue.pop(0)
self._Schedule(target)
self._Print("Retrying emerge of %s." % target)
def _Exit(self):
# Tell emerge workers to exit. They all exit when 'None' is pushed
# to the queue.
self._emerge_queue.put(None)
self._pool.close()
self._pool.join()
# Now that our workers are finished, we can kill the print queue.
self._print_queue.put(None)
self._print_worker.join()
def Run(self):
"""Run through the scheduled ebuilds.
Keep running so long as we have uninstalled packages in the
dependency graph to merge.
"""
while self._deps_map:
# Check here that we are actually waiting for something.
if (self._emerge_queue.empty() and
self._job_queue.empty() and
not self._jobs and
self._deps_map):
# If we have failed on a package, retry it now.
if self._retry_queue:
self._Retry()
else:
# Tell child threads to exit.
self._Exit()
# The dependency map is helpful for debugging failures.
PrintDepsMap(self._deps_map)
# Tell the user why we're exiting.
if self._failed:
print "Packages failed: %s" % " ,".join(self._failed)
else:
print "Deadlock! Circular dependencies!"
sys.exit(1)
try:
job = self._job_queue.get(timeout=5)
except Queue.Empty:
# Print an update.
self._Status()
continue
target = job.target
if not job.done:
self._jobs[target] = job
self._Print("Started %s (logged in %s)" % (target, job.filename))
continue
# Print output of job
if self._show_output or job.retcode != 0:
self._print_queue.put(JobPrinter(job, unlink=True))
else:
os.unlink(job.filename)
del self._jobs[target]
seconds = time.time() - job.start_timestamp
details = "%s (in %dm%.1fs)" % (target, seconds / 60, seconds % 60)
# Complain if necessary.
if job.retcode != 0:
# Handle job failure.
if target in self._failed:
# If this job has failed previously, give up.
self._Print("Failed %s. Your build has failed." % details)
else:
# Queue up this build to try again after a long while.
self._retry_queue.append(target)
self._failed.add(target)
self._Print("Failed %s, retrying later." % details)
else:
if target in self._failed and self._retry_queue:
# If we have successfully retried a failed package, and there
# are more failed packages, try the next one. We will only have
# one retrying package actively running at a time.
self._Retry()
self._Print("Completed %s" % details)
# Mark as completed and unblock waiting ebuilds.
self._Finish(target)
# Print an update.
self._Status()
# Tell child threads to exit.
self._Print("Merge complete")
self._Exit()
def main():
deps = DepGraphGenerator()
deps.Initialize(sys.argv[1:])
emerge = deps.emerge
if emerge.action is not None:
sys.argv = deps.ParseParallelEmergeArgs(sys.argv)
sys.exit(emerge_main())
elif not emerge.cmdline_packages:
Usage()
sys.exit(1)
# Unless we're in pretend mode, there's not much point running without
# root access. We need to be able to install packages.
#
# NOTE: Even if you're running --pretend, it's a good idea to run
# parallel_emerge with root access so that portage can write to the
# dependency cache. This is important for performance.
if "--pretend" not in emerge.opts and portage.secpass < 2:
print "parallel_emerge: superuser access is required."
sys.exit(1)
if "--quiet" not in emerge.opts:
cmdline_packages = " ".join(emerge.cmdline_packages)
nomerge_packages = " ".join(deps.nomerge)
print "Starting fast-emerge."
print " Building package %s on %s" % (cmdline_packages,
deps.board or "root")
if nomerge_packages:
print " Skipping package %s on %s" % (nomerge_packages,
deps.board or "root")
remote_pkgs = {}
if "--getbinpkg" in emerge.opts:
remote_pkgs = deps.RemotePackageDatabase()
deps_tree, deps_info = deps.GenDependencyTree(remote_pkgs)
# You want me to be verbose? I'll give you two trees! Twice as much value.
if "--tree" in emerge.opts and "--verbose" in emerge.opts:
deps.PrintTree(deps_tree)
deps_graph = deps.GenDependencyGraph(deps_tree, deps_info, remote_pkgs)
# OK, time to print out our progress so far.
deps.PrintInstallPlan(deps_graph)
if "--tree" in emerge.opts:
PrintDepsMap(deps_graph)
# Are we upgrading portage? If so, and there are more packages to merge,
# schedule a restart of parallel_emerge to merge the rest. This ensures that
# we pick up all updates to portage settings before merging any more
# packages.
portage_upgrade = False
root = emerge.settings["ROOT"]
final_db = emerge.depgraph._dynamic_config.mydbapi[root]
if root == "/":
for db_pkg in final_db.match_pkgs("sys-apps/portage"):
portage_pkg = deps_graph.get(db_pkg.cpv)
if portage_pkg and len(deps_graph) > 1:
portage_pkg["needs"].clear()
portage_pkg["provides"].clear()
deps_graph = { str(db_pkg.cpv): portage_pkg }
portage_upgrade = True
if "--quiet" not in emerge.opts:
print "Upgrading portage first, then restarting..."
# Run the queued emerges.
scheduler = EmergeQueue(deps_graph, emerge, deps.package_db, deps.show_output)
scheduler.Run()
# Update world.
if ("--oneshot" not in emerge.opts and
"--pretend" not in emerge.opts):
world_set = emerge.root_config.sets["selected"]
new_world_pkgs = []
for pkg in emerge.cmdline_packages:
for db_pkg in final_db.match_pkgs(pkg):
print "Adding %s to world" % db_pkg.cp
new_world_pkgs.append(db_pkg.cp)
if new_world_pkgs:
world_set.update(new_world_pkgs)
# Update environment (library cache, symlinks, etc.)
if deps.board and "--pretend" not in emerge.opts:
portage.env_update()
# If we already upgraded portage, we don't need to do so again. But we do
# need to upgrade the rest of the packages. So we'll go ahead and do that.
if portage_upgrade:
args = sys.argv[1:] + ["--nomerge=sys-apps/portage"]
os.execvp(os.path.realpath(sys.argv[0]), args)
print "Done"
sys.exit(0)
if __name__ == "__main__":
main()