blob: c2199655a556a5603a90e226d80dfd1966fc8c0a [file] [log] [blame]
# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Program to run emerge in parallel, for significant speedup.
Usage:
./parallel_emerge [--board=BOARD] [--workon=PKGS]
[--force-remote-binary=PKGS] [emerge args] package
This script runs multiple emerge processes in parallel, using appropriate
Portage APIs. It is faster than standard emerge because it has a
multiprocess model instead of an asynchronous model.
"""
from __future__ import print_function
import codecs
import copy
import errno
import gc
import heapq
import multiprocessing
import os
try:
import Queue
except ImportError:
# Python-3 renamed to "queue". We still use Queue to avoid collisions
# with naming variables as "queue". Maybe we'll transition at some point.
# pylint: disable=F0401
import queue as Queue
import signal
import subprocess
import sys
import tempfile
import threading
import time
import traceback
from chromite.lib import cros_build_lib
from chromite.lib import process_util
from chromite.lib import proctitle
# If PORTAGE_USERNAME isn't specified, scrape it from the $HOME variable. On
# Chromium OS, the default "portage" user doesn't have the necessary
# permissions. It'd be easier if we could default to $USERNAME, but $USERNAME
# is "root" here because we get called through sudo.
#
# We need to set this before importing any portage modules, because portage
# looks up "PORTAGE_USERNAME" at import time.
#
# NOTE: .bashrc sets PORTAGE_USERNAME = $USERNAME, so most people won't
# encounter this case unless they have an old chroot or blow away the
# environment by running sudo without the -E specifier.
if "PORTAGE_USERNAME" not in os.environ:
  # Fall back to the last component of $HOME as the portage user name.
  homedir = os.environ.get("HOME")
  if homedir:
    # Normalize first: basename("/home/user/") is "", which would export an
    # empty (and therefore useless) PORTAGE_USERNAME.
    _username = os.path.basename(os.path.normpath(homedir))
    # HOME="/" has no final component at all; leave the variable unset then.
    if _username:
      os.environ["PORTAGE_USERNAME"] = _username
# Wrap Popen with a lock to ensure no two Popen are executed simultaneously in
# the same process.
# Two simultaneous Popen calls might be the cause of crbug.com/433482.
_popen_lock = threading.Lock()
_old_popen = subprocess.Popen
def _LockedPopen(*args, **kwargs):
with _popen_lock:
return _old_popen(*args, **kwargs)
subprocess.Popen = _LockedPopen
# Portage doesn't expose dependency trees in its public API, so we have to
# make use of some private APIs here. These modules are found under
# /usr/lib/portage/pym/.
#
# TODO(davidjames): Update Portage to expose public APIs for these features.
# pylint: disable=F0401
from _emerge.actions import adjust_configs
from _emerge.actions import load_emerge_config
from _emerge.create_depgraph_params import create_depgraph_params
from _emerge.depgraph import backtrack_depgraph
from _emerge.main import emerge_main
from _emerge.main import parse_opts
from _emerge.Package import Package
from _emerge.post_emerge import clean_logs
from _emerge.Scheduler import Scheduler
from _emerge.stdout_spinner import stdout_spinner
from portage._global_updates import _global_updates
import portage
import portage.debug
# pylint: enable=F0401
def Usage():
  """Print the parallel_emerge usage summary to stdout."""
  print("Usage:")
  print(" ./parallel_emerge [--board=BOARD] [--workon=PKGS]")
  print(" [--rebuild] [emerge args] package")
  print()
  print("Packages specified as workon packages are always built from source.")
  print()
  print("The --workon argument is mainly useful when you want to build and")
  print("install packages that you are working on unconditionally, but do not")
  # The original text read "but do not to have to rev"; the verb was missing.
  print("want to have to rev the package to indicate you want to build it from")
  print("source. The build_packages script will automatically supply the")
  print("workon argument to emerge, ensuring that packages selected using")
  print("cros-workon are rebuilt.")
  print()
  print("The --rebuild option rebuilds packages whenever their dependencies")
  print("are changed. This ensures that your build is correct.")
# Global start time: wall-clock timestamp (time.time()) taken when this module
# is loaded.
GLOBAL_START = time.time()
# Whether process has been killed by a signal. This multiprocessing event is
# set by the ExitHandler that SetupWorkerSignals() installs.
KILLED = multiprocessing.Event()
class EmergeData(object):
  """Plain struct bundling the emerge state we pass around as a unit.

  The fields are used for calculating dependencies and installing packages.
  All of them start out as None.

  Attributes:
    action: The action the user requested. None when installing packages;
      otherwise the action name, which maps exactly to the long-form name of
      the associated emerge option (e.g. "unmerge" for
      `parallel_emerge --unmerge package`).
    cmdline_packages: The list of packages the user passed on the
      command-line.
    depgraph: The emerge dependency graph. It contains all the packages
      involved in this merge, along with their versions.
    favorites: The list of candidates to add to the world file.
    mtimedb: A dictionary used by portage to maintain global state. It is
      loaded from disk when portage starts up and saved whenever
      mtimedb.commit() is called. It records information about global updates
      and what portage is currently doing (which is what makes --resume work
      in portage). parallel_emerge does not save what it is currently doing
      there, so --resume is not supported.
    opts: A dict of the options passed to emerge, as cleaned up by
      parse_opts. Some options are normalized to make lookups easy: e.g.
      "--usepkg=n" (the default) is simply left out, so "--usepkg" being
      present means it is enabled. Not every option is cleaned up this way
      (--with-bdeps is passed in as-is); see parse_opts in _emerge.main for
      the full list.
    root_config: The portage configuration for our current root: the portage
      settings plus the three portage trees for that root.
    scheduler_graph: Used by emerge to calculate what packages to install.
      We don't actually install any deps, so this isn't really used, but it
      is passed to the Scheduler object anyway.
    settings: Portage settings for our current session. Most of them are set
      in make.conf inside our current install root.
    spinner: The spinner, which spews stuff to stdout to indicate that
      portage is doing something. We maintain our own spinner, so the portage
      spinner is put in "silent" mode.
    trees: The portage trees, one set per root; use
      trees[settings["ROOT"]] for the current root. Each root has three:
        vartree: A database of the currently-installed packages.
        porttree: A database of ebuilds, that can be used to build packages.
        bintree: A database of binary packages.
  """

  __slots__ = ["action", "cmdline_packages", "depgraph", "favorites",
               "mtimedb", "opts", "root_config", "scheduler_graph",
               "settings", "spinner", "trees"]

  def __init__(self):
    # Every field is unset until Initialize()/CreateDepgraph() fill it in.
    for field in self.__slots__:
      setattr(self, field, None)
class DepGraphGenerator(object):
  """Collects package dependency information from portage.

  Typical usage:
    deps = DepGraphGenerator()
    deps.Initialize(sys.argv[1:])
    deps_tree, deps_info = deps.GenDependencyTree()
    deps_graph = deps.GenDependencyGraph(deps_tree, deps_info)
    deps.PrintTree(deps_tree)
    PrintDepsMap(deps_graph)
  """

  __slots__ = [
      "board",
      "emerge",
      "package_db",
      "show_output",
      "sysroot",
      "unpack_only",
      "max_retries",
  ]
def __init__(self):
  """Start with nothing selected: no board, no sysroot, an empty package db."""
  # Holder for all the portage state; filled in by Initialize().
  self.emerge = EmergeData()
  # Maps package ids to portage Package objects (see GenDependencyTree).
  self.package_db = {}
  # Target selection (--board= / --sysroot=).
  self.board = None
  self.sysroot = None
  # Behavior flags (--show-output, --unpackonly, --retries=).
  self.show_output = False
  self.unpack_only = False
  self.max_retries = 1
def ParseParallelEmergeArgs(self, argv):
  """Read the parallel emerge arguments from the command-line.

  We need to be compatible with emerge arg format. We scrape arguments that
  are specific to parallel_emerge (recording them on self or translating them
  to their emerge equivalents), and pass through the rest directly to emerge.

  Args:
    argv: arguments list

  Returns:
    Arguments that don't belong to parallel_emerge, followed by the
    --rebuild-exclude options we always add.

  Raises:
    ValueError: If the value given to --retries= is not an integer.
  """
  def _Value(arg, prefix):
    # Slice the prefix off instead of using str.replace(): replace() would
    # also delete any later occurrence of the prefix text inside the value.
    return arg[len(prefix):]

  emerge_args = []
  for arg in argv:
    # Specifically match arguments that are specific to parallel_emerge, and
    # pass through the rest.
    if arg.startswith("--board="):
      self.board = _Value(arg, "--board=")
    elif arg.startswith("--sysroot="):
      self.sysroot = _Value(arg, "--sysroot=")
    elif arg.startswith("--workon="):
      # Workon packages are always rebuilt, and never taken from binaries.
      workon_str = _Value(arg, "--workon=")
      emerge_args.append("--reinstall-atoms=%s" % workon_str)
      emerge_args.append("--usepkg-exclude=%s" % workon_str)
    elif arg.startswith("--force-remote-binary="):
      force_remote_binary = _Value(arg, "--force-remote-binary=")
      emerge_args.append("--useoldpkg-atoms=%s" % force_remote_binary)
    elif arg.startswith("--retries="):
      self.max_retries = int(_Value(arg, "--retries="))
    elif arg == "--show-output":
      self.show_output = True
    elif arg == "--rebuild":
      emerge_args.append("--rebuild-if-unbuilt")
    elif arg == "--unpackonly":
      # Unpacking still needs the binary package, so have emerge fetch it.
      emerge_args.append("--fetchonly")
      self.unpack_only = True
    else:
      # Not one of our options, so pass through to emerge.
      emerge_args.append(arg)

  # These packages take a really long time to build, so, for expediency, we
  # are blacklisting them from automatic rebuilds because one of their
  # dependencies needs to be recompiled.
  for pkg in ("chromeos-base/chromeos-chrome",):
    emerge_args.append("--rebuild-exclude=%s" % pkg)

  return emerge_args
def Initialize(self, args):
  """Parse the command line and bring up the portage state we depend on.

  Fills in self.emerge (action, opts, settings, trees, mtimedb,
  cmdline_packages and root_config) and exports the environment variables
  that portage reads. Exits the process if --sysroot and --board are both
  given, or if an option parallel_emerge doesn't support is requested.

  Args:
    args: The full argument list (parallel_emerge and emerge options mixed).
  """
  # Peel off the arguments that are only meant for parallel_emerge.
  emerge_args = self.ParseParallelEmergeArgs(args)

  if self.sysroot and self.board:
    cros_build_lib.Die("--sysroot and --board are incompatible.")

  # emerge-${BOARD} would normally set up the environment that points our
  # tools at /build/BOARD and configures the cross compile for the board (per
  # toolchain.conf). We don't go through that script, so do it by hand.
  if self.board:
    self.sysroot = os.environ.get('SYSROOT',
                                  cros_build_lib.GetSysroot(self.board))

  if self.sysroot:
    for var in ("PORTAGE_CONFIGROOT", "SYSROOT"):
      os.environ[var] = self.sysroot

  # No interactive delays, please.
  for var, value in (("EBEEP_IGNORE", "1"),
                     ("EPAUSE_IGNORE", "1"),
                     ("CLEAN_DELAY", "0")):
    os.environ[var] = value

  # First pass over the emerge options.
  action, opts, cmdline_packages = parse_opts(emerge_args, silent=True)

  # Mirror selected options into the environment. emerge_main normally does
  # this, but it also does a bunch of other stuff we don't want, so we can't
  # just call it.
  # TODO(davidjames): Patch portage to move this logic into a function we can
  # reuse here.
  if "--debug" in opts:
    os.environ["PORTAGE_DEBUG"] = "1"
  for opt, var in (("--config-root", "PORTAGE_CONFIGROOT"),
                   ("--root", "ROOT"),
                   ("--accept-properties", "ACCEPT_PROPERTIES")):
    if opt in opts:
      os.environ[var] = opts[opt]

  # When installing to the board we can skip vardb locking. That's safe
  # because at most one parallel_emerge instance runs at a time.
  # TODO(davidjames): Enable this for the host too.
  if self.sysroot:
    os.environ.setdefault("PORTAGE_LOCKS", "false")

  # The environment is ready, so the emerge config can be read from disk.
  # pylint: disable=unpacking-non-sequence
  settings, trees, mtimedb = load_emerge_config()

  # Re-parse with EMERGE_DEFAULT_OPTS (if wanted) in front of our own args.
  tmpcmdline = []
  if "--ignore-default-opts" not in opts:
    tmpcmdline.extend(settings["EMERGE_DEFAULT_OPTS"].split())
  tmpcmdline.extend(emerge_args)
  action, opts, cmdline_packages = parse_opts(tmpcmdline)

  # For board installs, --root-deps makes portage put the build dependencies
  # in that location as well.
  if self.sysroot:
    opts.setdefault("--root-deps", True)

  # An out-of-date portage tree (typical for a fresh one, e.g. in setup_board
  # and make_chroot) makes portage apply global updates here. Afterwards we
  # must commit what the update changed in mtimedb and reload the config.
  # emerge_main normally takes care of this too.
  if _global_updates(trees, mtimedb["updates"]):
    mtimedb.commit()
    # pylint: disable=unpacking-non-sequence
    settings, trees, mtimedb = load_emerge_config(trees=trees)

  # Implied options, again as emerge_main would set them.
  if "--buildpkgonly" in opts or "buildpkg" in settings.features:
    opts.setdefault("--buildpkg", True)
  if "--getbinpkgonly" in opts:
    opts.setdefault("--usepkgonly", True)
    opts.setdefault("--getbinpkg", True)
  if "getbinpkg" in settings.features:
    # Per emerge_main, FEATURES=getbinpkg overrides --getbinpkg=n
    opts["--getbinpkg"] = True
  if "--getbinpkg" in opts or "--usepkgonly" in opts:
    opts.setdefault("--usepkg", True)
  if "--fetch-all-uri" in opts:
    opts.setdefault("--fetchonly", True)
  if "--skipfirst" in opts:
    opts.setdefault("--resume", True)
  if "--buildpkgonly" in opts:
    # Nothing gets merged with --buildpkgonly, so it trumps every binary
    # package option.
    for binpkg_opt in ("--getbinpkg", "--getbinpkgonly",
                       "--usepkg", "--usepkgonly"):
      opts.pop(binpkg_opt, None)
  if (settings.get("PORTAGE_DEBUG", "") == "1" and
      "python-trace" in settings.features):
    portage.debug.set_trace(True)

  # Refuse options we cannot honor.
  for unsupported in ("--ask", "--ask-enter-invalid", "--resume",
                      "--skipfirst"):
    if unsupported in opts:
      print("%s is not supported by parallel_emerge" % unsupported)
      sys.exit(1)

  # Let emerge make its own adjustments to the config (e.g. colors!)
  adjust_configs(opts, trees)

  # Record everything gathered so far on the emerge object.
  emerge = self.emerge
  emerge.action = action
  emerge.opts = opts
  emerge.settings = settings
  emerge.trees = trees
  emerge.mtimedb = mtimedb
  emerge.cmdline_packages = cmdline_packages
  root = settings["ROOT"]
  emerge.root_config = trees[root]["root_config"]

  if "--usepkg" in opts:
    emerge.trees[root]["bintree"].populate("--getbinpkg" in opts)
def CreateDepgraph(self, emerge, packages):
  """Build the portage depgraph and store it on the emerge object.

  Sets emerge.depgraph and emerge.favorites. Exits the process (after
  displaying the problems) when portage cannot satisfy the request.

  Args:
    emerge: An EmergeData() object; its settings, trees, opts, action and
      spinner are used.
    packages: The packages to compute the dependency graph for.
  """
  # Work on a copy of the options.
  opts_copy = emerge.opts.copy()

  # Have portage compute the dependency graph for those options.
  depgraph_params = create_depgraph_params(opts_copy, emerge.action)
  result = backtrack_depgraph(
      emerge.settings, emerge.trees, opts_copy, depgraph_params,
      emerge.action, packages, emerge.spinner)
  success, depgraph, favorites = result
  emerge.depgraph = depgraph

  # Bail out if the user's request can't be honored.
  if not success:
    depgraph.display_problems()
    sys.exit(1)

  emerge.favorites = favorites

  # Prime and flush emerge caches.
  vartree = emerge.trees[emerge.settings["ROOT"]]["vartree"]
  vardb = vartree.dbapi
  if "--pretend" not in emerge.opts:
    vardb.counter_tick()
  vardb.flush_cache()
def GenDependencyTree(self):
  """Ask portage for the dependency graph and convert it to plain dicts.

  Also records every package of the install plan in self.package_db.

  Returns:
    A (deps_tree, deps_info) tuple.
      deps_tree maps each package of the current root to
        {"action": operation, "deps": {dep: {"action", "deptypes", "deps"}}}.
      deps_info maps each package in portage's install plan to
        {"idx": position in that plan}.
  """
  began = time.time()
  emerge = self.emerge

  # The packages we were asked to merge.
  packages = set(emerge.cmdline_packages[:])

  # Keep portage quiet; we print plenty of info ourselves.
  portage.util.noiselimit = -1

  # Use a spinner that never spins. (Colors stay on: they look kind of cool.)
  spinner = stdout_spinner()
  spinner.update = spinner.update_quiet
  emerge.spinner = spinner

  if "--quiet" not in emerge.opts:
    print("Calculating deps...")

  self.CreateDepgraph(emerge, packages)
  depgraph = emerge.depgraph

  # Convert the emerge digraph into our own tree.
  deps_tree = {}
  # pylint: disable=W0212
  digraph = depgraph._dynamic_config.digraph
  root = emerge.settings["ROOT"]
  final_db = depgraph._dynamic_config._filtered_trees[root]['graph_db']
  for node, node_deps in digraph.nodes.items():
    # Every child of a node in the digraph is one of its dependencies, i.e.
    # something that has to be installed first. child.operation says what is
    # being done with it (merge, uninstall, ...) and the priorities list holds
    # the dependency types (build, runtime, runtime_post, ...).
    #
    # Portage identifies packages by CPV (Component/Path/Version). E.g. for
    # chromeos-base/power_manager-0.0.1-r1:
    #   C -- Component: chromeos-base
    #   P -- Path: power_manager
    #   V -- Version: 0.0.1-r1
    #
    # We simply call CPVs "packages" here.
    deps = {}
    for child, priorities in node_deps[0].items():
      if not (isinstance(child, Package) and child.root == root):
        continue
      cpv = str(child.cpv)
      action = str(child.operation)

      # An uninstall for which portage also installs a replacement becomes a
      # dependency on merging that replacement: the old package is removed
      # automatically at that point.
      if action == "uninstall":
        for pkg in final_db.match_pkgs(child.slot_atom):
          cpv = str(pkg.cpv)
          action = "merge"
          break

      deptypes = [str(x) for x in priorities]
      deps[cpv] = dict(action=action, deptypes=deptypes, deps={})

    # Deps are collected; now file the package itself in the tree.
    if isinstance(node, Package) and node.root == root:
      deps_tree[str(node.cpv)] = dict(action=str(node.operation), deps=deps)

  # Get portage's install plan, so that the only dependencies we throw out
  # are the ones portage throws out.
  deps_info = {}
  for pkg in depgraph.altlist():
    if not isinstance(pkg, Package):
      continue
    assert pkg.root == root
    self.package_db[pkg.cpv] = pkg

    # Remember where in the plan this package sits.
    deps_info[str(pkg.cpv)] = {"idx": len(deps_info)}

  seconds = time.time() - began
  if "--quiet" not in emerge.opts:
    print("Deps calculated in %dm%.1fs" % (seconds / 60, seconds % 60))

  return deps_tree, deps_info
def PrintTree(self, deps, depth=""):
  """Print a dependency tree, one package per line, in sorted order.

  Args:
    deps: Dependency tree structure (package -> {"action", "deps"}).
    depth: Indentation prefix; it grows by one space per recursion level.
  """
  nested_depth = depth + " "
  for name in sorted(deps):
    node = deps[name]
    print("%s %s (%s)" % (depth, name, node["action"]))
    # Recurse into this package's own dependencies, indented one step more.
    self.PrintTree(node["deps"], depth=nested_depth)
def GenDependencyGraph(self, deps_tree, deps_info):
  """Generate a doubly linked dependency graph.

  The tree is reversed into a graph, packages that are not in the install
  plan are spliced out, cycles are broken following the install-plan order,
  and finally the transitive reverse dependencies are computed.

  Args:
    deps_tree: Dependency tree structure, as built by GenDependencyTree:
      package -> {"action", "deps": {dep: {"action", "deptypes", "deps"}}}.
    deps_info: More details on the dependencies: maps every package of the
      install plan to {"idx": position in the plan}.

  Returns:
    Deps graph in the form of a dict of packages, with each package
    specifying a "needs" dict (dep -> "/"-joined dependency types), a
    "provides" set (reverse dependencies) and a "tprovides" set (transitive
    reverse dependencies), plus "action", "nodeps", "binary" and "idx".
  """
  emerge = self.emerge

  # deps_map is the actual dependency graph.
  #
  # Each package specifies a "needs" list and a "provides" list. The "needs"
  # list indicates which packages we depend on. The "provides" list
  # indicates the reverse dependencies -- what packages need us.
  #
  # We also provide some other information in the dependency graph:
  #  - action: What we're planning on doing with this package. Generally,
  #            "merge", "nomerge", or "uninstall"
  deps_map = {}

  def ReverseTree(packages):
    """Convert tree to digraph.

    Take the tree of package -> requirements and reverse it to a digraph of
    buildable packages -> packages they unblock. The result is accumulated
    in deps_map. Exits the process on an unknown dependency type.

    Args:
      packages: Tree(s) of dependencies.
    """
    binpkg_phases = set(["setup", "preinst", "postinst"])
    needed_dep_types = set(["blocker", "buildtime", "buildtime_slot_op",
                            "runtime", "runtime_slot_op"])
    ignored_dep_types = set(["ignored", "optional", "runtime_post", "soft"])
    all_dep_types = ignored_dep_types | needed_dep_types
    for pkg in packages:
      # Create an entry for the package
      action = packages[pkg]["action"]
      default_pkg = {"needs": {}, "provides": set(), "action": action,
                     "nodeps": False, "binary": False}
      this_pkg = deps_map.setdefault(pkg, default_pkg)

      if pkg in deps_info:
        this_pkg["idx"] = deps_info[pkg]["idx"]

      # If a package doesn't have any defined phases that might use the
      # dependent packages (i.e. pkg_setup, pkg_preinst, or pkg_postinst),
      # we can install this package before its deps are ready.
      emerge_pkg = self.package_db.get(pkg)
      if emerge_pkg and emerge_pkg.type_name == "binary":
        this_pkg["binary"] = True
        defined_phases = emerge_pkg.defined_phases
        defined_binpkg_phases = binpkg_phases.intersection(defined_phases)
        if not defined_binpkg_phases:
          this_pkg["nodeps"] = True

      # Create entries for dependencies of this package first.
      ReverseTree(packages[pkg]["deps"])

      # Add dependencies to this package.
      # NOTE: items() rather than the Python-2-only iteritems(), so that this
      # also runs on Python 3.
      for dep, dep_item in packages[pkg]["deps"].items():
        # We only need to enforce strict ordering of dependencies if the
        # dependency is a blocker, or is a buildtime or runtime dependency.
        # (I.e., ignored, optional, and runtime_post dependencies don't
        # depend on ordering.)
        dep_types = dep_item["deptypes"]
        if needed_dep_types.intersection(dep_types):
          deps_map[dep]["provides"].add(pkg)
          this_pkg["needs"][dep] = "/".join(dep_types)

        # Verify we processed all appropriate dependency types.
        unknown_dep_types = set(dep_types) - all_dep_types
        if unknown_dep_types:
          print("Unknown dependency types found:")
          print(" %s -> %s (%s)" % (pkg, dep, "/".join(unknown_dep_types)))
          sys.exit(1)

        # If there's a blocker, Portage may need to move files from one
        # package to another, which requires editing the CONTENTS files of
        # both packages. To avoid race conditions while editing this file,
        # the two packages must not be installed in parallel, so we can't
        # safely ignore dependencies. See http://crosbug.com/19328
        if "blocker" in dep_types:
          this_pkg["nodeps"] = False

  def FindCycles():
    """Find cycles in the dependency tree.

    Returns:
      A dict mapping cyclic packages to a dict of the deps that cause
      cycles. For each dep that causes cycles, it returns an example
      traversal of the graph that shows the cycle.
    """

    def FindCyclesAtNode(pkg, cycles, unresolved, resolved):
      """Find cycles in cyclic dependencies starting at specified package.

      Args:
        pkg: Package identifier.
        cycles: A dict mapping cyclic packages to a dict of the deps that
          cause cycles. For each dep that causes cycles, it returns an
          example traversal of the graph that shows the cycle.
        unresolved: Nodes that have been visited but are not fully processed.
        resolved: Nodes that have been visited and are fully processed.
      """
      pkg_cycles = cycles.get(pkg)
      if pkg in resolved and not pkg_cycles:
        # If we already looked at this package, and found no cyclic
        # dependencies, we can stop now.
        return
      unresolved.append(pkg)
      for dep in deps_map[pkg]["needs"]:
        if dep in unresolved:
          # dep is on the current traversal path: we closed a cycle. Record
          # the example traversal on every edge that takes part in it.
          idx = unresolved.index(dep)
          mycycle = unresolved[idx:] + [dep]
          for i in range(len(mycycle) - 1):
            pkg1, pkg2 = mycycle[i], mycycle[i+1]
            cycles.setdefault(pkg1, {}).setdefault(pkg2, mycycle)
        elif not pkg_cycles or dep not in pkg_cycles:
          # Looks like we haven't seen this edge before.
          FindCyclesAtNode(dep, cycles, unresolved, resolved)
      unresolved.pop()
      resolved.add(pkg)

    cycles, unresolved, resolved = {}, [], set()
    for pkg in deps_map:
      FindCyclesAtNode(pkg, cycles, unresolved, resolved)
    return cycles

  def RemoveUnusedPackages():
    """Remove installed packages, propagating dependencies."""
    # Schedule packages that aren't on the install list for removal
    rm_pkgs = set(deps_map.keys()) - set(deps_info.keys())

    # Remove the packages we don't want, simplifying the graph and making
    # it easier for us to crack cycles.
    for pkg in sorted(rm_pkgs):
      this_pkg = deps_map[pkg]
      needs = this_pkg["needs"]
      provides = this_pkg["provides"]
      # Whatever needed pkg now directly needs pkg's own dependencies...
      for dep in needs:
        dep_provides = deps_map[dep]["provides"]
        dep_provides.update(provides)
        dep_provides.discard(pkg)
        dep_provides.discard(dep)
      # ...and vice versa, without introducing self-references.
      for target in provides:
        target_needs = deps_map[target]["needs"]
        target_needs.update(needs)
        target_needs.pop(pkg, None)
        target_needs.pop(target, None)
      del deps_map[pkg]

  def PrintCycleBreak(basedep, dep, mycycle):
    """Print details about a cycle that we are planning on breaking.

    We are breaking a cycle where dep needs basedep. mycycle is an
    example cycle which contains dep -> basedep.
    """
    needs = deps_map[dep]["needs"]
    depinfo = needs.get(basedep, "deleted")

    # It's OK to swap install order for blockers, as long as the two
    # packages aren't installed in parallel. If there is a cycle, then
    # we know the packages depend on each other already, so we can drop the
    # blocker safely without printing a warning.
    if depinfo == "blocker":
      return

    # Notify the user that we're breaking a cycle.
    print("Breaking %s -> %s (%s)" % (dep, basedep, depinfo))

    # Show cycle.
    for i in range(len(mycycle) - 1):
      pkg1, pkg2 = mycycle[i], mycycle[i+1]
      needs = deps_map[pkg1]["needs"]
      depinfo = needs.get(pkg2, "deleted")
      if pkg1 == dep and pkg2 == basedep:
        depinfo = depinfo + ", deleting"
      print(" %s -> %s (%s)" % (pkg1, pkg2, depinfo))

  def SanitizeTree():
    """Remove circular dependencies.

    We prune all dependencies involved in cycles that go against the emerge
    ordering. This has a nice property: we're guaranteed to merge
    dependencies in the same order that portage does.

    Because we don't treat any dependencies as "soft" unless they're killed
    by a cycle, we pay attention to a larger number of dependencies when
    merging. This hurts performance a bit, but helps reliability.
    """
    start = time.time()
    cycles = FindCycles()
    while cycles:
      for dep, mycycles in cycles.items():
        for basedep, mycycle in mycycles.items():
          # Only drop edges that point at something portage installs later
          # (or at the package itself).
          if deps_info[basedep]["idx"] >= deps_info[dep]["idx"]:
            if "--quiet" not in emerge.opts:
              PrintCycleBreak(basedep, dep, mycycle)
            del deps_map[dep]["needs"][basedep]
            deps_map[basedep]["provides"].remove(dep)
      cycles = FindCycles()
    seconds = time.time() - start
    if "--quiet" not in emerge.opts and seconds >= 0.1:
      print("Tree sanitized in %dm%.1fs" % (seconds / 60, seconds % 60))

  def FindRecursiveProvides(pkg, seen):
    """Find all nodes that require a particular package.

    Assumes that graph is acyclic. The result is stored in the package's
    "tprovides" entry.

    Args:
      pkg: Package identifier.
      seen: Nodes that have been visited so far.
    """
    if pkg in seen:
      return
    seen.add(pkg)
    info = deps_map[pkg]
    info["tprovides"] = info["provides"].copy()
    for dep in info["provides"]:
      FindRecursiveProvides(dep, seen)
      info["tprovides"].update(deps_map[dep]["tprovides"])

  ReverseTree(deps_tree)

  # We need to remove unused packages so that we can use the dependency
  # ordering of the install process to show us what cycles to crack.
  RemoveUnusedPackages()
  SanitizeTree()
  seen = set()
  for pkg in deps_map:
    FindRecursiveProvides(pkg, seen)
  return deps_map
def PrintInstallPlan(self, deps_map):
  """Print an emerge-style install plan.

  The install plan lists what packages we're installing, in order.
  It's useful for understanding what parallel_emerge is doing. The plan is
  computed on a deep copy, so the caller's deps_map is left untouched. If
  some packages can never be scheduled (cyclic dependencies), they are
  printed and the process exits with status 1.

  Args:
    deps_map: The dependency graph.
  """

  def InstallPlanAtNode(target, deps_map):
    """Return target followed by everything that becomes installable."""
    nodes = []
    nodes.append(target)
    for dep in deps_map[target]["provides"]:
      # target is now "installed": it no longer holds back its dependents.
      del deps_map[dep]["needs"][target]
      if not deps_map[dep]["needs"]:
        nodes.extend(InstallPlanAtNode(dep, deps_map))
    return nodes

  deps_map = copy.deepcopy(deps_map)
  install_plan = []
  plan = set()
  # NOTE: items() rather than the Python-2-only iteritems(), so that this
  # also runs on Python 3. Only the nested "needs" dicts are modified while
  # iterating, never the set of keys of deps_map itself.
  for target, info in deps_map.items():
    if not info["needs"] and target not in plan:
      for item in InstallPlanAtNode(target, deps_map):
        plan.add(item)
        install_plan.append(self.package_db[item])

  for pkg in plan:
    del deps_map[pkg]

  # Anything left over was never unblocked.
  if deps_map:
    print("Cyclic dependencies:", " ".join(deps_map))
    PrintDepsMap(deps_map)
    sys.exit(1)

  self.emerge.depgraph.display(install_plan)
def PrintDepsMap(deps_map):
  """Print the dependency graph: each package followed by its prerequisites.

  Args:
    deps_map: The dependency graph (package -> {"action", "needs", ...}).
  """
  for pkg in sorted(deps_map):
    entry = deps_map[pkg]
    print("%s: (%s) needs" % (pkg, entry["action"]))
    needs = entry["needs"]
    if needs:
      for prerequisite in sorted(needs):
        print(" %s" % (prerequisite))
    else:
      print(" no dependencies")
class EmergeJobState(object):
  """Record describing one emerge job (a single target being processed).

  Attributes:
    target: The full name of the target we're building (e.g.
      virtual/target-os-1-r60).
    pkgname: The short name of the target we're building (e.g.
      target-os-1-r60).
    done: Whether the job is done (True if the job is done; False otherwise).
    filename: The filename where output is currently stored.
    start_timestamp: The timestamp when our job started.
    last_notify_timestamp: The timestamp of the last time we printed the name
      of the log file. That is printed at the beginning of the job, so this
      starts at start_timestamp.
    last_output_seek: The location (in bytes) of the end of the last complete
      line we printed. Starts at zero; used to jump to the right place when
      output from the same ebuild is printed multiple times.
    last_output_timestamp: The timestamp of the last time we printed output.
      Starts at zero, since nothing has been printed yet.
    retcode: The return code of our job, if the job is actually finished.
    fetch_only: Was this just a fetch job?
    unpack_only: No emerge, only unpack packages.
  """

  __slots__ = ["done", "filename", "last_notify_timestamp", "last_output_seek",
               "last_output_timestamp", "pkgname", "retcode", "start_timestamp",
               "target", "fetch_only", "unpack_only"]

  def __init__(self, target, pkgname, done, filename, start_timestamp,
               retcode=None, fetch_only=False, unpack_only=False):
    # What we are working on.
    self.target, self.pkgname = target, pkgname
    self.fetch_only, self.unpack_only = fetch_only, unpack_only

    # How far along the job is.
    self.done, self.retcode = done, retcode

    # Where the output lives, and how much of it has been shown so far.
    self.filename = filename
    self.last_output_seek = 0
    self.last_output_timestamp = 0

    # Timing. The log file name is announced when the job starts, hence the
    # initial value of last_notify_timestamp.
    self.start_timestamp = start_timestamp
    self.last_notify_timestamp = start_timestamp
def KillHandler(_signum, _frame):
  """Signal handler: SIGKILL ourselves together with all our subprocesses.

  Args:
    _signum: Signal number (unused).
    _frame: Current stack frame (unused).
  """
  # Sent to process group 0, which the original comment describes as "self
  # and all subprocesses".
  own_group = 0
  os.killpg(own_group, signal.SIGKILL)
def SetupWorkerSignals():
  """Install SIGINT/SIGTERM handlers that let a worker shut down quietly.

  By default, when the user hits CTRL-C, all of the child processes print
  details about KeyboardInterrupt exceptions, which isn't very helpful. The
  first signal therefore only sets the KILLED flag; from then on KillHandler
  is in charge, so a second signal kills everything.
  """
  handled_signals = (signal.SIGINT, signal.SIGTERM)

  def ExitHandler(_signum, _frame):
    # Set KILLED flag.
    KILLED.set()

    # Swap ourselves out for KillHandler so we don't get called recursively.
    for signum in handled_signals:
      signal.signal(signum, KillHandler)

  for signum in handled_signals:
    signal.signal(signum, ExitHandler)
def EmergeProcess(output, target, *args, **kwargs):
  """Merge a package in a forked subprocess.

  The child redirects its stdout/stderr to |output|, runs a portage Scheduler
  and leaves through os._exit(); only the parent returns from this function.

  Args:
    output: Temporary file to write output.
    target: The package we'll be processing (for display purposes).
    *args: Arguments to pass to Scheduler constructor.
    **kwargs: Keyword arguments to pass to Scheduler constructor.

  Returns:
    The status that os.waitpid() reports for the subprocess (the second
    element of its result).
  """
  pid = os.fork()
  if pid != 0:
    # Parent: wait for the child and hand back its status.
    return os.waitpid(pid, 0)[1]

  # Everything below runs in the child only.
  try:
    proctitle.settitle('EmergeProcess', target)

    # Sanity checks.
    if sys.stdout.fileno() != 1:
      raise Exception("sys.stdout.fileno() != 1")
    if sys.stderr.fileno() != 2:
      raise Exception("sys.stderr.fileno() != 2")

    # - fds 1 (stdout) and 2 (stderr) go to our temporary file.
    # - fd 0 is pointed at sys.stdin. Here sys.stdin is a file reading
    #   os.devnull, because multiprocessing mucks with sys.stdin.
    # - The sys.stdin and output filehandles themselves are left alone.
    stdin_fd = sys.stdin.fileno()
    output_fd = output.fileno()
    fd_pipes = {0: stdin_fd,
                1: output_fd,
                2: output_fd,
                stdin_fd: stdin_fd,
                output_fd: output_fd}
    # pylint: disable=W0212
    portage.process._setup_pipes(fd_pipes, close_fds=False)

    # Portage doesn't like when sys.stdin.fileno() != 0, so point sys.stdin
    # at the filehandle we just created in _setup_pipes.
    if sys.stdin.fileno() != 0:
      sys.__stdin__ = sys.stdin = os.fdopen(0, "r")

    scheduler = Scheduler(*args, **kwargs)

    # Keep blocker handling on even though we're in --nodeps mode, so that
    # the blocker can be unmerged once its replacement has been merged.
    scheduler._opts_ignore_blockers = frozenset()

    # Actually do the merge.
    retval = scheduler.merge()

  # Deliberately catch everything (SystemExit, KeyboardInterrupt, ... too):
  # the multiprocessing module expects all forked children to exit with
  # os._exit(), and anything escaping from here would confuse it.
  # pylint: disable=W0702
  except:
    traceback.print_exc(file=output)
    retval = 1

  for stream in (sys.stdout, sys.stderr, output):
    stream.flush()
  # pylint: disable=W0212
  os._exit(retval)
def UnpackPackage(pkg_state):
  """Unpacks package described by pkg_state.

  The binary package is located under $PKGDIR (default: $SYSROOT/packages),
  decompressed with the bzip2 compressor found by cros_build_lib, and the
  resulting tar stream is extracted into $ROOT (default: $SYSROOT) via sudo.

  Args:
    pkg_state: EmergeJobState object describing target.

  Returns:
    Exit code returned by subprocess.

  Raises:
    KeyError: If SYSROOT is needed as a fallback but is not set.
  """
  # Only consult SYSROOT when it is actually needed as a fallback, so that a
  # fully specified PKGDIR/ROOT environment works without it.
  pkgdir = os.environ.get("PKGDIR")
  if pkgdir is None:
    pkgdir = os.path.join(os.environ["SYSROOT"], "packages")
  root = os.environ.get("ROOT")
  if root is None:
    root = os.environ["SYSROOT"]

  path = os.path.join(pkgdir, pkg_state.target + ".tbz2")
  comp = cros_build_lib.FindCompressor(cros_build_lib.COMP_BZIP2)
  cmd = [comp, "-dc"]
  if comp.endswith("pbzip2"):
    cmd.append("--ignore-trailing-garbage=1")
  cmd.append(path)

  result = cros_build_lib.RunCommand(cmd, cwd=root, stdout_to_pipe=True,
                                     print_cmd=False, error_code_ok=True)

  # If we were not successful, return now and don't attempt untar.
  if result.returncode:
    return result.returncode

  # Feed the decompressed stream captured above to tar on stdin.
  cmd = ["sudo", "tar", "-xf", "-", "-C", root]
  result = cros_build_lib.RunCommand(cmd, cwd=root, input=result.output,
                                     print_cmd=False, error_code_ok=True)

  return result.returncode
def EmergeWorker(task_queue, job_queue, emerge, package_db, fetch_only=False,
                 unpack_only=False):
  """This worker emerges any packages given to it on the task_queue.

  Args:
    task_queue: The queue of tasks for this worker to do.
    job_queue: The queue of results from the worker.
    emerge: An EmergeData() object.
    package_db: A dict, mapping package ids to portage Package objects.
    fetch_only: A bool, indicating if we should just fetch the target.
    unpack_only: A bool, indicating if we should just unpack the target.

  It expects package states to be passed to it via task_queue; a None entry
  tells the worker (and, by re-queueing it, its siblings) to quit. For every
  task an EmergeJobState is pushed to job_queue when the work starts and
  another one, carrying the return code, when it finishes. The output of the
  task is stored in the temporary file named by the job state.
  """
  if fetch_only:
    mode = 'fetch'
  elif unpack_only:
    mode = 'unpack'
  else:
    mode = 'emerge'
  proctitle.settitle('EmergeWorker', mode, '[idle]')

  SetupWorkerSignals()
  settings, trees, mtimedb = emerge.settings, emerge.trees, emerge.mtimedb

  # Disable flushing of caches to save on I/O.
  root = emerge.settings["ROOT"]
  vardb = emerge.trees[root]["vartree"].dbapi
  vardb._flush_cache_enabled = False  # pylint: disable=protected-access
  bindb = emerge.trees[root]["bintree"].dbapi
  # Might be a set, might be a list, might be None; no clue, just use shallow
  # copy to ensure we can roll it back.
  # pylint: disable=W0212
  original_remotepkgs = copy.copy(bindb.bintree._remotepkgs)

  opts, spinner = emerge.opts, emerge.spinner
  opts["--nodeps"] = True
  if fetch_only:
    opts["--fetchonly"] = True

  while True:
    # Wait for a new item to show up on the queue. This is a blocking wait,
    # so if there's nothing to do, we just sit here.
    pkg_state = task_queue.get()
    if pkg_state is None:
      # If target is None, this means that the main thread wants us to quit.
      # The other workers need to exit too, so we'll push the message back on
      # to the queue so they'll get it too.
      task_queue.put(None)
      return
    if KILLED.is_set():
      return

    target = pkg_state.target
    proctitle.settitle('EmergeWorker', mode, target)

    db_pkg = package_db[target]

    if (db_pkg.type_name == "binary" and not fetch_only and
        pkg_state.fetched_successfully):
      # Ensure portage doesn't think our pkg is remote- else it'll force
      # a redownload of it (even if the on-disk file is fine).  In-memory
      # caching basically, implemented dumbly.
      bindb.bintree._remotepkgs = None
    else:
      # Roll back to the saved value whenever we are not deliberately hiding
      # the remote package list, so a previous task's override cannot leak
      # into this one.
      bindb.bintree._remotepkgs = original_remotepkgs

    db_pkg.root_config = emerge.root_config
    install_list = [db_pkg]
    pkgname = db_pkg.pf
    output = tempfile.NamedTemporaryFile(prefix=pkgname + "-", delete=False)
    # The mode must be given in octal to get rw-r--r--.
    os.chmod(output.name, 0o644)
    start_timestamp = time.time()
    job = EmergeJobState(target, pkgname, False, output.name, start_timestamp,
                         fetch_only=fetch_only, unpack_only=unpack_only)
    job_queue.put(job)
    if "--pretend" in opts:
      retcode = 0
    else:
      try:
        emerge.scheduler_graph.mergelist = install_list
        if unpack_only:
          retcode = UnpackPackage(pkg_state)
        else:
          retcode = EmergeProcess(output, target, settings, trees, mtimedb,
                                  opts, spinner, favorites=emerge.favorites,
                                  graph_config=emerge.scheduler_graph)
      except Exception:
        traceback.print_exc(file=output)
        retcode = 1
      output.close()

    if KILLED.is_set():
      return

    job = EmergeJobState(target, pkgname, True, output.name, start_timestamp,
                         retcode, fetch_only=fetch_only,
                         unpack_only=unpack_only)
    job_queue.put(job)

    # Set the title back to idle as the multiprocess pool won't destroy us;
    # when another job comes up, it'll re-use this process.
    proctitle.settitle('EmergeWorker', mode, '[idle]')
class LinePrinter(object):
  """Printable item that emits one fixed line of text."""

  def __init__(self, line):
    """Remember the text to emit later."""
    self.line = line

  def Print(self, _seek_locations):
    """Write the stored line to stdout; the seek map is not used."""
    text = self.line
    print(text)
class JobPrinter(object):
  """Helper object to print output of a job."""

  def __init__(self, job, unlink=False):
    """Print output of job.

    If unlink is True, unlink the job output file when done.

    Args:
      job: Job state object; Print() reads its pkgname, filename,
        start_timestamp and done attributes and updates
        last_output_timestamp.
      unlink: Whether to delete the job's output file after printing.
    """
    # Captured at construction time, so elapsed times reflect when the print
    # was requested rather than when it is carried out.
    self.current_time = time.time()
    self.job = job
    self.unlink = unlink

  def Print(self, seek_locations):
    """Print the part of the job's log that has not been printed yet.

    Args:
      seek_locations: Dict mapping log filename to the offset already
        printed; updated in place once the file has been read.
    """
    job = self.job

    # Calculate how long the job has been running.
    seconds = self.current_time - job.start_timestamp

    # Note that we've printed out the job so far.
    job.last_output_timestamp = self.current_time

    # Note that we're starting the job
    info = "job %s (%dm%.1fs)" % (job.pkgname, seconds / 60, seconds % 60)
    last_output_seek = seek_locations.get(job.filename, 0)
    if last_output_seek:
      print("=== Continue output for %s ===" % info)
    else:
      print("=== Start output for %s ===" % info)

    # Print actual output from job. The with-statement guarantees the log is
    # closed even if printing is interrupted part way through.
    prefix = job.pkgname + ":"
    with codecs.open(job.filename, encoding='utf-8', errors='replace') as f:
      f.seek(last_output_seek)
      for line in f:
        # Save off our position in the file; only complete lines advance it.
        if line and line[-1] == "\n":
          last_output_seek = f.tell()
          line = line[:-1]

        # Print our line
        print(prefix, line.encode('utf-8', 'replace'))

    # Save our last spot in the file so that we don't print out the same
    # location twice.
    seek_locations[job.filename] = last_output_seek

    # Note end of output section
    if job.done:
      print("=== Complete: %s ===" % info)
    else:
      print("=== Still running: %s ===" % info)

    if self.unlink:
      os.unlink(job.filename)
def PrintWorker(queue):
  """Drain |queue|, printing each item until a false-y item arrives.

  Every item taken from the queue is asked to Print() itself and stdout is
  flushed afterwards. The loop ends on the first false-y item (e.g. None).
  """
  proctitle.settitle('PrintWorker')

  handled_signals = (signal.SIGINT, signal.SIGTERM)

  def ExitHandler(_signum, _frame):
    # Record that a shutdown was requested.
    KILLED.set()
    # Hand further signals to KillHandler so that a second signal ends us.
    for signum in handled_signals:
      signal.signal(signum, KillHandler)

  # The first SIGINT / SIGTERM must not terminate us: the parent worker
  # deals with it and tells us when it is time to exit.
  for signum in handled_signals:
    signal.signal(signum, ExitHandler)

  # Maps each log file to the offset already printed. Starts out empty and
  # is filled in by the Print jobs so that they can resume where they left
  # off.
  seek_locations = {}
  while True:
    try:
      printer = queue.get()
      if not printer:
        break
      printer.Print(seek_locations)
      sys.stdout.flush()
    except IOError as ex:
      # An interrupted call just means a signal arrived; keep printing.
      if ex.errno != errno.EINTR:
        raise
class TargetState(object):
  """Structure describing the scheduling state of a single target.

  Instances order themselves by |score|, so a smaller score sorts first when
  they are stored in a heap.
  """

  __slots__ = ("target", "info", "score", "prefetched", "fetched_successfully")

  def __init__(self, target, info):
    """Initialize the state.

    Args:
      target: Identifier of the package this state describes.
      info: Dict for the target; update_score() reads its "tprovides",
        "needs", "binary", "provides" and "idx" entries.
    """
    self.target, self.info = target, info
    self.fetched_successfully = False
    self.prefetched = False
    self.score = None
    self.update_score()

  def __cmp__(self, other):
    # Three-way comparison for Python 2; written without the cmp() builtin so
    # it stays valid on interpreters that no longer provide it.
    return (self.score > other.score) - (self.score < other.score)

  def __lt__(self, other):
    # heapq only needs "<"; defining it keeps the ordering working on
    # interpreters that ignore __cmp__.
    return self.score < other.score

  def update_score(self):
    """Recompute |score| from the current contents of |info|."""
    self.score = (
        -len(self.info["tprovides"]),
        len(self.info["needs"]),
        not self.info["binary"],
        -len(self.info["provides"]),
        self.info["idx"],
        self.target,
    )
class ScoredHeap(object):
  """Min-heap of scored items that also tracks membership by target."""

  __slots__ = ("heap", "_heap_set")

  def __init__(self, initial=()):
    """Create the heap, optionally seeded with the items in |initial|."""
    self.heap = []
    self._heap_set = set()
    if initial:
      self.multi_put(initial)

  def get(self):
    """Remove and return the smallest item."""
    smallest = heapq.heappop(self.heap)
    self._heap_set.remove(smallest.target)
    return smallest

  def put(self, item):
    """Add a single TargetState; anything else raises ValueError."""
    if isinstance(item, TargetState):
      heapq.heappush(self.heap, item)
      self._heap_set.add(item.target)
      return
    raise ValueError("Item %r isn't a TargetState" % (item,))

  def multi_put(self, sequence):
    """Add every item of |sequence|, then restore the heap ordering."""
    items = list(sequence)
    self.heap.extend(items)
    self._heap_set.update(entry.target for entry in items)
    self.sort()

  def sort(self):
    """Re-establish the heap ordering, e.g. after scores have changed."""
    heapq.heapify(self.heap)

  def __contains__(self, target):
    return target in self._heap_set

  def __nonzero__(self):
    return bool(self.heap)

  def __len__(self):
    return len(self.heap)
class EmergeQueue(object):
  """Class to schedule emerge jobs according to a dependency graph.

  Packages pass through a fetch stage, an optional unpack stage and a build
  stage, each served by its own pool of EmergeWorker processes. The workers
  report back by pushing job states onto a shared job queue, which Run()
  consumes to decide what to schedule next.
  """

  def __init__(self, deps_map, emerge, package_db, show_output, unpack_only,
               max_retries):
    """Set up the worker pools and queue every package for fetching.

    Exits the process right away in --pretend mode. Otherwise this forks (see
    _SetupSession), removes "--jobs" and "--load-average" from emerge.opts and
    installs signal handlers.

    Args:
      deps_map: Dict mapping each package to its dependency info dict.
      emerge: Emerge data object; its opts and depgraph are read and its
        scheduler_graph attribute is set here.
      package_db: Package database handed through to the workers.
      show_output: Whether to print the output of successful jobs too.
      unpack_only: Whether to run the additional unpack stage.
      max_retries: How many times a failed build may be retried.
    """
    # Store the dependency graph.
    self._deps_map = deps_map
    self._state_map = {}
    # Initialize the running queue to empty
    self._build_jobs = {}
    self._build_ready = ScoredHeap()
    self._fetch_jobs = {}
    self._fetch_ready = ScoredHeap()
    self._unpack_jobs = {}
    self._unpack_ready = ScoredHeap()
    # List of total package installs represented in deps_map.
    install_jobs = [x for x in deps_map if deps_map[x]["action"] == "merge"]
    self._total_jobs = len(install_jobs)
    self._show_output = show_output
    self._unpack_only = unpack_only
    self._max_retries = max_retries

    if "--pretend" in emerge.opts:
      print("Skipping merge because of --pretend mode.")
      sys.exit(0)

    # Set up a session so we can easily terminate all children.
    self._SetupSession()

    # Setup scheduler graph object. This is used by the child processes
    # to help schedule jobs.
    emerge.scheduler_graph = emerge.depgraph.schedulerGraph()

    # Calculate how many jobs we can run in parallel. We don't want to pass
    # the --jobs flag over to emerge itself, because that'll tell emerge to
    # hide its output, and said output is quite useful for debugging hung
    # jobs.
    procs = min(self._total_jobs,
                emerge.opts.pop("--jobs", multiprocessing.cpu_count()))
    self._build_procs = self._unpack_procs = max(1, procs)
    # Fetch is IO bound, we can use more processes.
    self._fetch_procs = max(4, procs)
    self._load_avg = emerge.opts.pop("--load-average", None)
    self._job_queue = multiprocessing.Queue()
    self._print_queue = multiprocessing.Queue()

    self._fetch_queue = multiprocessing.Queue()
    args = (self._fetch_queue, self._job_queue, emerge, package_db, True)
    self._fetch_pool = multiprocessing.Pool(self._fetch_procs, EmergeWorker,
                                            args)

    self._build_queue = multiprocessing.Queue()
    args = (self._build_queue, self._job_queue, emerge, package_db)
    self._build_pool = multiprocessing.Pool(self._build_procs, EmergeWorker,
                                            args)

    # Always define the unpack attributes so they exist in every mode.
    self._unpack_queue = self._unpack_pool = None
    if self._unpack_only:
      # Unpack pool only required on unpack_only jobs.
      self._unpack_queue = multiprocessing.Queue()
      args = (self._unpack_queue, self._job_queue, emerge, package_db, False,
              True)
      self._unpack_pool = multiprocessing.Pool(self._unpack_procs,
                                               EmergeWorker, args)

    self._print_worker = multiprocessing.Process(target=PrintWorker,
                                                 args=[self._print_queue])
    self._print_worker.start()

    # Initialize the failed queue to empty.
    self._retry_queue = []
    self._failed_count = dict()

    # Setup an exit handler so that we print nice messages if we are
    # terminated.
    self._SetupExitHandler()

    # Schedule our jobs.
    self._state_map.update(
        (pkg, TargetState(pkg, data)) for pkg, data in deps_map.items())
    self._fetch_ready.multi_put(self._state_map.values())

  def _SetupSession(self):
    """Set up a session so we can easily terminate all children."""
    # When we call os.setsid(), this sets up a session / process group for this
    # process and all children. These session groups are needed so that we can
    # easily kill all children (including processes launched by emerge) before
    # we exit.
    #
    # One unfortunate side effect of os.setsid() is that it blocks CTRL-C from
    # being received. To work around this, we only call os.setsid() in a forked
    # process, so that the parent can still watch for CTRL-C. The parent will
    # just sit around, watching for signals and propagating them to the child,
    # until the child exits.
    #
    # TODO(davidjames): It would be nice if we could replace this with cgroups.
    pid = os.fork()
    if pid == 0:
      os.setsid()
    else:
      proctitle.settitle('SessionManager')

      def PropagateToChildren(signum, _frame):
        # Just propagate the signals down to the child. We'll exit when the
        # child does.
        try:
          os.kill(pid, signum)
        except OSError as ex:
          if ex.errno != errno.ESRCH:
            raise
      signal.signal(signal.SIGINT, PropagateToChildren)
      signal.signal(signal.SIGTERM, PropagateToChildren)

      def StopGroup(_signum, _frame):
        # When we get stopped, stop the children.
        try:
          os.killpg(pid, signal.SIGSTOP)
          os.kill(0, signal.SIGSTOP)
        except OSError as ex:
          if ex.errno != errno.ESRCH:
            raise
      signal.signal(signal.SIGTSTP, StopGroup)

      def ContinueGroup(_signum, _frame):
        # Launch the children again after being stopped.
        try:
          os.killpg(pid, signal.SIGCONT)
        except OSError as ex:
          if ex.errno != errno.ESRCH:
            raise
      signal.signal(signal.SIGCONT, ContinueGroup)

      # Loop until the children exit. We exit with os._exit to be sure we
      # don't run any finalizers (those will be run by the child process.)
      # pylint: disable=W0212
      while True:
        try:
          # Wait for the process to exit. When it does, exit with the return
          # value of the subprocess.
          os._exit(process_util.GetExitStatus(os.waitpid(pid, 0)[1]))
        except OSError as ex:
          if ex.errno == errno.EINTR:
            continue
          traceback.print_exc()
          os._exit(1)
        except BaseException:
          traceback.print_exc()
          os._exit(1)

  def _SetupExitHandler(self):
    """Install SIGINT/SIGTERM handlers that report status before dying."""

    def ExitHandler(signum, _frame):
      # Set KILLED flag.
      KILLED.set()

      # Kill our signal handlers so we don't get called recursively
      signal.signal(signal.SIGINT, KillHandler)
      signal.signal(signal.SIGTERM, KillHandler)

      # Print our current job status
      for job in self._build_jobs.values():
        if job:
          self._print_queue.put(JobPrinter(job, unlink=True))

      # Notify the user that we are exiting
      self._Print("Exiting on signal %s" % signum)
      self._print_queue.put(None)
      self._print_worker.join()

      # Kill child threads, then exit.
      os.killpg(0, signal.SIGKILL)
      sys.exit(1)

    # Print out job status when we are killed
    signal.signal(signal.SIGINT, ExitHandler)
    signal.signal(signal.SIGTERM, ExitHandler)

  def _ScheduleUnpack(self, pkg_state):
    """Hand |pkg_state| to the unpack workers and mark it as in flight."""
    self._unpack_jobs[pkg_state.target] = None
    self._unpack_queue.put(pkg_state)

  def _Schedule(self, pkg_state):
    """Queue |pkg_state| for building, or finish it if nothing is to be done.

    Returns:
      True if a build was queued; None otherwise.
    """
    # We maintain a tree of all deps, if this doesn't need
    # to be installed just free up its children and continue.
    # It is possible to reinstall deps of deps, without reinstalling
    # first level deps, like so:
    # virtual/target-os (merge) -> eselect (nomerge) -> python (merge)
    this_pkg = pkg_state.info
    target = pkg_state.target
    if pkg_state.info is not None:
      if this_pkg["action"] == "nomerge":
        self._Finish(target)
      elif target not in self._build_jobs:
        # Kick off the build if it's marked to be built.
        self._build_jobs[target] = None
        self._build_queue.put(pkg_state)
        return True

  def _ScheduleLoop(self, unpack_only=False):
    """Start ready build (or unpack) jobs while worker slots are free."""
    if unpack_only:
      ready_queue = self._unpack_ready
      jobs_queue = self._unpack_jobs
      procs = self._unpack_procs
    else:
      ready_queue = self._build_ready
      jobs_queue = self._build_jobs
      procs = self._build_procs

    # If the current load exceeds our desired load average, don't schedule
    # more than one job.
    if self._load_avg and os.getloadavg()[0] > self._load_avg:
      needed_jobs = 1
    else:
      needed_jobs = procs

    # Schedule more jobs.
    while ready_queue and len(jobs_queue) < needed_jobs:
      state = ready_queue.get()
      if unpack_only:
        self._ScheduleUnpack(state)
      else:
        # Failed packages are only restarted through the retry queue.
        if state.target not in self._failed_count:
          self._Schedule(state)

  def _Print(self, line):
    """Print a single line."""
    self._print_queue.put(LinePrinter(line))

  def _Status(self):
    """Print status."""
    current_time = time.time()
    current_time_struct = time.localtime(current_time)
    no_output = True

    # Print interim output every minute if --show-output is used. Otherwise,
    # print notifications about running packages every 2 minutes, and print
    # full output for jobs that have been running for 60 minutes or more.
    if self._show_output:
      interval = 60
      notify_interval = 0
    else:
      interval = 60 * 60
      notify_interval = 60 * 2
    for job in self._build_jobs.values():
      if job:
        last_timestamp = max(job.start_timestamp, job.last_output_timestamp)
        if last_timestamp + interval < current_time:
          self._print_queue.put(JobPrinter(job))
          job.last_output_timestamp = current_time
          no_output = False
        elif (notify_interval and
              job.last_notify_timestamp + notify_interval < current_time):
          job_seconds = current_time - job.start_timestamp
          args = (job.pkgname, job_seconds / 60, job_seconds % 60, job.filename)
          info = "Still building %s (%dm%.1fs). Logs in %s" % args
          job.last_notify_timestamp = current_time
          self._Print(info)
          no_output = False

    # If we haven't printed any messages yet, print a general status message
    # here.
    if no_output:
      seconds = current_time - GLOBAL_START
      fjobs, fready = len(self._fetch_jobs), len(self._fetch_ready)
      ujobs, uready = len(self._unpack_jobs), len(self._unpack_ready)
      bjobs, bready = len(self._build_jobs), len(self._build_ready)
      retries = len(self._retry_queue)
      pending = max(0, len(self._deps_map) - fjobs - bjobs)
      line = "Pending %s/%s, " % (pending, self._total_jobs)
      if fjobs or fready:
        line += "Fetching %s/%s, " % (fjobs, fready + fjobs)
      if ujobs or uready:
        line += "Unpacking %s/%s, " % (ujobs, uready + ujobs)
      if bjobs or bready or retries:
        line += "Building %s/%s, " % (bjobs, bready + bjobs)
        if retries:
          line += "Retrying %s, " % (retries,)
      load = " ".join(str(x) for x in os.getloadavg())
      line += ("[Time %s | Elapsed %dm%.1fs | Load %s]" % (
          time.strftime('%H:%M:%S', current_time_struct), seconds / 60,
          seconds % 60, load))
      self._Print(line)

  def _Finish(self, target):
    """Mark a target as completed and unblock dependencies."""
    this_pkg = self._deps_map[target]
    if this_pkg["needs"] and this_pkg["nodeps"]:
      # We got installed, but our deps have not been installed yet. Dependent
      # packages should only be installed when our needs have been fully met.
      this_pkg["action"] = "nomerge"
    else:
      for dep in this_pkg["provides"]:
        dep_pkg = self._deps_map[dep]
        state = self._state_map[dep]
        del dep_pkg["needs"][target]
        state.update_score()
        if not state.prefetched:
          if dep in self._fetch_ready:
            # If it's not currently being fetched, update the prioritization
            self._fetch_ready.sort()
        elif not dep_pkg["needs"]:
          if dep_pkg["nodeps"] and dep_pkg["action"] == "nomerge":
            self._Finish(dep)
          else:
            self._build_ready.put(self._state_map[dep])
      self._deps_map.pop(target)

  def _Retry(self):
    """Reschedule queued failures until one of them actually starts."""
    while self._retry_queue:
      state = self._retry_queue.pop(0)
      if self._Schedule(state):
        self._Print("Retrying emerge of %s." % state.target)
        break

  def _Shutdown(self):
    """Stop all worker pools and the print worker; safe to call twice."""
    # Tell emerge workers to exit. They all exit when 'None' is pushed
    # to the queue.

    # Shutdown the workers first; then jobs (which is how they feed things back)
    # then finally the print queue.

    def _stop(queue, pool):
      if pool is None:
        return
      try:
        queue.put(None)
        pool.close()
        pool.join()
      finally:
        pool.terminate()

    _stop(self._fetch_queue, self._fetch_pool)
    self._fetch_queue = self._fetch_pool = None

    _stop(self._build_queue, self._build_pool)
    self._build_queue = self._build_pool = None

    if self._unpack_only:
      _stop(self._unpack_queue, self._unpack_pool)
      self._unpack_queue = self._unpack_pool = None

    if self._job_queue is not None:
      self._job_queue.close()
      self._job_queue = None

    # Now that our workers are finished, we can kill the print queue.
    if self._print_worker is not None:
      try:
        self._print_queue.put(None)
        self._print_queue.close()
        self._print_worker.join()
      finally:
        self._print_worker.terminate()
    self._print_queue = self._print_worker = None

  def _ReapJobOutput(self, job):
    """Print a finished job's log if wanted; either way delete the log."""
    if self._show_output or job.retcode != 0:
      # The printer removes the file once it has been printed.
      self._print_queue.put(JobPrinter(job, unlink=True))
    else:
      os.unlink(job.filename)

  def _ExitOnStall(self):
    """Explain why nothing is left to run, then exit with status 1."""
    # Tell the user why we're exiting.
    if self._failed_count:
      print('Packages failed:\n\t%s' % '\n\t'.join(self._failed_count))
      status_file = os.environ.get("PARALLEL_EMERGE_STATUS_FILE")
      if status_file:
        failed_pkgs = set(portage.versions.cpv_getkey(x)
                          for x in self._failed_count)
        with open(status_file, "a") as f:
          f.write("%s\n" % " ".join(failed_pkgs))
    else:
      print("Deadlock! Circular dependencies!")
    sys.exit(1)

  def _HandleFetchJob(self, job):
    """Process a start/finish notification from a fetch worker."""
    target = job.target
    if not job.done:
      self._fetch_jobs[target] = job
      return

    state = self._state_map[target]
    state.prefetched = True
    state.fetched_successfully = (job.retcode == 0)
    del self._fetch_jobs[target]
    self._Print("Fetched %s in %2.2fs"
                % (target, time.time() - job.start_timestamp))
    self._ReapJobOutput(job)

    # Failure or not, let build work with it next.
    if not self._deps_map[target]["needs"]:
      self._build_ready.put(state)
      self._ScheduleLoop()

    if self._unpack_only and job.retcode == 0:
      self._unpack_ready.put(state)
      self._ScheduleLoop(unpack_only=True)

    if self._fetch_ready:
      state = self._fetch_ready.get()
      self._fetch_queue.put(state)
      self._fetch_jobs[state.target] = None
    else:
      # Minor optimization; shut down fetchers early since we know
      # the queue is empty.
      self._fetch_queue.put(None)

  def _HandleUnpackJob(self, job):
    """Process a start/finish notification from an unpack worker."""
    target = job.target
    if not job.done:
      self._unpack_jobs[target] = job
      return

    del self._unpack_jobs[target]
    self._Print("Unpacked %s in %2.2fs"
                % (target, time.time() - job.start_timestamp))
    self._ReapJobOutput(job)
    if self._unpack_ready:
      state = self._unpack_ready.get()
      self._unpack_queue.put(state)
      self._unpack_jobs[state.target] = None

  def _HandleBuildFailure(self, target, details):
    """Record a failed build and queue a retry if any attempts remain."""
    failed_count = self._failed_count.get(target, 0)
    if failed_count >= self._max_retries:
      # If this job has failed and can't be retried, give up.
      self._Print("Failed %s. Your build has failed." % details)
    else:
      # Queue up this build to try again after a long while.
      self._retry_queue.append(self._state_map[target])
      self._Print("Failed %s, retrying later." % details)
    # Record every failure, including the ones we will not retry, so that the
    # final report names the package instead of blaming a dependency cycle.
    self._failed_count[target] = failed_count + 1

  def Run(self):
    """Run through the scheduled ebuilds.

    Keep running so long as we have uninstalled packages in the
    dependency graph to merge. Exits the process with status 1 when nothing
    is running or ready any more but packages remain.
    """
    if not self._deps_map:
      return

    # Start the fetchers.
    for _ in range(min(self._fetch_procs, len(self._fetch_ready))):
      state = self._fetch_ready.get()
      self._fetch_jobs[state.target] = None
      self._fetch_queue.put(state)

    # Print an update, then get going.
    self._Status()

    while self._deps_map:
      # Check here that we are actually waiting for something.
      if (self._build_queue.empty() and
          self._job_queue.empty() and
          not self._fetch_jobs and
          not self._fetch_ready and
          not self._unpack_jobs and
          not self._unpack_ready and
          not self._build_jobs and
          not self._build_ready and
          self._deps_map):
        # If we have failed on a package, retry it now.
        if self._retry_queue:
          self._Retry()
        else:
          self._ExitOnStall()

      # Poll for up to 12 * 5 seconds, scheduling in between polls.
      for _ in range(12):
        try:
          job = self._job_queue.get(timeout=5)
          break
        except Queue.Empty:
          # Check if any more jobs can be scheduled.
          self._ScheduleLoop()
      else:
        # Print an update every 60 seconds.
        self._Status()
        continue

      if job.fetch_only:
        self._HandleFetchJob(job)
        continue

      if job.unpack_only:
        self._HandleUnpackJob(job)
        continue

      target = job.target
      if not job.done:
        self._build_jobs[target] = job
        self._Print("Started %s (logged in %s)" % (target, job.filename))
        continue

      # Print output of job
      self._ReapJobOutput(job)
      del self._build_jobs[target]

      seconds = time.time() - job.start_timestamp
      details = "%s (in %dm%.1fs)" % (target, seconds / 60, seconds % 60)

      # Complain if necessary.
      if job.retcode != 0:
        # Handle job failure.
        self._HandleBuildFailure(target, details)
      else:
        self._Print("Completed %s" % details)

        # Mark as completed and unblock waiting ebuilds.
        self._Finish(target)

        if target in self._failed_count and self._retry_queue:
          # If we have successfully retried a failed package, and there
          # are more failed packages, try the next one. We will only have
          # one retrying package actively running at a time.
          self._Retry()

      # Schedule pending jobs and print an update.
      self._ScheduleLoop()
      self._Status()

    # If packages were retried, output a warning.
    if self._failed_count:
      self._Print("")
      self._Print("WARNING: The following packages failed once or more,")
      self._Print("but succeeded upon retry. This might indicate incorrect")
      self._Print("dependencies.")
      for pkg in self._failed_count:
        self._Print(" %s" % pkg)
      self._Print("@@@STEP_WARNINGS@@@")
      self._Print("")

    # Tell child threads to exit.
    self._Print("Merge complete")
def main(argv):
  """Run real_main(argv), then tidy up leftover multiprocessing threads.

  Returns:
    Whatever real_main() returns.
  """
  try:
    return real_main(argv)
  finally:
    # multiprocessing does not clean up after itself properly, see
    # http://bugs.python.org/issue4106;
    # First make sure a GC pass happens *before* the VM begins shutting down.
    gc.collect()
    # Then hunt down the queue feeder threads and try to reap them by hand.
    for thread in threading.enumerate():
      # A thread whose ident is None was never started, so there is nothing
      # to join; anything not named like a feeder thread is left alone.
      if thread.name != 'QueueFeederThread' or thread.ident is None:
        continue
      thread.join(1)
def real_main(argv):
  """Build the dependency graph for |argv| and merge it in parallel.

  Args:
    argv: Command line arguments, without the program name.

  Returns:
    0 on success, 1 on usage or permission errors, or the result of
    emerge_main() when the work is delegated to it. Does not return if the
    process is re-executed after a portage upgrade.
  """
  original_args = argv[:]
  deps = DepGraphGenerator()
  deps.Initialize(original_args)
  emerge = deps.emerge

  if emerge.action is not None:
    # An explicit emerge action was requested; delegate to emerge_main.
    return emerge_main(deps.ParseParallelEmergeArgs(argv))
  if not emerge.cmdline_packages:
    Usage()
    return 1

  # Without root access we cannot install anything, so outside of pretend
  # mode there is no point in going on.
  #
  # NOTE: Running with root access is a good idea even with --pretend, since
  # it lets portage write to the dependency cache, which matters for
  # performance.
  if "--pretend" not in emerge.opts and portage.data.secpass < 2:
    print("parallel_emerge: superuser access is required.")
    return 1

  if "--quiet" not in emerge.opts:
    package_list = " ".join(emerge.cmdline_packages)
    print("Starting fast-emerge.")
    print(" Building package %s on %s" % (package_list,
                                          deps.sysroot or "root"))

  deps_tree, deps_info = deps.GenDependencyTree()

  # Asked for a tree *and* verbosity? Then print the raw tree as well.
  if "--tree" in emerge.opts and "--verbose" in emerge.opts:
    deps.PrintTree(deps_tree)

  deps_graph = deps.GenDependencyGraph(deps_tree, deps_info)

  # Show what we have worked out so far.
  deps.PrintInstallPlan(deps_graph)
  if "--tree" in emerge.opts:
    PrintDepsMap(deps_graph)

  # If portage itself is among the packages to merge on the host root,
  # upgrade it first and restart parallel_emerge for the remainder, so that
  # every later merge sees the updated portage settings.
  needs_portage_upgrade = False
  root = emerge.settings["ROOT"]
  # pylint: disable=W0212
  if root == "/":
    final_db = emerge.depgraph._dynamic_config._filtered_trees[root]['graph_db']
    for db_pkg in final_db.match_pkgs("sys-apps/portage"):
      if not deps_graph.get(db_pkg.cpv):
        continue
      needs_portage_upgrade = True
      if "--quiet" not in emerge.opts:
        print("Upgrading portage first, then restarting...")

  # Portage goes first, everything else afterwards.
  #
  # The child needs permission to run setsid, hence the second trip through
  # sudo. SUDO_USER is carried over in case an ebuild relies on it.
  if needs_portage_upgrade:
    # Work out the command line for the re-invocation.
    sudo_user = os.environ.get("SUDO_USER", "")
    reexec_cmd = ["sudo", "-E", "SUDO_USER=%s" % sudo_user,
                  os.path.abspath(sys.argv[0])]
    reexec_cmd.extend(original_args)
    reexec_cmd.append("--exclude=sys-apps/portage")

    # Step one: upgrade portage.
    passthrough = ("--quiet", "--pretend", "--verbose")
    portage_args = [opt for opt in emerge.opts if opt in passthrough]
    portage_args.append("portage")
    ret = emerge_main(portage_args)
    if ret != 0:
      return ret

    # Step two: replace this process with a fresh run for the rest.
    os.execvp(reexec_cmd[0], reexec_cmd)

  # Attempt to solve crbug.com/433482
  # The file descriptor error only shows up while userpriv_groups is being
  # computed (it is generated lazily). Loading it once here brings the number
  # of calls down from a few hundred to one.
  portage.data._get_global('userpriv_groups')

  # Run the queued emerges.
  scheduler = EmergeQueue(deps_graph, emerge, deps.package_db, deps.show_output,
                          deps.unpack_only, deps.max_retries)
  try:
    scheduler.Run()
  finally:
    # pylint: disable=W0212
    scheduler._Shutdown()
  scheduler = None

  clean_logs(emerge.settings)

  print("Done")
  return 0