Previously, parallel_emerge only broke counter-plan dependencies if they
were mutually cyclic. This doesn't work, because these deps might be
involved in an unrelated cycle. To fix this, we break all counter-plan
dependencies that are involved in any cycle. This fixes make_chroot
--fast, which currently fails with a deadlock.

Besides the above, I've also added additional cycle-checking to the
install plan generation code so that cyclic dependencies are caught
earlier. I also cleaned up FindCyclesAtNode to be a bit more
understandable -- I wasn't 100% sure that it was actually correct, so I
cleaned it up so that it was easier for me to understand that it is in
fact correct. Not sure that this part fixes any bugs, but it's easier
for me to analyze the behavior of FindCyclesAtNode now.

TEST=make_chroot --fast (Note that make_chroot --fast is deprecated,
but it's good to fix the cycle cracking anyway.)
BUG=chromium-os:5795
Review URL: http://codereview.chromium.org/3156018
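A minimal sketch of the new cycle-cracking rule (illustrative only; it reuses
the deps_map/deps_info structures from the script below, and
edges_in_any_cycle is a hypothetical name for the edges FindCycles reports):

    for pkg1, pkg2 in edges_in_any_cycle:  # pkg1 needs pkg2
      if deps_info[pkg1]["idx"] >= deps_info[pkg2]["idx"]:
        # The edge is flagged against portage's install plan; break it.
        del deps_map[pkg1]["needs"][pkg2]
        deps_map[pkg2]["provides"].remove(pkg1)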
#!/usr/bin/python2.6
# Copyright (c) 2010 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Program to run emerge in parallel, for significant speedup.

Usage:
 ./parallel_emerge [--board=BOARD] [--workon=PKGS] [--no-workon-deps]
 [emerge args] package

Basic operation:
  Runs 'emerge -p --debug' to display dependencies, and stores a
  dependency graph. All non-blocked packages are launched in parallel,
  as 'emerge --nodeps package' with any blocked packages being emerged
  immediately upon deps being met.

  For this to work effectively, /usr/lib/portage/pym/portage/locks.py
  must be stubbed out, preventing portage from slowing itself with
  unnecessary locking, as this script ensures that emerge is run in such
  a way that common resources are never in conflict. This is controlled
  by an environment variable PORTAGE_LOCKS set in parallel emerge
  subprocesses.

  Parallel Emerge unlocks two things during operation, here's what you
  must do to keep this safe:
    * Storage dir containing binary packages. - Don't emerge new
      packages while installing the existing ones.
    * Portage database - You must not examine deps while modifying the
      database. Therefore you may only parallelize "-p" read only access,
      or "--nodeps" write only access.
  Caveats:
    * Some ebuild packages have incorrectly specified deps, and running
      them in parallel is more likely to bring out these failures.
    * Some ebuilds (especially the build part) have complex dependencies
      that are not captured well by this script (it may be necessary to
      install an old package to build, but then install a newer version
      of the same package for a runtime dep).
"""

import codecs
import copy
import multiprocessing
import os
import Queue
import shlex
import signal
import sys
import tempfile
import time
import traceback
import urllib2

# If PORTAGE_USERNAME isn't specified, scrape it from the $HOME variable. On
# Chromium OS, the default "portage" user doesn't have the necessary
# permissions. It'd be easier if we could default to $USERNAME, but $USERNAME
# is "root" here because we get called through sudo.
#
# We need to set this before importing any portage modules, because portage
# looks up "PORTAGE_USERNAME" at import time.
#
# NOTE: .bashrc sets PORTAGE_USERNAME = $USERNAME, so most people won't
# encounter this case unless they have an old chroot or blow away the
# environment by running sudo without the -E specifier.
if "PORTAGE_USERNAME" not in os.environ:
  homedir = os.environ.get("HOME")
  if homedir:
    os.environ["PORTAGE_USERNAME"] = os.path.basename(homedir)

# Portage doesn't expose dependency trees in its public API, so we have to
# make use of some private APIs here. These modules are found under
# /usr/lib/portage/pym/.
#
# TODO(davidjames): Update Portage to expose public APIs for these features.
from _emerge.actions import adjust_configs
from _emerge.actions import load_emerge_config
from _emerge.create_depgraph_params import create_depgraph_params
from _emerge.depgraph import backtrack_depgraph
from _emerge.main import emerge_main
from _emerge.main import parse_opts
from _emerge.Package import Package
from _emerge.Scheduler import Scheduler
from _emerge.stdout_spinner import stdout_spinner
import portage
import portage.debug


def Usage():
  """Print usage."""
  print "Usage:"
  print " ./parallel_emerge [--board=BOARD] [--workon=PKGS] [--no-workon-deps]"
  print " [--rebuild] [emerge args] package"
  print
  print "Packages specified as workon packages are always built from source."
  print "Unless --no-workon-deps is specified, packages that depend on these"
  print "packages are also built from source."
  print
  print "The --workon argument is mainly useful when you want to build and"
  print "install packages that you are working on unconditionally, but do not"
  print "want to have to rev the package to indicate you want to build it from"
  print "source. The build_packages script will automatically supply the"
  print "workon argument to emerge, ensuring that packages selected using"
  print "cros-workon are rebuilt."
  print
  print "The --rebuild option rebuilds packages whenever their dependencies"
  print "are changed. This ensures that your build is correct."
  sys.exit(1)


# These are dependencies that are not specified in the package,
# but will prevent the package from installing.
secret_deps = {}

# Global start time
GLOBAL_START = time.time()


class EmergeData(object):
  """This simple struct holds various emerge variables.

  This struct helps us easily pass emerge variables around as a unit.
  These variables are used for calculating dependencies and installing
  packages.
  """

  __slots__ = ["action", "cmdline_packages", "depgraph", "mtimedb", "opts",
               "root_config", "scheduler_graph", "settings", "spinner",
               "trees"]

  def __init__(self):
    # The action the user requested. If the user is installing packages, this
    # is None. If the user is doing anything other than installing packages,
    # this will contain the action name, which will map exactly to the
    # long-form name of the associated emerge option.
    #
    # Example: If you call parallel_emerge --unmerge package, the action name
    # will be "unmerge"
    self.action = None

    # The list of packages the user passed on the command-line.
    self.cmdline_packages = None

    # The emerge dependency graph. It'll contain all the packages involved in
    # this merge, along with their versions.
    self.depgraph = None

    # A dict of the options passed to emerge. This dict has been cleaned up
    # a bit by parse_opts, so that it's a bit easier for the emerge code to
    # look at the options.
    #
    # Emerge takes a few shortcuts in its cleanup process to make parsing of
    # the options dict easier. For example, if you pass in "--usepkg=n", the
    # "--usepkg" flag is just left out of the dictionary altogether. Because
    # --usepkg=n is the default, this makes parsing easier, because emerge
    # can just assume that if "--usepkg" is in the dictionary, it's enabled.
    #
    # These cleanup processes aren't applied to all options. For example, the
    # --with-bdeps flag is passed in as-is. For a full list of the cleanups
    # applied by emerge, see the parse_opts function in the _emerge.main
    # package.
    self.opts = None

    # A dictionary used by portage to maintain global state. This state is
    # loaded from disk when portage starts up, and saved to disk whenever we
    # call mtimedb.commit().
    #
    # This database contains information about global updates (i.e., what
    # version of portage we have) and what we're currently doing. Portage
    # saves what it is currently doing in this database so that it can be
    # resumed when you call it with the --resume option.
    #
    # parallel_emerge does not save what it is currently doing in the mtimedb,
    # so we do not support the --resume option.
    self.mtimedb = None

    # The portage configuration for our current root. This contains the portage
    # settings (see below) and the three portage trees for our current root.
    # (The three portage trees are explained below, in the documentation for
    # the "trees" member.)
    self.root_config = None

    # The scheduler graph is used by emerge to calculate what packages to
    # install. We don't actually install any deps, so this isn't really used,
    # but we pass it in to the Scheduler object anyway.
    self.scheduler_graph = None

    # Portage settings for our current session. Most of these settings are set
    # in make.conf inside our current install root.
    self.settings = None

    # The spinner, which spews stuff to stdout to indicate that portage is
    # doing something. We maintain our own spinner, so we set the portage
    # spinner to "silent" mode.
    self.spinner = None

    # The portage trees. There are separate portage trees for each root. To get
    # the portage tree for the current root, you can look in self.trees[root],
    # where root = self.settings["ROOT"].
    #
    # In each root, there are three trees: vartree, porttree, and bintree.
    #  - vartree: A database of the currently-installed packages.
    #  - porttree: A database of ebuilds, that can be used to build packages.
    #  - bintree: A database of binary packages.
    self.trees = None


class DepGraphGenerator(object):
  """Grab dependency information about packages from portage.

  Typical usage:
    deps = DepGraphGenerator()
    deps.Initialize(sys.argv[1:])
    deps_tree, deps_info = deps.GenDependencyTree()
    deps_graph = deps.GenDependencyGraph(deps_tree, deps_info)
    deps.PrintTree(deps_tree)
    PrintDepsMap(deps_graph)
  """

  __slots__ = ["board", "emerge", "mandatory_source", "no_workon_deps",
               "package_db", "rebuild", "show_output"]

  def __init__(self):
    self.board = None
    self.emerge = EmergeData()
    self.mandatory_source = set()
    self.no_workon_deps = False
    self.package_db = {}
    self.rebuild = False
    self.show_output = False

  def ParseParallelEmergeArgs(self, argv):
    """Read the parallel emerge arguments from the command-line.

    We need to be compatible with emerge arg format. We scrape arguments that
    are specific to parallel_emerge, and pass through the rest directly to
    emerge.

    Args:
      argv: arguments list

    Returns:
      Arguments that don't belong to parallel_emerge
    """
    emerge_args = []
    for arg in argv:
      # Specifically match arguments that are specific to parallel_emerge, and
      # pass through the rest.
      if arg.startswith("--board="):
        self.board = arg.replace("--board=", "")
      elif arg.startswith("--workon="):
        workon_str = arg.replace("--workon=", "")
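        # Note: the shlex.split/join/split below lets a --workon value carry
        # an extra layer of shell quoting (e.g. a quoted, space-separated
        # list of atoms) and still be flattened into individual packages.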
        package_list = shlex.split(" ".join(shlex.split(workon_str)))
        self.mandatory_source.update(package_list)
      elif arg == "--no-workon-deps":
        self.no_workon_deps = True
      elif arg == "--rebuild":
        self.rebuild = True
      elif arg == "--show-output":
        self.show_output = True
      else:
        # Not one of our options, so pass through to emerge.
        emerge_args.append(arg)

    if self.rebuild:
      if self.no_workon_deps:
        print "--rebuild is not compatible with --no-workon-deps"
        sys.exit(1)

    return emerge_args

  def Initialize(self, args):
    """Initializer. Parses arguments and sets up portage state."""

    # Parse and strip out args that are just intended for parallel_emerge.
    emerge_args = self.ParseParallelEmergeArgs(args)

    # Setup various environment variables based on our current board. These
    # variables are normally setup inside emerge-${BOARD}, but since we don't
    # call that script, we have to set it up here. These variables serve to
    # point our tools at /build/BOARD and to setup cross compiles to the
    # appropriate board as configured in toolchain.conf.
    if self.board:
      os.environ["PORTAGE_CONFIGROOT"] = "/build/" + self.board
      os.environ["PORTAGE_SYSROOT"] = "/build/" + self.board
      os.environ["SYSROOT"] = "/build/" + self.board
      scripts_dir = os.path.dirname(os.path.realpath(__file__))
      toolchain_path = "%s/../overlays/overlay-%s/toolchain.conf"
      # Strip the variant out of the board name to look for the toolchain. This
      # is similar to what setup_board does.
      board_no_variant = self.board.split('_')[0]
      f = open(toolchain_path % (scripts_dir, board_no_variant))
      os.environ["CHOST"] = f.readline().strip()
      f.close()

    # Although CHROMEOS_ROOT isn't specific to boards, it's normally setup
    # inside emerge-${BOARD}, so we set it up here for compatibility. It
    # will be going away soon as we migrate to CROS_WORKON_SRCROOT.
    os.environ.setdefault("CHROMEOS_ROOT", os.environ["HOME"] + "/trunk")

    # Modify the environment to disable locking.
    os.environ["PORTAGE_LOCKS"] = "false"

    # Turn off interactive delays
    os.environ["EBEEP_IGNORE"] = "1"
    os.environ["EPAUSE_IGNORE"] = "1"
    os.environ["UNMERGE_DELAY"] = "0"

    # Parse the emerge options.
    action, opts, cmdline_packages = parse_opts(emerge_args)

    # If we're installing to the board, we want the --root-deps option so that
    # portage will install the build dependencies to that location as well.
    if self.board:
      opts.setdefault("--root-deps", True)

    # Set environment variables based on options. Portage normally sets these
    # environment variables in emerge_main, but we can't use that function,
    # because it also does a bunch of other stuff that we don't want.
    # TODO(davidjames): Patch portage to move this logic into a function we can
    # reuse here.
    if "--debug" in opts:
      os.environ["PORTAGE_DEBUG"] = "1"
    if "--config-root" in opts:
      os.environ["PORTAGE_CONFIGROOT"] = opts["--config-root"]
    if "--root" in opts:
      os.environ["ROOT"] = opts["--root"]
    if "--accept-properties" in opts:
      os.environ["ACCEPT_PROPERTIES"] = opts["--accept-properties"]

    # Portage has two flags for doing collision protection: collision-protect
    # and protect-owned. The protect-owned feature is enabled by default and
    # is quite useful: it checks to make sure that we don't have multiple
    # packages that own the same file. The collision-protect feature is more
    # strict, and less useful: it fails if it finds a conflicting file, even
    # if that file was created by an earlier ebuild that failed to install.
    #
    # We want to disable collision-protect here because we don't handle
    # failures during the merge step very well. Sometimes we leave old files
    # lying around and they cause problems, so for now we disable the flag.
    # TODO(davidjames): Look for a better solution.
    features = os.environ.get("FEATURES", "") + " -collision-protect"

    # If we're cross-compiling, updating the environment every time we install
    # a package isn't necessary, and leads to race conditions. Do environment
    # updates at the end, instead.
    if self.board:
      features = features + " no-env-update"

    os.environ["FEATURES"] = features

    # Now that we've setup the necessary environment variables, we can load the
    # emerge config from disk.
    settings, trees, mtimedb = load_emerge_config()

    # Check whether our portage tree is out of date. Typically, this happens
    # when you're setting up a new portage tree, such as in setup_board and
    # make_chroot. In that case, portage applies a bunch of global updates
    # here. Once the updates are finished, we need to commit any changes
    # that the global update made to our mtimedb, and reload the config.
    #
    # Portage normally handles this logic in emerge_main, but again, we can't
    # use that function here.
    if portage._global_updates(trees, mtimedb["updates"]):
      mtimedb.commit()
      settings, trees, mtimedb = load_emerge_config(trees=trees)

    # Setup implied options. Portage normally handles this logic in
    # emerge_main.
    if "--buildpkgonly" in opts or "buildpkg" in settings.features:
      opts.setdefault("--buildpkg", True)
    if "--getbinpkgonly" in opts:
      opts.setdefault("--usepkgonly", True)
      opts.setdefault("--getbinpkg", True)
    if "getbinpkg" in settings.features:
      # Per emerge_main, FEATURES=getbinpkg overrides --getbinpkg=n
      opts["--getbinpkg"] = True
    if "--getbinpkg" in opts or "--usepkgonly" in opts:
      opts.setdefault("--usepkg", True)
    if "--fetch-all-uri" in opts:
      opts.setdefault("--fetchonly", True)
    if "--skipfirst" in opts:
      opts.setdefault("--resume", True)
    if "--buildpkgonly" in opts:
      # --buildpkgonly will not merge anything, so it overrides all binary
      # package options.
      for opt in ("--getbinpkg", "--getbinpkgonly",
                  "--usepkg", "--usepkgonly"):
        opts.pop(opt, None)
    if (settings.get("PORTAGE_DEBUG", "") == "1" and
        "python-trace" in settings.features):
      portage.debug.set_trace(True)

    # Complain about unsupported options
    for opt in ("--ask", "--ask-enter-invalid", "--complete-graph",
                "--resume", "--skipfirst"):
      if opt in opts:
        print "%s is not supported by parallel_emerge" % opt
        sys.exit(1)

    # Make emerge specific adjustments to the config (e.g. colors!)
    adjust_configs(opts, trees)

    # Save our configuration so far in the emerge object
    emerge = self.emerge
    emerge.action, emerge.opts = action, opts
    emerge.settings, emerge.trees, emerge.mtimedb = settings, trees, mtimedb
    emerge.cmdline_packages = cmdline_packages
    root = settings["ROOT"]
    emerge.root_config = trees[root]["root_config"]

  def GenDependencyTree(self):
    """Get dependency tree info from emerge.

    TODO(): Update cros_extract_deps to also use this code.

    Returns:
      Dependency tree
    """
    start = time.time()

    # Setup emerge options.
    #
    # We treat dependency info a bit differently than emerge itself. Unless
    # you're using --usepkgonly, we disable --getbinpkg and --usepkg here so
    # that emerge will look at the dependencies of the source ebuilds rather
    # than the binary dependencies. This helps ensure that we have the option
    # of merging a package from source, if we want to switch to it with
    # --workon and the dependencies have changed.
    emerge = self.emerge
    emerge_opts = emerge.opts.copy()
    emerge_opts.pop("--getbinpkg", None)
    if "--usepkgonly" not in emerge_opts:
      emerge_opts.pop("--usepkg", None)
    if self.mandatory_source or self.rebuild:
      # Enable --emptytree so that we get the full tree, which we need for
      # dependency analysis. By default, with this option, emerge optimizes
      # the graph by removing uninstall instructions from the graph. By
      # specifying --tree as well, we tell emerge that it's not safe to remove
      # uninstall instructions because we're planning on analyzing the output.
      emerge_opts["--tree"] = True
      emerge_opts["--emptytree"] = True

    # Create a list of packages to merge
    packages = set(emerge.cmdline_packages[:])
    if self.mandatory_source:
      packages.update(self.mandatory_source)

    # Tell emerge to be quiet. We print plenty of info ourselves so we don't
    # need any extra output from portage.
    portage.util.noiselimit = -1

    # My favorite feature: The silent spinner. It doesn't spin. Ever.
    # I'd disable the colors by default too, but they look kind of cool.
    emerge.spinner = stdout_spinner()
    emerge.spinner.update = emerge.spinner.update_quiet

    if "--quiet" not in emerge.opts:
      print "Calculating deps..."

    # Ask portage to build a dependency graph with the options we specified
    # above.
    params = create_depgraph_params(emerge_opts, emerge.action)
    success, depgraph, _ = backtrack_depgraph(
        emerge.settings, emerge.trees, emerge_opts, params, emerge.action,
        packages, emerge.spinner)
    emerge.depgraph = depgraph

    # Is it impossible to honor the user's request? Bail!
    if not success:
      depgraph.display_problems()
      sys.exit(1)

    # Build our own tree from the emerge digraph.
    deps_tree = {}
    digraph = depgraph._dynamic_config.digraph
    for node, node_deps in digraph.nodes.items():
      # Calculate dependency packages that need to be installed first. Each
      # child on the digraph is a dependency. The "operation" field specifies
      # what we're doing (e.g. merge, uninstall, etc.). The "priorities" array
      # contains the type of dependency (e.g. build, runtime, runtime_post,
      # etc.)
      #
      # Emerge itself actually treats some dependencies as "soft" dependencies
      # and sometimes ignores them. We don't do that -- we honor all
      # dependencies unless we're forced to prune them because they're cyclic.
      #
      # Portage refers to the identifiers for packages as a CPV. This acronym
      # stands for Category/Package-Version.
      #
      # Here's an example CPV: chromeos-base/power_manager-0.0.1-r1
      # Split up, this CPV would be:
      #   C -- Category: chromeos-base
      #   P -- Package: power_manager
      #   V -- Version: 0.0.1-r1
      #
      # We just refer to CPVs as packages here because it's easier.
      deps = {}
      for child, priorities in node_deps[0].items():
        deps[str(child.cpv)] = dict(action=str(child.operation),
                                    deptype=str(priorities[-1]),
                                    deps={})

      # We've built our list of deps, so we can add our package to the tree.
      if isinstance(node, Package):
        deps_tree[str(node.cpv)] = dict(action=str(node.operation),
                                        deps=deps)

    emptytree = "--emptytree" in emerge.opts

    # Ask portage for its install plan, so that we can only throw out
    # dependencies that portage throws out. Also, keep track of the old
    # versions of packages that we're either upgrading or replacing.
    #
    # The "vardb" is the database of installed packages.
    vardb = emerge.trees[emerge.settings["ROOT"]]["vartree"].dbapi
    deps_info = {}
    for pkg in depgraph.altlist():
      if isinstance(pkg, Package):
        # If we're not in emptytree mode, and we're going to replace a package
        # that is already installed, then this operation is possibly optional.
        # ("--selective" mode is handled later, in RemoveInstalledPackages())
        optional = False
        if not emptytree and vardb.cpv_exists(pkg.cpv):
          optional = True

        # Add the package to our database.
        self.package_db[str(pkg.cpv)] = pkg

        # Save off info about the package
        deps_info[str(pkg.cpv)] = {"idx": len(deps_info),
                                   "optional": optional}

    # Delete the --tree option, because we don't really want to display a
    # tree. We just wanted to get emerge to leave uninstall instructions on
    # the graph. Later, when we display the graph, we'll want standard-looking
    # output, so removing the --tree option is important.
    depgraph._frozen_config.myopts.pop("--tree", None)

    seconds = time.time() - start
    if "--quiet" not in emerge.opts:
      print "Deps calculated in %dm%.1fs" % (seconds / 60, seconds % 60)

    return deps_tree, deps_info

  def PrintTree(self, deps, depth=""):
    """Print the deps we have seen in the emerge output.

    Args:
      deps: Dependency tree structure.
      depth: Allows printing the tree recursively, with indentation.
    """
    for entry in sorted(deps):
      action = deps[entry]["action"]
      print "%s %s (%s)" % (depth, entry, action)
      self.PrintTree(deps[entry]["deps"], depth=depth + " ")

  def GenDependencyGraph(self, deps_tree, deps_info):
    """Generate a doubly linked dependency graph.

    Args:
      deps_tree: Dependency tree structure.
      deps_info: More details on the dependencies.

    Returns:
      Deps graph in the form of a dict of packages, with each package
      specifying a "needs" list and "provides" list.
    """
    emerge = self.emerge
    root = emerge.settings["ROOT"]

    # It's useful to know what packages will actually end up on the
    # system at some point. Packages in final_db are either already
    # installed, or will be installed by the time we're done.
    final_db = emerge.depgraph._dynamic_config.mydbapi[root]

    # final_pkgs is a set of the packages we found in the final_db. These
    # packages are either already installed, or will be installed by the time
    # we're done. It's populated in BuildFinalPackageSet()
    final_pkgs = set()

    # These packages take a really long time to build, so, for expediency, we
    # are blacklisting them from automatic rebuilds. Instead, these packages
    # will only be rebuilt when they are explicitly rev'd.
    rebuild_blacklist = set()
    for pkg in ("media-plugins/o3d", "dev-java/icedtea"):
      for match in final_db.match_pkgs(pkg):
        rebuild_blacklist.add(str(match.cpv))

    # deps_map is the actual dependency graph.
    #
    # Each package specifies a "needs" list and a "provides" list. The "needs"
    # list indicates which packages we depend on. The "provides" list
    # indicates the reverse dependencies -- what packages need us.
    #
    # We also provide some other information in the dependency graph:
    #  - action: What we're planning on doing with this package. Generally,
    #    "merge", "nomerge", or "uninstall"
    #  - mandatory_source:
    #    If true, indicates that this package must be compiled from source.
    #    We set this for "workon" packages, and for packages where the
    #    binaries are known to be out of date.
    #  - mandatory:
    #    If true, indicates that this package must be installed. We don't care
    #    whether it's binary or source, unless the mandatory_source flag is
    #    also set.
    #
    deps_map = {}

    def ReverseTree(packages):
      """Convert tree to digraph.

      Take the tree of package -> requirements and reverse it to a digraph of
      buildable packages -> packages they unblock.

      Args:
        packages: Tree(s) of dependencies.

      Returns:
        Unsanitized digraph.
      """
      for pkg in packages:

        # Create an entry for the package
        action = packages[pkg]["action"]
        default_pkg = {"needs": {}, "provides": set(), "action": action,
                       "mandatory_source": False, "mandatory": False}
        this_pkg = deps_map.setdefault(pkg, default_pkg)

        # Create entries for dependencies of this package first.
        ReverseTree(packages[pkg]["deps"])

        # Add dependencies to this package.
        for dep, dep_item in packages[pkg]["deps"].iteritems():
          dep_pkg = deps_map[dep]
          dep_type = dep_item["deptype"]
          if dep_type != "runtime_post":
            dep_pkg["provides"].add(pkg)
            this_pkg["needs"][dep] = dep_type

    def BuildFinalPackageSet():
      # If this package is installed, or will get installed, add it to
      # final_pkgs
      for pkg in deps_map:
        for match in final_db.match_pkgs(pkg):
          final_pkgs.add(str(match.cpv))

    def FindCycles():
      """Find cycles in the dependency tree.

      Returns:
        Dict of packages involved in cyclic dependencies, mapping each package
        to a list of the cycles the package is involved in.
      """

      def FindCyclesAtNode(pkg, cycles, unresolved):
        """Find cycles in cyclic dependencies starting at specified package.

        Args:
          pkg: Package identifier.
          cycles: Set of cycles so far.
          unresolved: Nodes that have been visited but are not fully processed.
        """
        unresolved.append(pkg)
        mycycles = cycles.get(pkg)
        if mycycles:
          mycycles = mycycles.get("pkgs")
        for dep in deps_map[pkg]["needs"]:
          if mycycles and dep in mycycles:
            continue
          elif dep in unresolved:
            idx = unresolved.index(dep)
            mycycle = unresolved[idx:] + [dep]
            for cycle_pkg in mycycle:
              info = cycles.setdefault(cycle_pkg, {})
              info.setdefault("pkgs", set()).update(mycycle)
              info.setdefault("cycles", []).append(mycycle)
          else:
            FindCyclesAtNode(dep, cycles, unresolved)
        unresolved.pop()

      cycles, unresolved = {}, []
      for pkg in deps_map:
        FindCyclesAtNode(pkg, cycles, unresolved)
      return cycles

    def RemoveInstalledPackages():
      """Remove installed packages, propagating dependencies."""

      # If we're in non-selective mode, the packages specified on the command
      # line are generally mandatory.
      #
      # There are a few exceptions to this rule:
      #  1. If the package isn't getting installed because it's in
      #     package.provided, it's not mandatory.
      #  2. If the package isn't getting installed because we're in --onlydeps
      #     mode, it's not mandatory either.
      if "--selective" in emerge.opts:
        selective = emerge.opts["--selective"] != "n"
      else:
        selective = "--noreplace" in emerge.opts or "--update" in emerge.opts
      onlydeps = "--onlydeps" in emerge.opts
      if not selective:
        for pkg in emerge.cmdline_packages:
          # If the package specified on the command-line is in our install
          # list, mark it as non-optional.
          found = False
          for db_pkg in final_db.match_pkgs(pkg):
            this_pkg = deps_info.get(db_pkg.cpv)
            if this_pkg:
              found = True
              this_pkg["optional"] = False

          # We didn't find the package in our final db. If we're not in
          # --onlydeps mode, this likely means that the package was specified
          # in package.provided.
          if not found and not onlydeps and "--verbose" in emerge.opts:
            print "Skipping %s (is it in package.provided?)" % pkg

      # Schedule packages that aren't on the install list for removal
      rm_pkgs = set(deps_map.keys()) - set(deps_info.keys())

      # Schedule optional packages for removal
      for pkg, info in deps_info.items():
        if info["optional"]:
          rm_pkgs.add(pkg)

      # Remove the packages we don't want, simplifying the graph and making
      # it easier for us to crack cycles.
      for pkg in sorted(rm_pkgs):
        this_pkg = deps_map[pkg]
        needs = this_pkg["needs"]
        provides = this_pkg["provides"]
        for dep in needs:
          dep_provides = deps_map[dep]["provides"]
          dep_provides.update(provides)
          dep_provides.discard(pkg)
          dep_provides.discard(dep)
        for target in provides:
          target_needs = deps_map[target]["needs"]
          target_needs.update(needs)
          target_needs.pop(pkg, None)
          target_needs.pop(target, None)
        del deps_map[pkg]

    def SanitizeTree(cycles):
      """Remove circular dependencies.

      We prune all dependencies involved in cycles that go against the emerge
      ordering. This has a nice property: we're guaranteed to merge
      dependencies in the same order that portage does.

      Because we don't treat any dependencies as "soft" unless they're killed
      by a cycle, we pay attention to a larger number of dependencies when
      merging. This hurts performance a bit, but helps reliability.

      Args:
        cycles: Dict of packages involved in cyclic dependencies, mapping each
          package to a list of the cycles the package is involved in. Produced
          by FindCycles().
      """
      for basedep, cycle_info in cycles.iteritems():
        for mycycle in cycle_info["cycles"]:
          info = []
          broken = False
          for i in range(len(mycycle) - 1):
            pkg1, pkg2 = mycycle[i], mycycle[i+1]
            needs = deps_map[pkg1]["needs"]
            depinfo = needs.get(pkg2, "deleted")
            if (deps_info[pkg1]["idx"] >= deps_info[pkg2]["idx"] and
                depinfo != "deleted"):
              depinfo = depinfo + ", deleting"
              broken = True
              del deps_map[pkg1]["needs"][pkg2]
              deps_map[pkg2]["provides"].remove(pkg1)
            info.append(" %s -> %s (%s)" % (pkg1, pkg2, depinfo))
          if broken:
            print "Breaking cycle:"
            print "\n".join(info)

    def AddSecretDeps():
      """Find these tagged packages and add extra dependencies.

      For debugging dependency problems.
      """
      for bad in secret_deps:
        needed = secret_deps[bad]
        bad_pkg = None
        needed_pkg = None
        for dep in deps_map:
          if dep.find(bad) != -1:
            bad_pkg = dep
          if dep.find(needed) != -1:
            needed_pkg = dep
        if bad_pkg and needed_pkg:
          deps_map[needed_pkg]["provides"].add(bad_pkg)
          deps_map[bad_pkg]["needs"][needed_pkg] = "secret"

    def MergeChildren(pkg, merge_type):
      """Merge this package and all packages it provides."""

      this_pkg = deps_map[pkg]
      if (this_pkg[merge_type] or pkg not in final_pkgs or
          pkg in rebuild_blacklist):
        return

      # Mark this package as non-optional
      deps_info[pkg]["optional"] = False
      this_pkg[merge_type] = True
      for w in this_pkg["provides"]:
        MergeChildren(w, merge_type)

      if this_pkg["action"] == "nomerge":
        this_pkg["action"] = "merge"

    def RemotePackageDatabase(binhost_url):
      """Grab the latest binary package database from the prebuilt server.

      We need to know the modification times of the prebuilt packages so that
      we know when it is OK to use these packages and when we should rebuild
      them instead.

      Args:
        binhost_url: Base URL of remote packages (PORTAGE_BINHOST).

      Returns:
        A dict mapping package identifiers to modification times.
      """

      if not binhost_url:
        return {}

      url = binhost_url + "/Packages"

      prebuilt_pkgs = {}
      try:
        f = urllib2.urlopen(url)
      except urllib2.HTTPError as e:
        if e.code == 404:
          return {}
        else:
          raise
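      # The Packages index is a sequence of text stanzas; the only fields we
      # consume are "CPV:" (the binary package's identifier) and "MTIME:"
      # (its modification time). For example (values illustrative):
      #   CPV: chromeos-base/power_manager-0.0.1-r1
      #   MTIME: 1270000000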
      for line in f:
        if line.startswith("CPV: "):
          pkg = line.replace("CPV: ", "").rstrip()
        elif line.startswith("MTIME: "):
          prebuilt_pkgs[pkg] = int(line[:-1].replace("MTIME: ", ""))
      f.close()

      return prebuilt_pkgs

    def LocalPackageDatabase():
      """Get the modification times of the packages in the local database.

      We need to know the modification times of the local packages so that we
      know when they need to be rebuilt.

      Returns:
        A dict mapping package identifiers to modification times.
      """
      if self.board:
        path = "/build/%s/packages/Packages" % self.board
      else:
        path = "/var/lib/portage/pkgs/Packages"
      local_pkgs = {}
      for line in file(path):
        if line.startswith("CPV: "):
          pkg = line.replace("CPV: ", "").rstrip()
        elif line.startswith("MTIME: "):
          local_pkgs[pkg] = int(line[:-1].replace("MTIME: ", ""))

      return local_pkgs

    def AutoRebuildDeps(local_pkgs, remote_pkgs, cycles):
      """Recursively rebuild packages when necessary using modification times.

      If you've modified a package, it's a good idea to rebuild all the
      packages that depend on it from source. This function looks for any
      packages which depend on packages that have been modified and ensures
      that they get rebuilt.

      Args:
        local_pkgs: Modification times from the local database.
        remote_pkgs: Modification times from the prebuilt server.
        cycles: Dictionary returned from FindCycles()

      Returns:
        The set of packages we marked as needing to be merged.
      """

      def PrebuiltsReady(pkg, pkg_db, cache):
        """Check whether the prebuilts are ready for pkg and all deps.

        Args:
          pkg: The specified package.
          pkg_db: The package DB to use.
          cache: A dict, where the results are stored.

        Returns:
          True iff the prebuilts are ready for pkg and all deps.
        """
        if pkg in cache:
          return cache[pkg]
        if pkg not in pkg_db:
          cache[pkg] = False
        else:
          cache[pkg] = True
          for dep in deps_map[pkg]["needs"]:
            if not PrebuiltsReady(dep, pkg_db, cache):
              cache[pkg] = False
              break
        return cache[pkg]

      def LastModifiedWithDeps(pkg, pkg_db, cache):
        """Calculate the last modified time of a package and its dependencies.

        This function looks at all the packages needed by the specified package
        and checks the most recent modification time of all of those packages.
        If the dependencies of a package were modified more recently than the
        package itself, then we know the package needs to be rebuilt.

        Args:
          pkg: The specified package.
          pkg_db: The package DB to use.
          cache: A dict, where the last modified times are stored.

        Returns:
          The last modified time of the specified package and its dependencies.
        """
        if pkg in cache:
          return cache[pkg]

        cache[pkg] = pkg_db.get(pkg, 0)
        for dep in deps_map[pkg]["needs"]:
          t = LastModifiedWithDeps(dep, pkg_db, cache)
          cache[pkg] = max(cache[pkg], t)
        return cache[pkg]

      # For every package that's getting updated in our local cache (binary
      # or source), make sure we also update the children. If a package is
      # built from source, all children must also be built from source.
      local_ready_cache, remote_ready_cache = {}, {}
      local_mtime_cache, remote_mtime_cache = {}, {}
      for pkg in final_pkgs:
        # If all the necessary local packages are ready, and their
        # modification times are in sync, we don't need to do anything here.
        local_mtime = LastModifiedWithDeps(pkg, local_pkgs, local_mtime_cache)
        local_ready = PrebuiltsReady(pkg, local_pkgs, local_ready_cache)
        if (not local_ready or local_pkgs.get(pkg, 0) < local_mtime and
            pkg not in cycles):
          # OK, at least one package is missing from the local cache or is
          # outdated. This means we're going to have to install the package
          # and all dependencies.
          #
          # If all the necessary remote packages are ready, and they're at
          # least as new as our local packages, we can install them.
          # Otherwise, we need to build from source.
          remote_mtime = LastModifiedWithDeps(pkg, remote_pkgs,
                                              remote_mtime_cache)
          remote_ready = PrebuiltsReady(pkg, remote_pkgs, remote_ready_cache)
          if remote_ready and (local_mtime <= remote_mtime or pkg in cycles):
            MergeChildren(pkg, "mandatory")
          else:
            MergeChildren(pkg, "mandatory_source")

    def UsePrebuiltPackages():
      """Update packages that can use prebuilts to do so."""
      start = time.time()

      # The bintree is the database of binary packages. By default, it's
      # empty.
      bintree = emerge.trees[root]["bintree"]
      bindb = bintree.dbapi
      root_config = emerge.root_config
      prebuilt_pkgs = {}

      # Populate the DB with packages
      bintree.populate("--getbinpkg" in emerge.opts,
                       "--getbinpkgonly" in emerge.opts)

      # Update packages that can use prebuilts to do so.
      for pkg, info in deps_map.iteritems():
        if info and not info["mandatory_source"] and info["action"] == "merge":
          db_keys = list(bindb._aux_cache_keys)
          try:
            db_vals = bindb.aux_get(pkg, db_keys + ["MTIME"])
          except KeyError:
            # No binary package
            continue
          mtime = int(db_vals.pop() or 0)
          metadata = zip(db_keys, db_vals)
          db_pkg = Package(built=True, cpv=pkg, installed=False,
                           metadata=metadata, onlydeps=False, mtime=mtime,
                           operation="merge", root_config=root_config,
                           type_name="binary")
          self.package_db[pkg] = db_pkg

      seconds = time.time() - start
      if "--quiet" not in emerge.opts:
        print "Prebuilt DB populated in %dm%.1fs" % (seconds / 60, seconds % 60)

      return prebuilt_pkgs

    def AddRemainingPackages():
      """Fill in packages that don't have entries in the package db.

      Every package we are installing needs an entry in the package db.
      This function should only be called after we have removed the
      packages that are not being merged from our deps_map.
      """
      for pkg in deps_map:
        if pkg not in self.package_db:
          if deps_map[pkg]["action"] != "merge":
            # We should only fill in packages that are being merged. If
            # there are any other packages here, something funny is going on.
            print "Missing entry for %s in package db" % pkg
            sys.exit(1)

          db_pkg = emerge.depgraph._pkg(pkg, "ebuild", emerge.root_config)
          self.package_db[pkg] = db_pkg

    ReverseTree(deps_tree)
    BuildFinalPackageSet()
    AddSecretDeps()

    if self.no_workon_deps:
      for pkg in self.mandatory_source.copy():
        for db_pkg in final_db.match_pkgs(pkg):
          deps_map[str(db_pkg.cpv)]["mandatory_source"] = True
    else:
      for pkg in self.mandatory_source.copy():
        for db_pkg in final_db.match_pkgs(pkg):
          MergeChildren(str(db_pkg.cpv), "mandatory_source")

    cycles = FindCycles()
    if self.rebuild:
      local_pkgs = LocalPackageDatabase()
      remote_pkgs = RemotePackageDatabase(emerge.settings["PORTAGE_BINHOST"])
      AutoRebuildDeps(local_pkgs, remote_pkgs, cycles)

    # We need to remove installed packages so that we can use the dependency
    # ordering of the install process to show us what cycles to crack. Once
    # we've done that, we also need to recalculate our list of cycles so that
    # we don't include the installed packages in our cycles.
    RemoveInstalledPackages()
    cycles = FindCycles()
    SanitizeTree(cycles)
    if deps_map:
      if "--usepkg" in emerge.opts:
        UsePrebuiltPackages()
      AddRemainingPackages()
    return deps_map

  def PrintInstallPlan(self, deps_map):
    """Print an emerge-style install plan.

    The install plan lists what packages we're installing, in order.
    It's useful for understanding what parallel_emerge is doing.

    Args:
      deps_map: The dependency graph.
    """

    def InstallPlanAtNode(target, deps_map):
      nodes = []
      nodes.append(target)
      for dep in deps_map[target]["provides"]:
        del deps_map[dep]["needs"][target]
        if not deps_map[dep]["needs"]:
          nodes.extend(InstallPlanAtNode(dep, deps_map))
      return nodes

    deps_map = copy.deepcopy(deps_map)
    install_plan = []
    plan = set()
    for target, info in deps_map.iteritems():
      if not info["needs"] and target not in plan:
        for item in InstallPlanAtNode(target, deps_map):
          plan.add(item)
          install_plan.append(self.package_db[item])

    for pkg in plan:
      del deps_map[pkg]

    if deps_map:
      print "Cyclic dependencies:", " ".join(deps_map)
      PrintDepsMap(deps_map)
      sys.exit(1)

    self.emerge.depgraph.display(install_plan)


def PrintDepsMap(deps_map):
  """Print dependency graph; for each package, list its prerequisites."""
  for i in sorted(deps_map):
    print "%s: (%s) needs" % (i, deps_map[i]["action"])
    needs = deps_map[i]["needs"]
    for j in sorted(needs):
      print " %s" % (j)
    if not needs:
      print " no dependencies"


class EmergeJobState(object):
  __slots__ = ["done", "filename", "last_output_seek", "last_output_timestamp",
               "pkgname", "retcode", "start_timestamp", "target"]

  def __init__(self, target, pkgname, done, filename, start_timestamp,
               retcode=None):

    # The full name of the target we're building (e.g.
    # chromeos-base/chromeos-0.0.1-r60)
    self.target = target

    # The short name of the target we're building (e.g. chromeos-0.0.1-r60)
    self.pkgname = pkgname

    # Whether the job is done. (True if the job is done; false otherwise.)
    self.done = done

    # The filename where output is currently stored.
    self.filename = filename

    # The location (in bytes) of the end of the last complete line we printed.
    # This starts off at zero. We use this to jump to the right place when we
    # print output from the same ebuild multiple times.
    self.last_output_seek = 0

    # The timestamp of the last time we printed output. Since we haven't
    # printed output yet, this starts at zero.
    self.last_output_timestamp = 0

    # The return code of our job, if the job is actually finished.
    self.retcode = retcode

    # The timestamp when our job started.
    self.start_timestamp = start_timestamp


def SetupWorkerSignals():
  def ExitHandler(signum, frame):
    # Remove our signal handlers so we don't get called recursively.
    signal.signal(signal.SIGINT, signal.SIG_DFL)
    signal.signal(signal.SIGTERM, signal.SIG_DFL)

    # Try to exit cleanly
    sys.exit(1)

  # Ensure that we exit quietly and cleanly, if possible, when we receive
  # SIGTERM or SIGINT signals. By default, when the user hits CTRL-C, all
  # of the child processes will print details about KeyboardInterrupt
  # exceptions, which isn't very helpful.
  signal.signal(signal.SIGINT, ExitHandler)
  signal.signal(signal.SIGTERM, ExitHandler)


def EmergeWorker(task_queue, job_queue, emerge, package_db):
  """This worker emerges any packages given to it on the task_queue.

  Args:
    task_queue: The queue of tasks for this worker to do.
    job_queue: The queue of results from the worker.
    emerge: An EmergeData() object.
    package_db: A dict, mapping package ids to portage Package objects.

  It expects package identifiers to be passed to it via task_queue. When
  a task is started, it pushes the (target, filename) to the started_queue.
  The output is stored in filename. When a merge starts or finishes, we push
  EmergeJobState objects to the job_queue.
  """

  SetupWorkerSignals()
  settings, trees, mtimedb = emerge.settings, emerge.trees, emerge.mtimedb
  opts, spinner = emerge.opts, emerge.spinner
  opts["--nodeps"] = True
  while True:
    # Wait for a new item to show up on the queue. This is a blocking wait,
    # so if there's nothing to do, we just sit here.
    target = task_queue.get()
    db_pkg = package_db[target]
    db_pkg.root_config = emerge.root_config
    install_list = [db_pkg]
    pkgname = db_pkg.pf
    output = tempfile.NamedTemporaryFile(prefix=pkgname + "-", delete=False)
    start_timestamp = time.time()
    job = EmergeJobState(target, pkgname, False, output.name, start_timestamp)
    job_queue.put(job)
    if "--pretend" in opts:
      retcode = 0
    else:
      save_stdout = sys.stdout
      save_stderr = sys.stderr
      try:
        sys.stdout = output
        sys.stderr = output
        scheduler = Scheduler(settings, trees, mtimedb, opts, spinner,
                              install_list, [], emerge.scheduler_graph)
        retcode = scheduler.merge()
      except Exception:
        traceback.print_exc(file=output)
        retcode = 1
      finally:
        sys.stdout = save_stdout
        sys.stderr = save_stderr
        output.close()
      if retcode is None:
        retcode = 0

    job = EmergeJobState(target, pkgname, True, output.name, start_timestamp,
                         retcode)
    job_queue.put(job)


class LinePrinter(object):
  """Helper object to print a single line."""

  def __init__(self, line):
    self.line = line

  def Print(self, seek_locations):
    print self.line


class JobPrinter(object):
  """Helper object to print output of a job."""

  def __init__(self, job, unlink=False):
    """Print output of job.

    If unlink is True, unlink the job output file when done."""
    self.current_time = time.time()
    self.job = job
    self.unlink = unlink

  def Print(self, seek_locations):

    job = self.job

    # Calculate how long the job has been running.
    seconds = self.current_time - job.start_timestamp

    # Note that we've printed out the job so far.
    job.last_output_timestamp = self.current_time

    # Note that we're starting the job
    info = "job %s (%dm%.1fs)" % (job.pkgname, seconds / 60, seconds % 60)
    last_output_seek = seek_locations.get(job.filename, 0)
    if last_output_seek:
      print "=== Continue output for %s ===" % info
    else:
      print "=== Start output for %s ===" % info

    # Print actual output from job
    f = codecs.open(job.filename, encoding='utf-8', errors='replace')
    f.seek(last_output_seek)
    prefix = job.pkgname + ":"
    for line in f:

      # Save off our position in the file
      if line and line[-1] == "\n":
        last_output_seek = f.tell()
        line = line[:-1]

      # Print our line
      print prefix, line.encode('utf-8', 'replace')
    f.close()

    # Save our last spot in the file so that we don't print out the same
    # location twice.
    seek_locations[job.filename] = last_output_seek

    # Note end of output section
    if job.done:
      print "=== Complete: %s ===" % info
    else:
      print "=== Still running: %s ===" % info

    if self.unlink:
      os.unlink(job.filename)


def PrintWorker(queue):
  """A worker that prints stuff to the screen as requested."""
  SetupWorkerSignals()
  seek_locations = {}
  while True:
    job = queue.get()
    if job:
      job.Print(seek_locations)
    else:
      break


class EmergeQueue(object):
  """Class to schedule emerge jobs according to a dependency graph."""

  def __init__(self, deps_map, emerge, package_db, show_output):
    # Store the dependency graph.
    self._deps_map = deps_map
    # Initialize the running queue to empty
    self._jobs = {}
    # List of total package installs represented in deps_map.
    install_jobs = [x for x in deps_map if deps_map[x]["action"] == "merge"]
    self._total_jobs = len(install_jobs)
    self._show_output = show_output

    if "--pretend" in emerge.opts:
      print "Skipping merge because of --pretend mode."
      sys.exit(0)

    # Setup scheduler graph object. This is used by the child processes
    # to help schedule jobs.
    emerge.scheduler_graph = emerge.depgraph.schedulerGraph()

    # Calculate how many jobs we can run in parallel. We don't want to pass
    # the --jobs flag over to emerge itself, because that'll tell emerge to
    # hide its output, and said output is quite useful for debugging hung
    # jobs.
    procs = min(self._total_jobs,
                emerge.opts.pop("--jobs", multiprocessing.cpu_count()))
    self._emerge_queue = multiprocessing.Queue()
    self._job_queue = multiprocessing.Queue()
    self._print_queue = multiprocessing.Queue()
    args = (self._emerge_queue, self._job_queue, emerge, package_db)
    self._pool = multiprocessing.Pool(procs, EmergeWorker, args)
    self._print_worker = multiprocessing.Process(target=PrintWorker,
                                                 args=[self._print_queue])
    self._print_worker.start()

    # Initialize the failed queue to empty.
    self._retry_queue = []
    self._failed = set()

    # Print an update before we launch the merges.
    self._Status()

    # Setup an exit handler so that we print nice messages if we are
    # terminated.
    self._SetupExitHandler()

    # Schedule our jobs.
    for target, info in deps_map.items():
      if not info["needs"]:
        self._Schedule(target)

  def _SetupExitHandler(self):

    def ExitHandler(signum, frame):

      # Kill our signal handlers so we don't get called recursively
      signal.signal(signal.SIGINT, signal.SIG_DFL)
      signal.signal(signal.SIGTERM, signal.SIG_DFL)

      # Print our current job status
      for target, job in self._jobs.iteritems():
        if job:
          self._print_queue.put(JobPrinter(job, unlink=True))

      # Notify the user that we are exiting
      self._Print("Exiting on signal %s" % signum)

      # Exit when print worker is done.
      self._print_queue.put(None)
      self._print_worker.join()
      sys.exit(1)

    # Print out job status when we are killed
    signal.signal(signal.SIGINT, ExitHandler)
    signal.signal(signal.SIGTERM, ExitHandler)

  def _Schedule(self, target):
    # We maintain a tree of all deps, if this doesn't need
    # to be installed just free up its children and continue.
    # It is possible to reinstall deps of deps, without reinstalling
    # first level deps, like so:
    #   chromeos (merge) -> eselect (nomerge) -> python (merge)
    if self._deps_map[target]["action"] == "nomerge":
      self._Finish(target)
    else:
      # Kick off the build if it's marked to be built.
      self._jobs[target] = None
      self._emerge_queue.put(target)

  def _LoadAvg(self):
    loads = open("/proc/loadavg", "r").readline().split()[:3]
    return " ".join(loads)

  def _Print(self, line):
    """Print a single line."""
    self._print_queue.put(LinePrinter(line))

  def _Status(self):
    """Print status."""
    current_time = time.time()
    seconds = current_time - GLOBAL_START
    line = ("Pending %s, Ready %s, Running %s, Retrying %s, Total %s "
            "[Time %dm%.1fs Load %s]")
    qsize = self._emerge_queue.qsize()
    self._Print(line % (len(self._deps_map), qsize, len(self._jobs) - qsize,
                        len(self._retry_queue), self._total_jobs,
                        seconds / 60, seconds % 60, self._LoadAvg()))

    # Print interim output every minute if --show-output is used. Otherwise,
    # only print output if a job has been running for 60 minutes or more.
    if self._show_output:
      interval = 60
    else:
      interval = 60 * 60
    for target, job in self._jobs.iteritems():
      if job:
        last_timestamp = max(job.start_timestamp, job.last_output_timestamp)
        if last_timestamp + interval < current_time:
          self._print_queue.put(JobPrinter(job))
          job.last_output_timestamp = current_time

  def _Finish(self, target):
    """Mark a target as completed and unblock dependencies."""
    for dep in self._deps_map[target]["provides"]:
      del self._deps_map[dep]["needs"][target]
      if not self._deps_map[dep]["needs"]:
        self._Schedule(dep)
    self._deps_map.pop(target)

  def _Retry(self):
    if self._retry_queue:
      target = self._retry_queue.pop(0)
      self._Schedule(target)
      self._Print("Retrying emerge of %s." % target)

  def Run(self):
    """Run through the scheduled ebuilds.

    Keep running so long as we have uninstalled packages in the
    dependency graph to merge.
    """
    while self._deps_map:
      # Check here that we are actually waiting for something.
      if (self._emerge_queue.empty() and
          self._job_queue.empty() and
          not self._jobs and
          self._deps_map):
        # If we have failed on a package, retry it now.
        if self._retry_queue:
          self._Retry()
        else:
          # Tell the print worker we're done, and wait for it to exit.
          self._print_queue.put(None)
          self._print_worker.join()

          # The dependency map is helpful for debugging failures.
          PrintDepsMap(self._deps_map)

          # Tell the user why we're exiting.
          if self._failed:
            print "Packages failed: %s" % ", ".join(self._failed)
          else:
            print "Deadlock! Circular dependencies!"
          sys.exit(1)

      try:
        job = self._job_queue.get(timeout=5)
      except Queue.Empty:
        # Print an update.
        self._Status()
        continue

      target = job.target

      if not job.done:
        self._jobs[target] = job
        self._Print("Started %s (logged in %s)" % (target, job.filename))
        continue

      # Print output of job
      if self._show_output or job.retcode != 0:
        self._print_queue.put(JobPrinter(job, unlink=True))
      else:
        os.unlink(job.filename)
      del self._jobs[target]

      seconds = time.time() - job.start_timestamp
      details = "%s (in %dm%.1fs)" % (target, seconds / 60, seconds % 60)

      # Complain if necessary.
      if job.retcode != 0:
        # Handle job failure.
        if target in self._failed:
          # If this job has failed previously, give up.
          self._Print("Failed %s. Your build has failed." % details)
        else:
          # Queue up this build to try again after a long while.
          self._retry_queue.append(target)
          self._failed.add(target)
          self._Print("Failed %s, retrying later." % details)
      else:
        if target in self._failed and self._retry_queue:
          # If we have successfully retried a failed package, and there
          # are more failed packages, try the next one. We will only have
          # one retrying package actively running at a time.
          self._Retry()

        self._Print("Completed %s" % details)
        # Mark as completed and unblock waiting ebuilds.
        self._Finish(target)

      # Print an update.
      self._Status()

    # Tell the print worker we're done, and wait for it to exit.
    self._print_queue.put(None)
    self._print_worker.join()


def main():

  deps = DepGraphGenerator()
  deps.Initialize(sys.argv[1:])
  emerge = deps.emerge

  if emerge.action is not None:
    sys.argv = deps.ParseParallelEmergeArgs(sys.argv)
    sys.exit(emerge_main())
  elif not emerge.cmdline_packages:
    Usage()
    sys.exit(1)

  # Unless we're in pretend mode, there's not much point running without
  # root access. We need to be able to install packages.
  #
  # NOTE: Even if you're running --pretend, it's a good idea to run
  # parallel_emerge with root access so that portage can write to the
  # dependency cache. This is important for performance.
  if "--pretend" not in emerge.opts and portage.secpass < 2:
    print "parallel_emerge: superuser access is required."
    sys.exit(1)

  if "--quiet" not in emerge.opts:
    cmdline_packages = " ".join(emerge.cmdline_packages)
    print "Starting fast-emerge."
    print " Building package %s on %s" % (cmdline_packages,
                                          deps.board or "root")

  deps_tree, deps_info = deps.GenDependencyTree()

  # You want me to be verbose? I'll give you two trees! Twice as much value.
  if "--tree" in emerge.opts and "--verbose" in emerge.opts:
    deps.PrintTree(deps_tree)

  deps_graph = deps.GenDependencyGraph(deps_tree, deps_info)

  # OK, time to print out our progress so far.
  deps.PrintInstallPlan(deps_graph)
  if "--tree" in emerge.opts:
    PrintDepsMap(deps_graph)

  # Run the queued emerges.
  scheduler = EmergeQueue(deps_graph, emerge, deps.package_db, deps.show_output)
  scheduler.Run()

  # Update world.
  if ("--oneshot" not in emerge.opts and
      "--pretend" not in emerge.opts):
    world_set = emerge.root_config.sets["selected"]
    new_world_pkgs = []
    root = emerge.settings["ROOT"]
    final_db = emerge.depgraph._dynamic_config.mydbapi[root]
    for pkg in emerge.cmdline_packages:
      for db_pkg in final_db.match_pkgs(pkg):
        print "Adding %s to world" % db_pkg.cp
        new_world_pkgs.append(db_pkg.cp)
    if new_world_pkgs:
      world_set.update(new_world_pkgs)

  # Update environment (library cache, symlinks, etc.)
  if deps.board and "--pretend" not in emerge.opts:
    portage.env_update()

  print "Done"

if __name__ == "__main__":
  main()