diff --git a/parallel_emerge b/parallel_emerge index dd0dfac47c..5b09ec6e42 100755 --- a/parallel_emerge +++ b/parallel_emerge @@ -38,21 +38,55 @@ Basic operation: of the same package for a runtime dep). """ +import copy +import multiprocessing import os -import re +import Queue import shlex -import subprocess import sys import tempfile import time -import _emerge.main +import urllib2 + +# If PORTAGE_USERNAME isn't specified, scrape it from the $HOME variable. On +# Chromium OS, the default "portage" user doesn't have the necessary +# permissions. It'd be easier if we could default to $USERNAME, but $USERNAME +# is "root" here because we get called through sudo. +# +# We need to set this before importing any portage modules, because portage +# looks up "PORTAGE_USERNAME" at import time. +# +# NOTE: .bashrc sets PORTAGE_USERNAME = $USERNAME, so most people won't +# encounter this case unless they have an old chroot or blow away the +# environment by running sudo without the -E specifier. +if "PORTAGE_USERNAME" not in os.environ: + homedir = os.environ["HOME"] + if homedir.startswith("/home/"): + os.environ["PORTAGE_USERNAME"] = homedir.split("/")[2] + +# Portage doesn't expose dependency trees in its public API, so we have to +# make use of some private APIs here. These modules are found under +# /usr/lib/portage/pym/. +# +# TODO(davidjames): Update Portage to expose public APIs for these features. 
+from _emerge.actions import adjust_configs +from _emerge.actions import load_emerge_config +from _emerge.create_depgraph_params import create_depgraph_params +from _emerge.depgraph import backtrack_depgraph +from _emerge.main import emerge_main +from _emerge.main import parse_opts +from _emerge.Package import Package +from _emerge.Scheduler import Scheduler +from _emerge.stdout_spinner import stdout_spinner +import portage +import portage.debug def Usage(): """Print usage.""" print "Usage:" print " ./parallel_emerge [--board=BOARD] [--workon=PKGS] [--no-workon-deps]" - print " [emerge args] package" + print " [--rebuild] [emerge args] package" print print "Packages specified as workon packages are always built from source." print "Unless --no-workon-deps is specified, packages that depend on these" @@ -64,6 +98,9 @@ def Usage(): print "source. The build_packages script will automatically supply the" print "workon argument to emerge, ensuring that packages selected using" print "cros-workon are rebuilt." + print + print "The --rebuild option rebuilds packages whenever their dependencies" + print "are changed. This ensures that your build is correct." sys.exit(1) @@ -71,488 +108,1004 @@ def Usage(): # but will prevent the package from installing. secret_deps = {} -# Runtime flags. TODO(): Maybe make these command-line options or -# environment variables. -VERBOSE = False -AUTOCLEAN = False - # Global start time GLOBAL_START = time.time() -def ParseArgs(argv): - """Set global vars based on command line. +class EmergeData(object): + """This simple struct holds various emerge variables. - We need to be compatible with emerge arg format. - We scrape arguments that are specific to parallel_emerge, and pass through - the rest directly to emerge. 
- Args: - argv: arguments list - Returns: - triplet of (package list, emerge argumens, board string) - """ - if VERBOSE: - print argv - workon_set = set() - myopts = {} - myopts["workon"] = workon_set - emerge_args = [] - for arg in argv[1:]: - # Specifically match arguments that are specific to parallel_emerge, and - # pass through the rest. - if arg.startswith("--board="): - myopts["board"] = arg.replace("--board=", "") - elif arg.startswith("--workon="): - workon_str = arg.replace("--workon=", "") - workon_set.update(shlex.split(" ".join(shlex.split(workon_str)))) - elif arg == "--no-workon-deps": - myopts["no-workon-deps"] = True - else: - # Not a package name, so pass through to emerge. - emerge_args.append(arg) - - emerge_action, emerge_opts, emerge_files = _emerge.main.parse_opts( - emerge_args) - - return myopts, emerge_action, emerge_opts, emerge_files - - -def EmergeCommand(): - """Helper function to return the base emerge commandline. - - This is configured for board type, and including pass thru args, - using global variables. TODO(): Unglobalfy. - Returns: - string containing emerge command. - """ - emerge = "emerge" - if "board" in OPTS: - emerge += "-" + OPTS["board"] - cmd = [emerge] - for key, val in EMERGE_OPTS.items(): - if val is True: - cmd.append(key) - else: - cmd.extend([key, str(val)]) - return " ".join(cmd) - - -def GetDepsFromPortage(package): - """Get dependency tree info by running emerge. - - Run 'emerge -p --debug package', and get a text output of all deps. - TODO(): Put dep calculation in a library, as cros_extract_deps - also uses this code. - Args: - package: String containing the packages to build. - Returns: - Text output of emerge -p --debug, which can be processed elsewhere. 
- """ - print "Calculating deps for package %s" % package - cmdline = (EmergeCommand() + " -p --debug --color=n --with-bdeps=y " + - "--selective=n " + package) - if OPTS["workon"]: - cmdline += " " + " ".join(OPTS["workon"]) - print "+ %s" % cmdline - - # Store output in a temp file as it is too big for a unix pipe. - stderr_buffer = tempfile.TemporaryFile() - stdout_buffer = tempfile.TemporaryFile() - # Launch the subprocess. - start = time.time() - depsproc = subprocess.Popen(shlex.split(str(cmdline)), stderr=stderr_buffer, - stdout=stdout_buffer, bufsize=64*1024) - depsproc.wait() - seconds = time.time() - start - print "Deps calculated in %dm%.1fs" % (seconds / 60, seconds % 60) - stderr_buffer.seek(0) - stderr_raw = stderr_buffer.read() - info_start = stderr_raw.find("digraph") - stdout_buffer.seek(0) - stdout_raw = stdout_buffer.read() - lines = [] - if info_start != -1: - lines = stderr_raw[info_start:].split("\n") - lines.extend(stdout_raw.split("\n")) - if VERBOSE or depsproc.returncode != 0: - output = stderr_raw + stdout_raw - print output - if depsproc.returncode != 0: - print "Failed to generate deps" - sys.exit(1) - - return lines - - -def DepsToTree(lines): - """Regex the output from 'emerge --debug' to generate a nested dict of deps. - - Args: - lines: Output from 'emerge -p --debug package'. - Returns: - dep_tree: Nested dict of dependencies, as specified by emerge. - There may be dupes, or circular deps. - - We need to regex lines as follows: - hard-host-depends depends on - ('ebuild', '/', 'dev-lang/swig-1.3.36', 'merge') depends on - ('ebuild', '/', 'dev-lang/perl-5.8.8-r8', 'merge') (buildtime) - ('binary', '/.../rootfs/', 'sys-auth/policykit-0.9-r1', 'merge') depends on - ('binary', '/.../rootfs/', 'x11-misc/xbitmaps-1.1.0', 'merge') (no children) + This struct helps us easily pass emerge variables around as a unit. + These variables are used for calculating dependencies and installing + packages. 
""" - re_deps = re.compile(r"(?P\W*)\(\'(?P\w+)\', " - r"\'(?P[\w/\.-]+)\'," - r" \'(?P[\w\+-]+)/(?P[\w\+-]+)-" - r"(?P\d+[\w\.-]*)\', \'(?P\w+)\'\) " - r"(?P(depends on|\(.*\)))") - re_origdeps = re.compile(r"(?P[\w\+/=.<>~*-]+) depends on") - re_installed_package = re.compile( - r"\[(?P[^\]]*)\] " - r"(?P[\w\+-]+)/" - r"(?P[\w\+-]+)-" - r"(?P\d+[\w\.-]*)( \[" - r"(?P\d+[\w\.-]*)\])?" - ) - re_failed = re.compile(r".*\) depends on.*") - deps_tree = {} - deps_stack = [] - deps_info = {} - for line in lines: - m = re_deps.match(line) - m_orig = re_origdeps.match(line) - m_installed = re_installed_package.match(line) - if m: - pkgname = m.group("pkgname") - pkgdir = m.group("pkgdir") - pkgtype = m.group("pkgtype") - indent = m.group("indent") - doins = m.group("action") - deptype = m.group("deptype") - depth = 1 - if not indent: - depth = 0 - version = m.group("version") + __slots__ = ["action", "cmdline_packages", "depgraph", "mtimedb", "opts", + "root_config", "scheduler_graph", "settings", "spinner", + "trees"] - # If we are indented, we should have - # found a "depends on" previously. - if len(deps_stack) < depth: - print "FAIL: corrupt input at:" - print line - print "No Parent." + def __init__(self): + # The action the user requested. If the user is installing packages, this + # is None. If the user is doing anything other than installing packages, + # this will contain the action name, which will map exactly to the + # long-form name of the associated emerge option. + # + # Example: If you call parallel_emerge --unmerge package, the action name + # will be "unmerge" + self.action = None + + # The list of packages the user passed on the command-line. + self.cmdline_packages = None + + # The emerge dependency graph. It'll contain all the packages involved in + # this merge, along with their versions. + self.depgraph = None + + # A dict of the options passed to emerge. 
This dict has been cleaned up + # a bit by parse_opts, so that it's a bit easier for the emerge code to + # look at the options. + # + # Emerge takes a few shortcuts in its cleanup process to make parsing of + # the options dict easier. For example, if you pass in "--usepkg=n", the + # "--usepkg" flag is just left out of the dictionary altogether. Because + # --usepkg=n is the default, this makes parsing easier, because emerge + # can just assume that if "--usepkg" is in the dictionary, it's enabled. + # + # These cleanup processes aren't applied to all options. For example, the + # --with-bdeps flag is passed in as-is. For a full list of the cleanups + # applied by emerge, see the parse_opts function in the _emerge.main + # package. + self.opts = None + + # A dictionary used by portage to maintain global state. This state is + # loaded from disk when portage starts up, and saved to disk whenever we + # call mtimedb.commit(). + # + # This database contains information about global updates (i.e., what + # version of portage we have) and what we're currently doing. Portage + # saves what it is currently doing in this database so that it can be + # resumed when you call it with the --resume option. + # + # parallel_emerge does not save what it is currently doing in the mtimedb, + # so we do not support the --resume option. + self.mtimedb = None + + # The portage configuration for our current root. This contains the portage + # settings (see below) and the three portage trees for our current root. + # (The three portage trees are explained below, in the documentation for + # the "trees" member.) + self.root_config = None + + # The scheduler graph is used by emerge to calculate what packages to + # install. We don't actually install any deps, so this isn't really used, + # but we pass it in to the Scheduler object anyway. + self.scheduler_graph = None + + # Portage settings for our current session. 
Most of these settings are set + # in make.conf inside our current install root. + self.settings = None + + # The spinner, which spews stuff to stdout to indicate that portage is + # doing something. We maintain our own spinner, so we set the portage + # spinner to "silent" mode. + self.spinner = None + + # The portage trees. There are separate portage trees for each root. To get + # the portage tree for the current root, you can look in self.trees[root], + # where root = self.settings["ROOT"]. + # + # In each root, there are three trees: vartree, porttree, and bintree. + # - vartree: A database of the currently-installed packages. + # - porttree: A database of ebuilds, that can be used to build packages. + # - bintree: A database of binary packages. + self.trees = None + + +class DepGraphGenerator(object): + """Grab dependency information about packages from portage. + + Typical usage: + deps = DepGraphGenerator() + deps.Initialize(sys.argv[1:]) + deps_tree, deps_info = deps.GenDependencyTree() + deps_graph = deps.GenDependencyGraph(deps_tree, deps_info) + deps.PrintTree(deps_tree) + PrintDepsMap(deps_graph) + """ + + __slots__ = ["board", "emerge", "mandatory_source", "no_workon_deps", + "package_db", "rebuild"] + + def __init__(self): + self.board = None + self.emerge = EmergeData() + self.mandatory_source = set() + self.no_workon_deps = False + self.package_db = {} + self.rebuild = False + + def ParseParallelEmergeArgs(self, argv): + """Read the parallel emerge arguments from the command-line. + + We need to be compatible with emerge arg format. We scrape arguments that + are specific to parallel_emerge, and pass through the rest directly to + emerge. + Args: + argv: arguments list + Returns: + Arguments that don't belong to parallel_emerge + """ + emerge_args = [] + for arg in argv: + # Specifically match arguments that are specific to parallel_emerge, and + # pass through the rest. 
+ if arg.startswith("--board="): + self.board = arg.replace("--board=", "") + elif arg.startswith("--workon="): + workon_str = arg.replace("--workon=", "") + package_list = shlex.split(" ".join(shlex.split(workon_str))) + self.mandatory_source.update(package_list) + elif arg == "--no-workon-deps": + self.no_workon_deps = True + elif arg == "--rebuild": + self.rebuild = True + else: + # Not one of our options, so pass through to emerge. + emerge_args.append(arg) + + if self.rebuild: + if self.no_workon_deps: + print "--rebuild is not compatible with --no-workon-deps" sys.exit(1) - # Go step by step through stack and tree - # until we find our parent. - updatedep = deps_tree - for i in range(0, depth): - updatedep = updatedep[deps_stack[i]]["deps"] + return emerge_args - # Pretty print what we've captured. - indent = "|" + "".ljust(depth, "_") - fullpkg = "%s/%s-%s" % (pkgdir, pkgname, version) - if VERBOSE: - print ("" + indent + " " + pkgdir + "/" + pkgname + " - " + - version + " (" + pkgtype + ", " + doins + - ", " + deptype + ")") + def Initialize(self, args): + """Initializer. Parses arguments and sets up portage state.""" - # Add our new package into the tree, if it's not already there. - updatedep.setdefault(fullpkg, {}) - # Add an empty deps for this new package. - updatedep[fullpkg].setdefault("deps", {}) - # Add the action we should take (merge, nomerge). - updatedep[fullpkg].setdefault("action", doins) - # Add the type of dep. - updatedep[fullpkg].setdefault("deptype", deptype) - # Add the long name of the package - updatedep[fullpkg].setdefault("pkgpath", "%s/%s" % (pkgdir, pkgname)) - # Add the short name of the package - updatedep[fullpkg].setdefault("pkgname", pkgname) + # Parse and strip out args that are just intended for parallel_emerge. + emerge_args = self.ParseParallelEmergeArgs(args) - # Drop any stack entries below our depth. - deps_stack = deps_stack[0:depth] - # Add ourselves to the end of the stack. 
- deps_stack.append(fullpkg) - elif m_orig: - # Also capture "pseudo packages", which are the freeform test - # we requested to be installed. These are generic package names - # like "chromeos" rather than chromeos/chromeos-0.0.1 - depth = 0 - # Tag these with "original" in case they overlap with real packages. - pkgname = "original-%s" % m_orig.group("pkgname") - # Insert this into the deps tree so so we can stick it in "world" - updatedep = deps_tree - for i in range(0, depth): - updatedep = updatedep[deps_stack[i]]["deps"] - if VERBOSE: - print pkgname - # Add our new package into the tree, if it's not already there. - updatedep.setdefault(pkgname, {}) - updatedep[pkgname].setdefault("deps", {}) - # Add the type of dep. - updatedep[pkgname].setdefault("action", "world") - updatedep[pkgname].setdefault("deptype", "normal") - updatedep[pkgname].setdefault("pkgpath", None) - updatedep[pkgname].setdefault("pkgname", None) + # Setup various environment variables based on our current board. These + # variables are normally setup inside emerge-${BOARD}, but since we don't + # call that script, we have to set it up here. These variables serve to + # point our tools at /build/BOARD and to setup cross compiles to the + # appropriate board as configured in toolchain.conf. + if self.board: + os.environ["PORTAGE_CONFIGROOT"] = "/build/" + self.board + os.environ["PORTAGE_SYSROOT"] = "/build/" + self.board + os.environ["SYSROOT"] = "/build/" + self.board + scripts_dir = os.path.dirname(os.path.realpath(__file__)) + toolchain_path = "%s/../overlays/overlay-%s/toolchain.conf" + f = open(toolchain_path % (scripts_dir, self.board)) + os.environ["CHOST"] = f.readline().strip() + f.close() - # Drop any obsolete stack entries. - deps_stack = deps_stack[0:depth] - # Add ourselves to the end of the stack. 
- deps_stack.append(pkgname) - elif m_installed: - pkgname = m_installed.group("pkgname") - pkgdir = m_installed.group("pkgdir") - version = m_installed.group("version") - oldversion = m_installed.group("oldversion") - desc = m_installed.group("desc") - uninstall = False - if oldversion and (desc.find("U") != -1 or desc.find("D") != -1): - uninstall = True - replace = desc.find("R") != -1 - fullpkg = "%s/%s-%s" % (pkgdir, pkgname, version) - deps_info[fullpkg] = {"idx": len(deps_info), - "pkgdir": pkgdir, - "pkgname": pkgname, - "oldversion": oldversion, - "uninstall": uninstall, - "replace": replace} - else: - # Is this a package that failed to match our huge regex? - m = re_failed.match(line) - if m: - print "\n".join(lines) - print "FAIL: Couldn't understand line:" - print line + # Although CHROMEOS_ROOT isn't specific to boards, it's normally setup + # inside emerge-${BOARD}, so we set it up here for compatibility. It + # will be going away soon as we migrate to CROS_WORKON_SRCROOT. + os.environ.setdefault("CHROMEOS_ROOT", os.environ["HOME"] + "/trunk") + + # Modify the environment to disable locking. + os.environ["PORTAGE_LOCKS"] = "false" + os.environ["UNMERGE_DELAY"] = "0" + + # Parse the emerge options. + action, opts, cmdline_packages = parse_opts(emerge_args) + + # If we're installing to the board, we want the --root-deps option so that + # portage will install the build dependencies to that location as well. + if self.board: + opts.setdefault("--root-deps", True) + + # Set environment variables based on options. Portage normally sets these + # environment variables in emerge_main, but we can't use that function, + # because it also does a bunch of other stuff that we don't want. + # TODO(davidjames): Patch portage to move this logic into a function we can + # reuse here. 
+ if "--debug" in opts: + os.environ["PORTAGE_DEBUG"] = "1" + if "--config-root" in opts: + os.environ["PORTAGE_CONFIGROOT"] = opts["--config-root"] + if "--root" in opts: + os.environ["ROOT"] = opts["--root"] + if "--accept-properties" in opts: + os.environ["ACCEPT_PROPERTIES"] = opts["--accept-properties"] + + # Now that we've setup the necessary environment variables, we can load the + # emerge config from disk. + settings, trees, mtimedb = load_emerge_config() + + # Check whether our portage tree is out of date. Typically, this happens + # when you're setting up a new portage tree, such as in setup_board and + # make_chroot. In that case, portage applies a bunch of global updates + # here. Once the updates are finished, we need to commit any changes + # that the global update made to our mtimedb, and reload the config. + # + # Portage normally handles this logic in emerge_main, but again, we can't + # use that function here. + if portage._global_updates(trees, mtimedb["updates"]): + mtimedb.commit() + settings, trees, mtimedb = load_emerge_config(trees=trees) + + # Setup implied options. Portage normally handles this logic in + # emerge_main. + if "--buildpkgonly" in opts or "buildpkg" in settings.features: + opts.setdefault("--buildpkg", True) + if "--getbinpkgonly" in opts: + opts.setdefault("--usepkgonly", True) + opts.setdefault("--getbinpkg", True) + if "getbinpkg" in settings.features: + # Per emerge_main, FEATURES=getbinpkg overrides --getbinpkg=n + opts["--getbinpkg"] = True + if "--getbinpkg" in opts or "--usepkgonly" in opts: + opts.setdefault("--usepkg", True) + if "--fetch-all-uri" in opts: + opts.setdefault("--fetchonly", True) + if "--skipfirst" in opts: + opts.setdefault("--resume", True) + if "--buildpkgonly" in opts: + # --buildpkgonly will not merge anything, so it overrides all binary + # package options. 
+ for opt in ("--getbinpkg", "--getbinpkgonly", + "--usepkg", "--usepkgonly"): + opts.pop(opt, None) + if (settings.get("PORTAGE_DEBUG", "") == "1" and + "python-trace" in settings.features): + portage.debug.set_trace(True) + + # Complain about unsupported options + for opt in ("--ask", "--ask-enter-invalid", "--complete-graph", + "--resume", "--skipfirst"): + if opt in opts: + print "%s is not supported by parallel_emerge" % opt sys.exit(1) - return deps_tree, deps_info + # Make emerge specific adjustments to the config (e.g. colors!) + adjust_configs(opts, trees) + # Save our configuration so far in the emerge object + emerge = self.emerge + emerge.action, emerge.opts = action, opts + emerge.settings, emerge.trees, emerge.mtimedb = settings, trees, mtimedb + emerge.cmdline_packages = cmdline_packages + root = settings["ROOT"] + emerge.root_config = trees[root]["root_config"] -def PrintTree(deps, depth=""): - """Print the deps we have seen in the emerge output. + def GenDependencyTree(self): + """Get dependency tree info from emerge. - Args: - deps: Dependency tree structure. - depth: Allows printing the tree recursively, with indentation. - """ - for entry in deps: - action = deps[entry]["action"] - print "%s %s (%s)" % (depth, entry, action) - PrintTree(deps[entry]["deps"], depth=depth + " ") - - -def GenDependencyGraph(deps_tree, deps_info, package_names): - """Generate a doubly linked dependency graph. - - Args: - deps_tree: Dependency tree structure. - deps_info: More details on the dependencies. - package_names: Names of packages to add to the world file. - Returns: - Deps graph in the form of a dict of packages, with each package - specifying a "needs" list and "provides" list. - """ - deps_map = {} - pkgpaths = {} - - def ReverseTree(packages): - """Convert tree to digraph. - - Take the tree of package -> requirements and reverse it to a digraph of - buildable packages -> packages they unblock. - Args: - packages: Tree(s) of dependencies. 
+ TODO(): Update cros_extract_deps to also use this code. Returns: - Unsanitized digraph. + Dependency tree """ - for pkg in packages: - action = packages[pkg]["action"] - pkgpath = packages[pkg]["pkgpath"] - pkgname = packages[pkg]["pkgname"] - pkgpaths[pkgpath] = pkg - pkgpaths[pkgname] = pkg - this_pkg = deps_map.setdefault( - pkg, {"needs": {}, "provides": set(), "action": "nomerge", - "workon": False, "cmdline": False}) - if action != "nomerge": - this_pkg["action"] = action - this_pkg["deps_info"] = deps_info.get(pkg) - ReverseTree(packages[pkg]["deps"]) - for dep, dep_item in packages[pkg]["deps"].items(): - dep_pkg = deps_map[dep] - dep_type = dep_item["deptype"] - if dep_type != "(runtime_post)": - dep_pkg["provides"].add(pkg) - this_pkg["needs"][dep] = dep_type - - def RemoveInstalledPackages(): - """Remove installed packages, propagating dependencies.""" - - if "--selective" in EMERGE_OPTS: - selective = EMERGE_OPTS["--selective"] != "n" - else: - selective = "--noreplace" in EMERGE_OPTS or "--update" in EMERGE_OPTS - rm_pkgs = set(deps_map.keys()) - set(deps_info.keys()) - for pkg, info in deps_info.items(): - if selective and not deps_map[pkg]["workon"] and info["replace"]: - rm_pkgs.add(pkg) - for pkg in rm_pkgs: - this_pkg = deps_map[pkg] - if this_pkg["cmdline"] and "--oneshot" not in EMERGE_OPTS: - # If "cmdline" is set, this is a world update that was passed on the - # command-line. Keep these unless we're in --oneshot mode. 
- continue - needs = this_pkg["needs"] - provides = this_pkg["provides"] - for dep in needs: - dep_provides = deps_map[dep]["provides"] - dep_provides.update(provides) - dep_provides.discard(pkg) - dep_provides.discard(dep) - for target in provides: - target_needs = deps_map[target]["needs"] - target_needs.update(needs) - if pkg in target_needs: - del target_needs[pkg] - if target in target_needs: - del target_needs[target] - del deps_map[pkg] - - def SanitizeDep(basedep, currdep, visited, cycle): - """Search for circular deps between basedep and currdep, then recurse. - - Args: - basedep: Original dependency, top of stack. - currdep: Bottom of our current recursion, bottom of stack. - visited: Nodes visited so far. - cycle: Array where cycle of circular dependencies should be stored. - TODO(): Break RDEPEND preferentially. - Returns: - True iff circular dependencies are found. - """ - if currdep not in visited: - visited.add(currdep) - for dep in deps_map[currdep]["needs"]: - if dep == basedep or SanitizeDep(basedep, dep, visited, cycle): - cycle.insert(0, dep) - return True - return False - - def SanitizeTree(): - """Remove circular dependencies.""" start = time.time() - for basedep in deps_map: - this_pkg = deps_map[basedep] - if this_pkg["action"] == "world": - # world file updates can't be involved in cycles, - # and they don't have deps_info, so skip them. - continue - for dep in this_pkg["needs"].copy(): - cycle = [] - if (deps_info[basedep]["idx"] <= deps_info[dep]["idx"] and - SanitizeDep(basedep, dep, set(), cycle)): - cycle[:0] = [basedep, dep] - print "Breaking cycle:" - for i in range(len(cycle) - 1): - deptype = deps_map[cycle[i]]["needs"][cycle[i+1]] - print " %s -> %s %s" % (cycle[i], cycle[i+1], deptype) - del this_pkg["needs"][dep] - deps_map[dep]["provides"].remove(basedep) + + # Setup emerge options. + # + # We treat dependency info a bit differently than emerge itself. 
Unless + # you're using --usepkgonly, we disable --getbinpkg and --usepkg here so + # that emerge will look at the dependencies of the source ebuilds rather + # than the binary dependencies. This helps ensure that we have the option + # of merging a package from source, if we want to switch to it with + # --workon and the dependencies have changed. + emerge = self.emerge + emerge_opts = emerge.opts.copy() + emerge_opts.pop("--getbinpkg", None) + if "--usepkgonly" not in emerge_opts: + emerge_opts.pop("--usepkg", None) + if self.mandatory_source or self.rebuild: + # Enable --emptytree so that we get the full tree, which we need for + # dependency analysis. By default, with this option, emerge optimizes + # the graph by removing uninstall instructions from the graph. By + # specifying --tree as well, we tell emerge that it's not safe to remove + # uninstall instructions because we're planning on analyzing the output. + emerge_opts["--tree"] = True + emerge_opts["--emptytree"] = True + + # Create a list of packages to merge + packages = set(emerge.cmdline_packages[:]) + if self.mandatory_source: + packages.update(self.mandatory_source) + + # Tell emerge to be quiet. We print plenty of info ourselves so we don't + # need any extra output from portage. + portage.util.noiselimit = -1 + + # My favorite feature: The silent spinner. It doesn't spin. Ever. + # I'd disable the colors by default too, but they look kind of cool. + emerge.spinner = stdout_spinner() + emerge.spinner.update = emerge.spinner.update_quiet + + if "--quiet" not in emerge.opts: + print "Calculating deps..." + + # Ask portage to build a dependency graph. with the options we specified + # above. + params = create_depgraph_params(emerge_opts, emerge.action) + success, depgraph, _ = backtrack_depgraph( + emerge.settings, emerge.trees, emerge_opts, params, emerge.action, + packages, emerge.spinner) + emerge.depgraph = depgraph + + # Is it impossible to honor the user's request? Bail! 
+ if not success: + depgraph.display_problems() + sys.exit(1) + + # Build our own tree from the emerge digraph. + deps_tree = {} + digraph = depgraph._dynamic_config.digraph + for node, node_deps in digraph.nodes.items(): + # Calculate dependency packages that need to be installed first. Each + # child on the digraph is a dependency. The "operation" field specifies + # what we're doing (e.g. merge, uninstall, etc.). The "priorities" array + # contains the type of dependency (e.g. build, runtime, runtime_post, + # etc.) + # + # Emerge itself actually treats some dependencies as "soft" dependencies + # and sometimes ignores them. We don't do that -- we honor all + # dependencies unless we're forced to prune them because they're cyclic. + # + # Portage refers to the identifiers for packages as a CPV. This acronym + # stands for Category/Package-Version. + # + # Here's an example CPV: chromeos-base/power_manager-0.0.1-r1 + # Split up, this CPV would be: + # C -- Category: chromeos-base + # P -- Package: power_manager + # V -- Version: 0.0.1-r1 + # + # We just refer to CPVs as packages here because it's easier. + deps = {} + for child, priorities in node_deps[0].items(): + deps[str(child.cpv)] = dict(action=str(child.operation), + deptype=str(priorities[-1]), + deps={}) + + # We've built our list of deps, so we can add our package to the tree. + if isinstance(node, Package): + deps_tree[str(node.cpv)] = dict(action=str(node.operation), + deps=deps) + + emptytree = "--emptytree" in emerge.opts + + # Ask portage for its install plan, so that we can only throw out + # dependencies that portage throws out. Also, keep track of the old + # versions of packages that we're either upgrading or replacing. + # + # The "vardb" is the database of installed packages. 
+ vardb = emerge.trees[emerge.settings["ROOT"]]["vartree"].dbapi + deps_info = {} + for pkg in depgraph.altlist(): + if isinstance(pkg, Package): + # If we're not in emptytree mode, and we're going to replace a package + # that is already installed, then this operation is possibly optional. + # ("--selective" mode is handled later, in RemoveInstalledPackages()) + optional = False + if not emptytree and vardb.cpv_exists(pkg.cpv): + optional = True + + # Add the package to our database. + self.package_db[str(pkg.cpv)] = pkg + + # Save off info about the package + deps_info[str(pkg.cpv)] = {"idx": len(deps_info), + "optional": optional} + + # Delete the --tree option, because we don't really want to display a + # tree. We just wanted to get emerge to leave uninstall instructions on + # the graph. Later, when we display the graph, we'll want standard-looking + # output, so removing the --tree option is important. + depgraph._frozen_config.myopts.pop("--tree", None) + seconds = time.time() - start - print "Tree sanitized in %d:%04.1fs" % (seconds / 60, seconds % 60) + if "--quiet" not in emerge.opts: + print "Deps calculated in %dm%.1fs" % (seconds / 60, seconds % 60) - def AddSecretDeps(): - """Find these tagged packages and add extra dependencies. + return deps_tree, deps_info - For debugging dependency problems. + def PrintTree(self, deps, depth=""): + """Print the deps we have seen in the emerge output. + + Args: + deps: Dependency tree structure. + depth: Allows printing the tree recursively, with indentation. 
""" - for bad in secret_deps: - needed = secret_deps[bad] - bad_pkg = None - needed_pkg = None - for dep in deps_map: - if dep.find(bad) != -1: - bad_pkg = dep - if dep.find(needed) != -1: - needed_pkg = dep - if bad_pkg and needed_pkg: - deps_map[needed_pkg]["provides"].add(bad_pkg) - deps_map[bad_pkg]["needs"].add(needed_pkg) + for entry in sorted(deps): + action = deps[entry]["action"] + print "%s %s (%s)" % (depth, entry, action) + self.PrintTree(deps[entry]["deps"], depth=depth + " ") - def WorkOnChildren(pkg): - """Mark this package and all packages it provides as workon packages.""" + def GenDependencyGraph(self, deps_tree, deps_info): + """Generate a doubly linked dependency graph. - this_pkg = deps_map[pkg] - if this_pkg["workon"]: - return False + Args: + deps_tree: Dependency tree structure. + deps_info: More details on the dependencies. + Returns: + Deps graph in the form of a dict of packages, with each package + specifying a "needs" list and "provides" list. + """ + emerge = self.emerge + root = emerge.settings["ROOT"] - this_pkg["workon"] = True - updated = False - for w in this_pkg["provides"]: - if WorkOnChildren(w): - updated = True + # It's useful to know what packages will actually end up on the + # system at some point. Packages in final_db are either already + # installed, or will be installed by the time we're done. + final_db = emerge.depgraph._dynamic_config.mydbapi[root] - if this_pkg["action"] == "nomerge": - pkgpath = deps_tree[pkg]["pkgpath"] - if pkgpath is not None: - OPTS["workon"].add(pkgpath) - updated = True + # final_pkgs is a set of the packages we found in the final_db. These + # packages are either already installed, or will be installed by the time + # we're done. It's populated in BuildFinalPackageSet() + final_pkgs = set() - return updated + # deps_map is the actual dependency graph. + # + # Each package specifies a "needs" list and a "provides" list. The "needs" + # list indicates which packages we depend on. 
The "provides" list + # indicates the reverse dependencies -- what packages need us. + # + # We also provide some other information in the dependency graph: + # - action: What we're planning on doing with this package. Generally, + # "merge", "nomerge", or "uninstall" + # - mandatory_source: + # If true, indicates that this package must be compiled from source. + # We set this for "workon" packages, and for packages where the + # binaries are known to be out of date. + # - mandatory: + # If true, indicates that this package must be installed. We don't care + # whether it's binary or source, unless the mandatory_source flag is + # also set. + # + deps_map = {} - ReverseTree(deps_tree) - AddSecretDeps() + def ReverseTree(packages): + """Convert tree to digraph. - if "no-workon-deps" in OPTS: - for pkgpath in OPTS["workon"].copy(): - pkg = pkgpaths[pkgpath] - deps_map[pkg]["workon"] = True - else: - mergelist_updated = False - for pkgpath in OPTS["workon"].copy(): - pkg = pkgpaths[pkgpath] - if WorkOnChildren(pkg): - mergelist_updated = True - if mergelist_updated: - print "List of packages to merge updated. Recalculate dependencies..." - return None + Take the tree of package -> requirements and reverse it to a digraph of + buildable packages -> packages they unblock. + Args: + packages: Tree(s) of dependencies. + Returns: + Unsanitized digraph. + """ + for pkg in packages: - for pkgpath in package_names: - dep_pkg = deps_map.get("original-%s" % pkgpath) - if dep_pkg and len(dep_pkg["needs"]) == 1: - dep_pkg["cmdline"] = True + # Create an entry for the package + action = packages[pkg]["action"] + default_pkg = {"needs": {}, "provides": set(), "action": action, + "mandatory_source": False, "mandatory": False} + this_pkg = deps_map.setdefault(pkg, default_pkg) - RemoveInstalledPackages() - SanitizeTree() - return deps_map + # Create entries for dependencies of this package first. + ReverseTree(packages[pkg]["deps"]) + + # Add dependencies to this package. 
+ for dep, dep_item in packages[pkg]["deps"].iteritems():
+ dep_pkg = deps_map[dep]
+ dep_type = dep_item["deptype"]
+ if dep_type != "runtime_post":
+ dep_pkg["provides"].add(pkg)
+ this_pkg["needs"][dep] = dep_type
+
+ def BuildFinalPackageSet():
+ # If this package is installed, or will get installed, add it to
+ # final_pkgs
+ for pkg in deps_map:
+ for match in final_db.match_pkgs(pkg):
+ final_pkgs.add(str(match.cpv))
+
+ def FindCycles():
+ """Find cycles in the dependency tree.
+
+ Returns:
+ Dict of packages involved in cyclic dependencies, mapping each package
+ to a list of the cycles the package is involved in.
+ """
+
+ def FindCyclesAtNode(pkg, cycles, unresolved, resolved):
+ """Find cycles in cyclic dependencies starting at specified package.
+
+ Args:
+ pkg: Package identifier.
+ cycles: Dict of cycles found so far.
+ unresolved: Nodes that have been visited but are not fully processed.
+ resolved: Nodes that have been visited and are fully processed.
+ Returns:
+ None. Cycles found are recorded in the cycles dict.
+ """
+ if pkg in resolved:
+ return
+ unresolved.append(pkg)
+ for dep in deps_map[pkg]["needs"]:
+ if dep in unresolved:
+ idx = unresolved.index(dep)
+ mycycle = unresolved[idx:] + [dep]
+ for cycle_pkg in mycycle:
+ info = cycles.setdefault(cycle_pkg, {})
+ info.setdefault("pkgs", set()).update(mycycle)
+ info.setdefault("cycles", []).append(mycycle)
+ else:
+ FindCyclesAtNode(dep, cycles, unresolved, resolved)
+ unresolved.pop()
+ resolved.add(pkg)
+
+ cycles, unresolved, resolved = {}, [], set()
+ for pkg in deps_map:
+ FindCyclesAtNode(pkg, cycles, unresolved, resolved)
+ return cycles
+
+ def RemoveInstalledPackages():
+ """Remove installed packages, propagating dependencies."""
+
+ # If we're not in selective mode, the packages on the command line are
+ # not optional.
+ if "--selective" in emerge.opts: + selective = emerge.opts["--selective"] != "n" + else: + selective = "--noreplace" in emerge.opts or "--update" in emerge.opts + if not selective: + for pkg in emerge.cmdline_packages: + for db_pkg in final_db.match_pkgs(pkg): + deps_info[db_pkg.cpv]["optional"] = False + + # Schedule packages that aren't on the install list for removal + rm_pkgs = set(deps_map.keys()) - set(deps_info.keys()) + + # Schedule optional packages for removal + for pkg, info in deps_info.items(): + if info["optional"]: + rm_pkgs.add(pkg) + + # Remove the packages we don't want, simplifying the graph and making + # it easier for us to crack cycles. + for pkg in sorted(rm_pkgs): + this_pkg = deps_map[pkg] + needs = this_pkg["needs"] + provides = this_pkg["provides"] + for dep in needs: + dep_provides = deps_map[dep]["provides"] + dep_provides.update(provides) + dep_provides.discard(pkg) + dep_provides.discard(dep) + for target in provides: + target_needs = deps_map[target]["needs"] + target_needs.update(needs) + target_needs.pop(pkg, None) + target_needs.pop(target, None) + del deps_map[pkg] + + def SanitizeTree(cycles): + """Remove circular dependencies. + + We only prune circular dependencies that go against the emerge ordering. + This has a nice property: we're guaranteed to merge dependencies in the + same order that portage does. + + Because we don't treat any dependencies as "soft" unless they're killed + by a cycle, we pay attention to a larger number of dependencies when + merging. This hurts performance a bit, but helps reliability. + + Args: + cycles: Dict of packages involved in cyclic dependencies, mapping each + package to a list of the cycles the package is involved in. Produced + by FindCycles(). 
+ """ + for basedep in set(cycles).intersection(deps_map): + this_pkg = deps_map[basedep] + for dep in this_pkg["provides"].intersection(cycles[basedep]["pkgs"]): + if deps_info[basedep]["idx"] >= deps_info[dep]["idx"]: + for mycycle in cycles[basedep]["cycles"]: + if dep in mycycle: + print "Breaking %s -> %s in cycle:" % (dep, basedep) + for i in range(len(mycycle) - 1): + needs = deps_map[mycycle[i]]["needs"] + deptype = needs.get(mycycle[i+1], "deleted") + print " %s -> %s (%s)" % (mycycle[i], mycycle[i+1], deptype) + del deps_map[dep]["needs"][basedep] + this_pkg["provides"].remove(dep) + break + + def AddSecretDeps(): + """Find these tagged packages and add extra dependencies. + + For debugging dependency problems. + """ + for bad in secret_deps: + needed = secret_deps[bad] + bad_pkg = None + needed_pkg = None + for dep in deps_map: + if dep.find(bad) != -1: + bad_pkg = dep + if dep.find(needed) != -1: + needed_pkg = dep + if bad_pkg and needed_pkg: + deps_map[needed_pkg]["provides"].add(bad_pkg) + deps_map[bad_pkg]["needs"][needed_pkg] = "secret" + + def MergeChildren(pkg, merge_type): + """Merge this package and all packages it provides.""" + + this_pkg = deps_map[pkg] + if this_pkg[merge_type] or pkg not in final_pkgs: + return set() + + # Mark this package as non-optional + deps_info[pkg]["optional"] = False + this_pkg[merge_type] = True + for w in this_pkg["provides"]: + MergeChildren(w, merge_type) + + if this_pkg["action"] == "nomerge": + this_pkg["action"] = "merge" + + def RemotePackageDatabase(): + """Grab the latest binary package database from the prebuilt server. + + We need to know the modification times of the prebuilt packages so that we + know when it is OK to use these packages and when we should rebuild them + instead. + + Returns: + A dict mapping package identifiers to modification times. 
+ """ + url = self.emerge.settings["PORTAGE_BINHOST"] + "/Packages" + + prebuilt_pkgs = {} + f = urllib2.urlopen(url) + for line in f: + if line.startswith("CPV: "): + pkg = line.replace("CPV: ", "").rstrip() + elif line.startswith("MTIME: "): + prebuilt_pkgs[pkg] = int(line[:-1].replace("MTIME: ", "")) + f.close() + + return prebuilt_pkgs + + def LocalPackageDatabase(): + """Get the modification times of the packages in the local database. + + We need to know the modification times of the local packages so that we + know when they need to be rebuilt. + + Returns: + A dict mapping package identifiers to modification times. + """ + if self.board: + path = "/build/%s/packages/Packages" % self.board + else: + path = "/var/lib/portage/pkgs/Packages" + local_pkgs = {} + for line in file(path): + if line.startswith("CPV: "): + pkg = line.replace("CPV: ", "").rstrip() + elif line.startswith("MTIME: "): + local_pkgs[pkg] = int(line[:-1].replace("MTIME: ", "")) + + return local_pkgs + + def AutoRebuildDeps(local_pkgs, remote_pkgs, cycles): + """Recursively rebuild packages when necessary using modification times. + + If you've modified a package, it's a good idea to rebuild all the packages + that depend on it from source. This function looks for any packages which + depend on packages that have been modified and ensures that they get + rebuilt. + + Args: + local_pkgs: Modification times from the local database. + remote_pkgs: Modification times from the prebuilt server. + cycles: Dictionary returned from FindCycles() + + Returns: + The set of packages we marked as needing to be merged. + """ + + def PrebuiltsReady(pkg, pkg_db, cache): + """Check whether the prebuilts are ready for pkg and all deps. + + Args: + pkg: The specified package. + pkg_db: The package DB to use. + cache: A dict, where the results are stored. + + Returns: + True iff the prebuilts are ready for pkg and all deps. 
+ """ + if pkg in cache: + return cache[pkg] + if pkg not in pkg_db: + cache[pkg] = False + else: + for dep in deps_map[pkg]["needs"]: + if not PrebuiltsReady(dep, pkg_db, cache): + cache[pkg] = False + break + return cache.setdefault(pkg, True) + + def LastModifiedWithDeps(pkg, pkg_db, cache): + """Calculate the last modified time of a package and its dependencies. + + This function looks at all the packages needed by the specified package + and checks the most recent modification time of all of those packages. + If the dependencies of a package were modified more recently than the + package itself, then we know the package needs to be rebuilt. + + Args: + pkg: The specified package. + pkg_db: The package DB to use. + cache: A dict, where the last modified times are stored. + + Returns: + The last modified time of the specified package and its dependencies. + """ + if pkg in cache: + return cache[pkg] + + cache[pkg] = pkg_db.get(pkg, 0) + for dep in deps_map[pkg]["needs"]: + t = LastModifiedWithDeps(dep, pkg_db, cache) + cache[pkg] = max(cache[pkg], t) + return cache[pkg] + + # For every package that's getting updated in our local cache (binary + # or source), make sure we also update the children. If a package is + # built from source, all children must also be built from source. + local_ready_cache, remote_ready_cache = {}, {} + local_mtime_cache, remote_mtime_cache = {}, {} + for pkg in final_pkgs: + # If all the necessary local packages are ready, and their + # modification times are in sync, we don't need to do anything here. + local_mtime = LastModifiedWithDeps(pkg, local_pkgs, local_mtime_cache) + local_ready = PrebuiltsReady(pkg, local_pkgs, local_ready_cache) + if (not local_ready or local_pkgs.get(pkg, 0) < local_mtime and + pkg not in cycles): + # OK, at least one package is missing from the local cache or is + # outdated. This means we're going to have to install the package + # and all dependencies. 
+ # + # If all the necessary remote packages are ready, and they're at + # least as new as our local packages, we can install them. + # Otherwise, we need to build from source. + remote_mtime = LastModifiedWithDeps(pkg, remote_pkgs, + remote_mtime_cache) + remote_ready = PrebuiltsReady(pkg, remote_pkgs, remote_ready_cache) + if remote_ready and (local_mtime <= remote_mtime or pkg in cycles): + MergeChildren(pkg, "mandatory") + else: + MergeChildren(pkg, "mandatory_source") + + def UsePrebuiltPackages(): + """Update packages that can use prebuilts to do so.""" + start = time.time() + + # The bintree is the database of binary packages. By default, it's + # empty. + bintree = emerge.trees[root]["bintree"] + bindb = bintree.dbapi + root_config = emerge.root_config + prebuilt_pkgs = {} + + # Populate the DB with packages + bintree.populate("--getbinpkg" in emerge.opts, + "--getbinpkgonly" in emerge.opts) + + # Update packages that can use prebuilts to do so. + for pkg, info in deps_map.iteritems(): + if info and not info["mandatory_source"] and info["action"] == "merge": + db_keys = list(bindb._aux_cache_keys) + try: + db_vals = bindb.aux_get(pkg, db_keys + ["MTIME"]) + except KeyError: + # No binary package + continue + mtime = int(db_vals.pop() or 0) + metadata = zip(db_keys, db_vals) + db_pkg = Package(built=True, cpv=pkg, installed=False, + metadata=metadata, onlydeps=False, mtime=mtime, + operation="merge", root_config=root_config, + type_name="binary") + self.package_db[pkg] = db_pkg + + seconds = time.time() - start + if "--quiet" not in emerge.opts: + print "Prebuilt DB populated in %dm%.1fs" % (seconds / 60, seconds % 60) + + return prebuilt_pkgs + + def AddRemainingPackages(): + """Fill in packages that don't have entries in the package db. + + Every package we are installing needs an entry in the package db. + This function should only be called after we have removed the + packages that are not being merged from our deps_map. 
+ """ + for pkg in deps_map: + if pkg not in self.package_db: + if deps_map[pkg]["action"] != "merge": + # We should only fill in packages that are being merged. If + # there's any other packages here, something funny is going on. + print "Missing entry for %s in package db" % pkg + sys.exit(1) + + db_pkg = emerge.depgraph._pkg(pkg, "ebuild", emerge.root_config) + self.package_db[pkg] = db_pkg + + ReverseTree(deps_tree) + BuildFinalPackageSet() + AddSecretDeps() + + if self.no_workon_deps: + for pkg in self.mandatory_source.copy(): + for db_pkg in final_db.match_pkgs(pkg): + deps_map[str(db_pkg.cpv)]["mandatory_source"] = True + else: + for pkg in self.mandatory_source.copy(): + for db_pkg in final_db.match_pkgs(pkg): + MergeChildren(str(db_pkg.cpv), "mandatory_source") + + cycles = FindCycles() + if self.rebuild: + local_pkgs = LocalPackageDatabase() + remote_pkgs = RemotePackageDatabase() + AutoRebuildDeps(local_pkgs, remote_pkgs, cycles) + + # We need to remove installed packages so that we can use the dependency + # ordering of the install process to show us what cycles to crack. Once + # we've done that, we also need to recalculate our list of cycles so that + # we don't include the installed packages in our cycles. + RemoveInstalledPackages() + cycles = FindCycles() + SanitizeTree(cycles) + if deps_map: + if "--usepkg" in emerge.opts: + UsePrebuiltPackages() + AddRemainingPackages() + return deps_map + + def PrintInstallPlan(self, deps_map): + """Print an emerge-style install plan. + + The install plan lists what packages we're installing, in order. + It's useful for understanding what parallel_emerge is doing. + + Args: + deps_map: The dependency graph. 
+ """ + + def InstallPlanAtNode(target, deps_map): + nodes = [] + nodes.append(target) + for dep in deps_map[target]["provides"]: + del deps_map[dep]["needs"][target] + if not deps_map[dep]["needs"]: + nodes.extend(InstallPlanAtNode(dep, deps_map)) + return nodes + + deps_map = copy.deepcopy(deps_map) + install_plan = [] + plan = set() + for target, info in deps_map.iteritems(): + if not info["needs"] and target not in plan: + for item in InstallPlanAtNode(target, deps_map): + plan.add(item) + install_plan.append(self.package_db[item]) + + self.emerge.depgraph.display(install_plan) def PrintDepsMap(deps_map): """Print dependency graph, for each package list it's prerequisites.""" for i in deps_map: print "%s: (%s) needs" % (i, deps_map[i]["action"]) - for j in deps_map[i]["needs"]: + needs = deps_map[i]["needs"] + for j in needs: print " %s" % (j) + if not needs: + print " no dependencies" + + +def EmergeWorker(task_queue, done_queue, emerge, package_db): + """This worker emerges any packages given to it on the task_queue. + + Args: + task_queue: The queue of tasks for this worker to do. + done_queue: The queue of results from the worker. + emerge: An EmergeData() object. + package_db: A dict, mapping package ids to portage Package objects. + + It expects package identifiers to be passed to it via task_queue. When + the package is merged, it pushes (target, retval, outputstr) into the + done_queue. 
+ """ + + settings, trees, mtimedb = emerge.settings, emerge.trees, emerge.mtimedb + opts, spinner = emerge.opts, emerge.spinner + opts["--nodeps"] = True + while True: + target = task_queue.get() + print "Emerging", target + db_pkg = package_db[target] + db_pkg.root_config = emerge.root_config + install_list = [db_pkg] + output = tempfile.TemporaryFile() + outputstr = "" + if "--pretend" in opts: + retval = 0 + else: + save_stdout = sys.stdout + save_stderr = sys.stderr + try: + sys.stdout = output + sys.stderr = output + scheduler = Scheduler(settings, trees, mtimedb, opts, spinner, + install_list, [], emerge.scheduler_graph) + retval = scheduler.merge() + finally: + sys.stdout = save_stdout + sys.stderr = save_stderr + if retval is None: + retval = 0 + if retval != 0: + output.seek(0) + outputstr = output.read() + + done_queue.put((target, retval, outputstr)) class EmergeQueue(object): """Class to schedule emerge jobs according to a dependency graph.""" - def __init__(self, deps_map): + def __init__(self, deps_map, emerge, package_db): # Store the dependency graph. self._deps_map = deps_map - # Initialize the runnable queue to empty. - self._jobs = [] + # Initialize the running queue to empty + self._jobs = set() # List of total package installs represented in deps_map. install_jobs = [x for x in deps_map if deps_map[x]["action"] == "merge"] self._total_jobs = len(install_jobs) - # Initialize the ready queue, these are jobs with no unmet dependencies. - self._emerge_queue = [x for x in deps_map if not deps_map[x]["needs"]] + if "--pretend" in emerge.opts: + print "Skipping merge because of --pretend mode." + sys.exit(0) + + # Setup scheduler graph object. This is used by the child processes + # to help schedule jobs. 
+ emerge.scheduler_graph = emerge.depgraph.schedulerGraph()
+
+ procs = min(self._total_jobs,
+ emerge.opts.get("--jobs", multiprocessing.cpu_count()))
+ self._emerge_queue = multiprocessing.Queue()
+ self._done_queue = multiprocessing.Queue()
+ args = (self._emerge_queue, self._done_queue, emerge, package_db)
+ self._pool = multiprocessing.Pool(procs, EmergeWorker, args)
+
# Initialize the failed queue to empty.
self._retry_queue = []
self._failed = {}
+ # Print an update before we launch the merges.
+ self._Status()
+
+ for target, info in deps_map.items():
+ if not info["needs"]:
+ self._Schedule(target)
+
+ def _Schedule(self, target):
+ # We maintain a tree of all deps, if this doesn't need
+ # to be installed just free up its children and continue.
+ # It is possible to reinstall deps of deps, without reinstalling
+ # first level deps, like so:
+ # chromeos (merge) -> eselect (nomerge) -> python (merge)
+ if self._deps_map[target]["action"] == "nomerge":
+ self._Finish(target)
+ else:
+ # Kick off the build if it's marked to be built.
+ self._jobs.add(target)
+ self._emerge_queue.put(target)
+
def _LoadAvg(self):
loads = open("/proc/loadavg", "r").readline().split()[:3]
return " ".join(loads)
@@ -561,95 +1114,24 @@ class EmergeQueue(object):
"""Print status."""
seconds = time.time() - GLOBAL_START
line = ("Pending %s, Ready %s, Running %s, Retrying %s, Total %s "
- "[Time %dm%ds Load %s]")
- print line % (len(self._deps_map), len(self._emerge_queue),
- len(self._jobs), len(self._retry_queue), self._total_jobs,
+ "[Time %dm%.1fs Load %s]")
+ qsize = self._emerge_queue.qsize()
+ print line % (len(self._deps_map), qsize, len(self._jobs) - qsize,
+ len(self._retry_queue), self._total_jobs,
seconds / 60, seconds % 60, self._LoadAvg())
- def _LaunchOneEmerge(self, target, action):
- """Run emerge --nodeps to do a single package install.
-
- If this is a pseudopackage, that means we're done, and can select in in the
- world file.
- Args: - target: The full package name of the package to install. - eg. "sys-apps/portage-2.17" - Returns: - Triplet containing (target name, subprocess object, output buffer object). - """ - if target.startswith("original-"): - # "original-" signifies one of the packages we originally requested. - # Since we have explicitly installed the versioned package as a dep of - # this, we only need to tag in "world" that we are done with this - # install request. - # --nodeps: Ignore dependencies -- we handle them internally. - # --noreplace: Don't replace or upgrade any packages. (In this case, the - # package is already installed, so we are just updating the - # world file.) - # --selective: Make sure that --noreplace sticks even if --selective=n is - # specified by the user on the command-line. - # NOTE: If the user specifies --oneshot on the command-line, this command - # will do nothing. That is desired, since the user requested not to - # update the world file. - newtarget = target.replace("original-", "") - cmdline = (EmergeCommand() + " --nodeps --selective --noreplace " + - newtarget) - elif action == "uninstall": - cmdline = EmergeCommand() + " --nodeps --unmerge =" + target - else: - # This package is a dependency of something we specifically - # requested. Therefore we should install it but not allow it - # in the "world" file, which represents explicit installs. - # --oneshot" here will prevent it from being tagged in world. - cmdline = EmergeCommand() + " --nodeps --oneshot " - this_pkg = self._deps_map[target] - if this_pkg["workon"]: - # --usepkg=n --usepkgonly=n --getbinpkg=n - # --getbinpkgonly=n: Build from source - # --selective=n: Re-emerge even if package is already installed. 
- cmdline += ("--usepkg=n --usepkgonly=n --getbinpkg=n " - "--getbinpkgonly=n --selective=n ") - cmdline += "=" + target - deps_info = this_pkg["deps_info"] - if deps_info["uninstall"]: - package = "%(pkgdir)s/%(pkgname)s-%(oldversion)s" % deps_info - cmdline += " && %s -C =%s" % (EmergeCommand(), package) - - print "+ %s" % cmdline - - # Store output in a temp file as it is too big for a unix pipe. - stdout_buffer = tempfile.TemporaryFile() - # Modify the environment to disable locking. - portage_env = os.environ.copy() - portage_env["PORTAGE_LOCKS"] = "false" - portage_env["UNMERGE_DELAY"] = "0" - # Autoclean rummages around in the portage database and uninstalls - # old packages. It's not parallel safe, so we skip it. Instead, we - # handle the cleaning ourselves by uninstalling old versions of any - # new packages we install. - if not AUTOCLEAN: - portage_env["AUTOCLEAN"] = "no" - # Launch the subprocess. - emerge_proc = subprocess.Popen( - cmdline, shell=True, stdout=stdout_buffer, - stderr=subprocess.STDOUT, bufsize=64*1024, env=portage_env) - - return (target, emerge_proc, stdout_buffer) - def _Finish(self, target): """Mark a target as completed and unblock dependecies.""" for dep in self._deps_map[target]["provides"]: del self._deps_map[dep]["needs"][target] if not self._deps_map[dep]["needs"]: - if VERBOSE: - print "Unblocking %s" % dep - self._emerge_queue.append(dep) + self._Schedule(dep) self._deps_map.pop(target) def _Retry(self): if self._retry_queue: target = self._retry_queue.pop(0) - self._emerge_queue.append(target) + self._Schedule(target) print "Retrying emerge of %s." % target def Run(self): @@ -658,37 +1140,10 @@ class EmergeQueue(object): Keep running so long as we have uninstalled packages in the dependency graph to merge. """ - secs = 0 - max_jobs = EMERGE_OPTS.get("--jobs", 256) while self._deps_map: - # If we have packages that are ready, kick them off. 
- if self._emerge_queue and len(self._jobs) < max_jobs: - target = self._emerge_queue.pop(0) - action = self._deps_map[target]["action"] - # We maintain a tree of all deps, if this doesn't need - # to be installed just free up it's children and continue. - # It is possible to reinstall deps of deps, without reinstalling - # first level deps, like so: - # chromeos (merge) -> eselect (nomerge) -> python (merge) - if action == "nomerge": - self._Finish(target) - else: - # Kick off the build if it's marked to be built. - print "Emerging %s (%s)" % (target, action) - job = self._LaunchOneEmerge(target, action) - # Append it to the active jobs list. - self._jobs.append(job) - continue - # Wait a bit to see if maybe some jobs finish. You can't - # wait on a set of jobs in python, so we'll just poll. - time.sleep(1) - secs += 1 - if secs % 30 == 0: - # Print an update. - self._Status() - # Check here that we are actually waiting for something. - if (not self._emerge_queue and + if (self._emerge_queue.empty() and + self._done_queue.empty() and not self._jobs and self._deps_map): # If we have failed on a package, retry it now. @@ -708,92 +1163,104 @@ class EmergeQueue(object): PrintDepsMap(self._deps_map) sys.exit(1) - # Check every running job to see if we've finished any jobs. - for target, job, stdout in self._jobs: - # Is it done? - if job.poll() is not None: - # Clean up the subprocess. - job.wait() - # Get the output if we want to print it. - stdout.seek(0) - output = stdout.read() + try: + target, retcode, output = self._done_queue.get(timeout=5) + except Queue.Empty: + # Print an update. + self._Status() + continue - # Remove from active jobs list, we are done with this process. - self._jobs.remove((target, job, stdout)) + self._jobs.discard(target) - # Print if necessary. - if VERBOSE or job.returncode != 0: - print output - if job.returncode != 0: - # Handle job failure. - if target in self._failed: - # If this job has failed previously, give up. 
- print "Failed %s. Your build has failed." % target - else: - # Queue up this build to try again after a long while. - self._retry_queue.append(target) - self._failed[target] = output - print "Failed %s, retrying later." % target - else: - if target in self._failed and self._retry_queue: - # If we have successfully retried a failed package, and there - # are more failed packages, try the next one. We will only have - # one retrying package actively running at a time. - self._Retry() + # Print if necessary. + if retcode != 0: + print output + if retcode != 0: + # Handle job failure. + if target in self._failed: + # If this job has failed previously, give up. + print "Failed %s. Your build has failed." % target + else: + # Queue up this build to try again after a long while. + self._retry_queue.append(target) + self._failed[target] = 1 + print "Failed %s, retrying later." % target + else: + if target in self._failed and self._retry_queue: + # If we have successfully retried a failed package, and there + # are more failed packages, try the next one. We will only have + # one retrying package actively running at a time. + self._Retry() - print "Completed %s" % target - # Mark as completed and unblock waiting ebuilds. - self._Finish(target) + print "Completed %s" % target + # Mark as completed and unblock waiting ebuilds. + self._Finish(target) - # Print an update. - self._Status() + # Print an update. + self._Status() -# Main control code. -OPTS, EMERGE_ACTION, EMERGE_OPTS, EMERGE_FILES = ParseArgs(sys.argv) +def main(): -if EMERGE_ACTION is not None: - # Pass action arguments straight through to emerge - EMERGE_OPTS["--%s" % EMERGE_ACTION] = True - sys.exit(os.system(EmergeCommand() + " " + " ".join(EMERGE_FILES))) -elif not EMERGE_FILES: - Usage() - sys.exit(1) + deps = DepGraphGenerator() + deps.Initialize(sys.argv[1:]) + emerge = deps.emerge -print "Starting fast-emerge." 
-print " Building package %s on %s" % (" ".join(EMERGE_FILES), - OPTS.get("board", "root")) + if emerge.action is not None: + sys.argv = deps.ParseParallelEmergeArgs(sys.argv) + sys.exit(emerge_main()) + elif not emerge.cmdline_packages: + Usage() + sys.exit(1) -# If the user supplied the --workon option, we may have to run emerge twice -# to generate a dependency ordering for packages that depend on the workon -# packages. -for it in range(2): - print "Running emerge to generate deps" - deps_output = GetDepsFromPortage(" ".join(EMERGE_FILES)) + # Unless we're in pretend mode, there's not much point running without + # root access. We need to be able to install packages. + # + # NOTE: Even if you're running --pretend, it's a good idea to run + # parallel_emerge with root access so that portage can write to the + # dependency cache. This is important for performance. + if "--pretend" not in emerge.opts and portage.secpass < 2: + print "parallel_emerge: superuser access is required." + sys.exit(1) - print "Processing emerge output" - dependency_tree, dependency_info = DepsToTree(deps_output) + if "--quiet" not in emerge.opts: + cmdline_packages = " ".join(emerge.cmdline_packages) + print "Starting fast-emerge." + print " Building package %s on %s" % (cmdline_packages, + deps.board or "root") - if VERBOSE: - print "Print tree" - PrintTree(dependency_tree) + deps_tree, deps_info = deps.GenDependencyTree() - print "Generate dependency graph." - dependency_graph = GenDependencyGraph(dependency_tree, dependency_info, - EMERGE_FILES) + # You want me to be verbose? I'll give you two trees! Twice as much value. + if "--tree" in emerge.opts and "--verbose" in emerge.opts: + deps.PrintTree(deps_tree) - if dependency_graph is not None: - break -else: - print "Can't crack cycle" - sys.exit(1) + deps_graph = deps.GenDependencyGraph(deps_tree, deps_info) -if VERBOSE: - PrintDepsMap(dependency_graph) + # OK, time to print out our progress so far. 
+ deps.PrintInstallPlan(deps_graph) + if "--tree" in emerge.opts: + PrintDepsMap(deps_graph) -# Run the queued emerges. -scheduler = EmergeQueue(dependency_graph) -scheduler.Run() + # Run the queued emerges. + scheduler = EmergeQueue(deps_graph, emerge, deps.package_db) + scheduler.Run() -print "Done" + # Update world. + if ("--oneshot" not in emerge.opts and + "--pretend" not in emerge.opts): + world_set = emerge.root_config.sets["selected"] + new_world_pkgs = [] + root = emerge.settings["ROOT"] + final_db = emerge.depgraph._dynamic_config.mydbapi[root] + for pkg in emerge.cmdline_packages: + for db_pkg in final_db.match_pkgs(pkg): + print "Adding %s to world" % db_pkg.cp + new_world_pkgs.append(db_pkg.cp) + if new_world_pkgs: + world_set.update(new_world_pkgs) + print "Done" + +if __name__ == "__main__": + main()