Print status messages in parallel_emerge about jobs that are longer than 2 mins.

Currently, if a build is slow, you only get debug output after an hour.
This is to allow for uncluttered output. If output is too cluttered, it's hard
to distinguish regular output from errors.

The problem with this approach is that it's often hard to debug why the build
is slow. Now that Chrome builds by default, it takes over an hour to build,
and people see little indication as to why. You can show the output with
build_packages --showoutput, but that is often too verbose and clutters the
output too much.

Here's an example log snippet that is hard to debug:

Pending 2, Ready 0, Running 1, Retrying 0, Total 22 [Time 5m20.5s Load 3.69 3.04 2.66]
Pending 2, Ready 0, Running 1, Retrying 0, Total 22 [Time 5m25.5s Load 3.40 2.99 2.65]
... yada yada yada ...
Pending 2, Ready 0, Running 1, Retrying 0, Total 22 [Time 45m32.9s Load 1.00 1.18 2.95]

From the above output, we see that a package is building for a long time, but
we don't know what package. We should output the package name every two minutes
at least so people know what package is taking so long. That's what this change
implements.

BUG=chromium-os:8575
TEST=Confirmed new status appear for regular build_packages. Confirmed
     build_packages --showoutput is unchanged.

Change-Id: Ie18b23ac7a8a6e2c24b43ec3691606c7da5e43cb

Review URL: http://codereview.chromium.org/4318003
This commit is contained in:
David James 2010-11-02 14:52:23 -07:00
parent abb9144d97
commit 0addea30fd

View File

@ -1235,8 +1235,9 @@ def PrintDepsMap(deps_map):
class EmergeJobState(object):
__slots__ = ["done", "filename", "last_output_seek", "last_output_timestamp",
"pkgname", "retcode", "start_timestamp", "target"]
__slots__ = ["done", "filename", "last_notify_timestamp", "last_output_seek",
"last_output_timestamp", "pkgname", "retcode", "start_timestamp",
"target"]
def __init__(self, target, pkgname, done, filename, start_timestamp,
retcode=None):
@ -1254,6 +1255,11 @@ class EmergeJobState(object):
# The filename where output is currently stored.
self.filename = filename
# The timestamp of the last time we printed the name of the log file. We
# print this at the beginning of the job, so this starts at
# start_timestamp.
self.last_notify_timestamp = start_timestamp
# The location (in bytes) of the end of the last complete line we printed.
# This starts off at zero. We use this to jump to the right place when we
# print output from the same ebuild multiple times.
@ -1549,26 +1555,43 @@ class EmergeQueue(object):
def _Status(self):
"""Print status."""
current_time = time.time()
seconds = current_time - GLOBAL_START
line = ("Pending %s, Ready %s, Running %s, Retrying %s, Total %s "
"[Time %dm%.1fs Load %s]")
qsize = self._emerge_queue.qsize()
self._Print(line % (len(self._deps_map), qsize, len(self._jobs) - qsize,
len(self._retry_queue), self._total_jobs,
seconds / 60, seconds % 60, self._LoadAvg()))
no_output = True
# Print interim output every minute if --show-output is used. Otherwise,
# only print output if a job has been running for 60 minutes or more.
# print notifications about running packages every 2 minutes, and print
# full output for jobs that have been running for 60 minutes or more.
if self._show_output:
interval = 60
notify_interval = 0
else:
interval = 60 * 60
notify_interval = 60 * 2
for target, job in self._jobs.iteritems():
if job:
last_timestamp = max(job.start_timestamp, job.last_output_timestamp)
if last_timestamp + interval < current_time:
self._print_queue.put(JobPrinter(job))
job.last_output_timestamp = current_time
no_output = False
elif (notify_interval and
job.last_notify_timestamp + notify_interval < current_time):
job_seconds = current_time - job.start_timestamp
args = (job.pkgname, job_seconds / 60, job_seconds % 60, job.filename)
info = "Still building %s (%dm%.1fs). Logs in %s" % args
job.last_notify_timestamp = current_time
self._Print(info)
no_output = False
# If we haven't printed any messages yet, print a general status message
# here.
if no_output:
seconds = current_time - GLOBAL_START
line = ("Pending %s, Ready %s, Running %s, Retrying %s, Total %s "
"[Time %dm%.1fs Load %s]")
qsize = self._emerge_queue.qsize()
self._Print(line % (len(self._deps_map), qsize, len(self._jobs) - qsize,
len(self._retry_queue), self._total_jobs,
seconds / 60, seconds % 60, self._LoadAvg()))
def _Finish(self, target):
"""Mark a target as completed and unblock dependecies."""