mirror of
https://github.com/matrix-org/synapse.git
synced 2025-10-24 13:51:57 +02:00
Merge pull request #2795 from matrix-org/rav/track_db_scheduling
Track DB scheduling delay per-request
This commit is contained in:
commit
a7e4ff9cca
@ -102,6 +102,10 @@ response_db_txn_duration = metrics.register_counter(
|
|||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# seconds spent waiting for a db connection, when processing this request
|
||||||
|
response_db_sched_duration = metrics.register_counter(
|
||||||
|
"response_db_sched_duration_seconds", labels=["method", "servlet", "tag"]
|
||||||
|
)
|
||||||
|
|
||||||
_next_request_id = 0
|
_next_request_id = 0
|
||||||
|
|
||||||
@ -372,6 +376,9 @@ class RequestMetrics(object):
|
|||||||
response_db_txn_duration.inc_by(
|
response_db_txn_duration.inc_by(
|
||||||
context.db_txn_duration_ms / 1000., request.method, self.name, tag
|
context.db_txn_duration_ms / 1000., request.method, self.name, tag
|
||||||
)
|
)
|
||||||
|
response_db_sched_duration.inc_by(
|
||||||
|
context.db_sched_duration_ms / 1000., request.method, self.name, tag
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class RootRedirect(resource.Resource):
|
class RootRedirect(resource.Resource):
|
||||||
|
@ -67,13 +67,14 @@ class SynapseRequest(Request):
|
|||||||
ru_utime, ru_stime = context.get_resource_usage()
|
ru_utime, ru_stime = context.get_resource_usage()
|
||||||
db_txn_count = context.db_txn_count
|
db_txn_count = context.db_txn_count
|
||||||
db_txn_duration_ms = context.db_txn_duration_ms
|
db_txn_duration_ms = context.db_txn_duration_ms
|
||||||
|
db_sched_duration_ms = context.db_sched_duration_ms
|
||||||
except Exception:
|
except Exception:
|
||||||
ru_utime, ru_stime = (0, 0)
|
ru_utime, ru_stime = (0, 0)
|
||||||
db_txn_count, db_txn_duration_ms = (0, 0)
|
db_txn_count, db_txn_duration_ms = (0, 0)
|
||||||
|
|
||||||
self.site.access_logger.info(
|
self.site.access_logger.info(
|
||||||
"%s - %s - {%s}"
|
"%s - %s - {%s}"
|
||||||
" Processed request: %dms (%dms, %dms) (%dms/%d)"
|
" Processed request: %dms (%dms, %dms) (%dms/%dms/%d)"
|
||||||
" %sB %s \"%s %s %s\" \"%s\"",
|
" %sB %s \"%s %s %s\" \"%s\"",
|
||||||
self.getClientIP(),
|
self.getClientIP(),
|
||||||
self.site.site_tag,
|
self.site.site_tag,
|
||||||
@ -81,6 +82,7 @@ class SynapseRequest(Request):
|
|||||||
int(time.time() * 1000) - self.start_time,
|
int(time.time() * 1000) - self.start_time,
|
||||||
int(ru_utime * 1000),
|
int(ru_utime * 1000),
|
||||||
int(ru_stime * 1000),
|
int(ru_stime * 1000),
|
||||||
|
db_sched_duration_ms,
|
||||||
db_txn_duration_ms,
|
db_txn_duration_ms,
|
||||||
int(db_txn_count),
|
int(db_txn_count),
|
||||||
self.sentLength,
|
self.sentLength,
|
||||||
|
@ -347,7 +347,9 @@ class SQLBaseStore(object):
|
|||||||
|
|
||||||
def inner_func(conn, *args, **kwargs):
|
def inner_func(conn, *args, **kwargs):
|
||||||
with LoggingContext("runWithConnection") as context:
|
with LoggingContext("runWithConnection") as context:
|
||||||
sql_scheduling_timer.inc_by(time.time() * 1000 - start_time)
|
sched_duration_ms = time.time() * 1000 - start_time
|
||||||
|
sql_scheduling_timer.inc_by(sched_duration_ms)
|
||||||
|
current_context.add_database_scheduled(sched_duration_ms)
|
||||||
|
|
||||||
if self.database_engine.is_connection_closed(conn):
|
if self.database_engine.is_connection_closed(conn):
|
||||||
logger.debug("Reconnecting closed database connection")
|
logger.debug("Reconnecting closed database connection")
|
||||||
|
@ -59,7 +59,8 @@ class LoggingContext(object):
|
|||||||
|
|
||||||
__slots__ = [
|
__slots__ = [
|
||||||
"previous_context", "name", "ru_stime", "ru_utime",
|
"previous_context", "name", "ru_stime", "ru_utime",
|
||||||
"db_txn_count", "db_txn_duration_ms", "usage_start", "usage_end",
|
"db_txn_count", "db_txn_duration_ms", "db_sched_duration_ms",
|
||||||
|
"usage_start", "usage_end",
|
||||||
"main_thread", "alive",
|
"main_thread", "alive",
|
||||||
"request", "tag",
|
"request", "tag",
|
||||||
]
|
]
|
||||||
@ -86,6 +87,9 @@ class LoggingContext(object):
|
|||||||
def add_database_transaction(self, duration_ms):
|
def add_database_transaction(self, duration_ms):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
def add_database_scheduled(self, sched_ms):
|
||||||
|
pass
|
||||||
|
|
||||||
def __nonzero__(self):
|
def __nonzero__(self):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
@ -101,6 +105,9 @@ class LoggingContext(object):
|
|||||||
# ms spent waiting for db txns, excluding scheduling time
|
# ms spent waiting for db txns, excluding scheduling time
|
||||||
self.db_txn_duration_ms = 0
|
self.db_txn_duration_ms = 0
|
||||||
|
|
||||||
|
# ms spent waiting for db txns to be scheduled
|
||||||
|
self.db_sched_duration_ms = 0
|
||||||
|
|
||||||
self.usage_start = None
|
self.usage_start = None
|
||||||
self.usage_end = None
|
self.usage_end = None
|
||||||
self.main_thread = threading.current_thread()
|
self.main_thread = threading.current_thread()
|
||||||
@ -210,6 +217,15 @@ class LoggingContext(object):
|
|||||||
self.db_txn_count += 1
|
self.db_txn_count += 1
|
||||||
self.db_txn_duration_ms += duration_ms
|
self.db_txn_duration_ms += duration_ms
|
||||||
|
|
||||||
|
def add_database_scheduled(self, sched_ms):
|
||||||
|
"""Record a use of the database pool
|
||||||
|
|
||||||
|
Args:
|
||||||
|
sched_ms (int): number of milliseconds it took us to get a
|
||||||
|
connection
|
||||||
|
"""
|
||||||
|
self.db_sched_duration_ms += sched_ms
|
||||||
|
|
||||||
|
|
||||||
class LoggingContextFilter(logging.Filter):
|
class LoggingContextFilter(logging.Filter):
|
||||||
"""Logging filter that adds values from the current logging context to each
|
"""Logging filter that adds values from the current logging context to each
|
||||||
|
@ -80,6 +80,11 @@ block_db_txn_duration = metrics.register_counter(
|
|||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# seconds spent waiting for a db connection, in this block
|
||||||
|
block_db_sched_duration = metrics.register_counter(
|
||||||
|
"block_db_sched_duration_seconds", labels=["block_name"],
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def measure_func(name):
|
def measure_func(name):
|
||||||
def wrapper(func):
|
def wrapper(func):
|
||||||
@ -96,7 +101,9 @@ def measure_func(name):
|
|||||||
class Measure(object):
|
class Measure(object):
|
||||||
__slots__ = [
|
__slots__ = [
|
||||||
"clock", "name", "start_context", "start", "new_context", "ru_utime",
|
"clock", "name", "start_context", "start", "new_context", "ru_utime",
|
||||||
"ru_stime", "db_txn_count", "db_txn_duration_ms", "created_context"
|
"ru_stime",
|
||||||
|
"db_txn_count", "db_txn_duration_ms", "db_sched_duration_ms",
|
||||||
|
"created_context",
|
||||||
]
|
]
|
||||||
|
|
||||||
def __init__(self, clock, name):
|
def __init__(self, clock, name):
|
||||||
@ -117,6 +124,7 @@ class Measure(object):
|
|||||||
self.ru_utime, self.ru_stime = self.start_context.get_resource_usage()
|
self.ru_utime, self.ru_stime = self.start_context.get_resource_usage()
|
||||||
self.db_txn_count = self.start_context.db_txn_count
|
self.db_txn_count = self.start_context.db_txn_count
|
||||||
self.db_txn_duration_ms = self.start_context.db_txn_duration_ms
|
self.db_txn_duration_ms = self.start_context.db_txn_duration_ms
|
||||||
|
self.db_sched_duration_ms = self.start_context.db_sched_duration_ms
|
||||||
|
|
||||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||||
if isinstance(exc_type, Exception) or not self.start_context:
|
if isinstance(exc_type, Exception) or not self.start_context:
|
||||||
@ -149,6 +157,10 @@ class Measure(object):
|
|||||||
(context.db_txn_duration_ms - self.db_txn_duration_ms) / 1000.,
|
(context.db_txn_duration_ms - self.db_txn_duration_ms) / 1000.,
|
||||||
self.name
|
self.name
|
||||||
)
|
)
|
||||||
|
block_db_sched_duration.inc_by(
|
||||||
|
(context.db_sched_duration_ms - self.db_sched_duration_ms) / 1000.,
|
||||||
|
self.name
|
||||||
|
)
|
||||||
|
|
||||||
if self.created_context:
|
if self.created_context:
|
||||||
self.start_context.__exit__(exc_type, exc_val, exc_tb)
|
self.start_context.__exit__(exc_type, exc_val, exc_tb)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user