mirror of
				https://github.com/matrix-org/synapse.git
				synced 2025-10-25 06:11:31 +02:00 
			
		
		
		
	Track DB scheduling delay per-request
For each request, track the amount of time spent waiting for a db connection. This entails adding it to the LoggingContext and we may as well add metrics for it while we are passing.
This commit is contained in:
		
							parent
							
								
									0f5d2cc37c
								
							
						
					
					
						commit
						3d12d97415
					
				| @ -102,6 +102,10 @@ response_db_txn_duration = metrics.register_counter( | |||||||
|     ), |     ), | ||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
|  | # seconds spent waiting for a db connection, when processing this request | ||||||
|  | response_db_sched_duration = metrics.register_counter( | ||||||
|  |     "response_db_sched_duration_seconds", labels=["method", "servlet", "tag"] | ||||||
|  | ) | ||||||
| 
 | 
 | ||||||
| _next_request_id = 0 | _next_request_id = 0 | ||||||
| 
 | 
 | ||||||
| @ -381,6 +385,9 @@ class RequestMetrics(object): | |||||||
|         response_db_txn_duration.inc_by( |         response_db_txn_duration.inc_by( | ||||||
|             context.db_txn_duration_ms / 1000., request.method, self.name, tag |             context.db_txn_duration_ms / 1000., request.method, self.name, tag | ||||||
|         ) |         ) | ||||||
|  |         response_db_sched_duration.inc_by( | ||||||
|  |             context.db_sched_duration_ms / 1000., request.method, self.name, tag | ||||||
|  |         ) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class RootRedirect(resource.Resource): | class RootRedirect(resource.Resource): | ||||||
|  | |||||||
| @ -67,13 +67,14 @@ class SynapseRequest(Request): | |||||||
|             ru_utime, ru_stime = context.get_resource_usage() |             ru_utime, ru_stime = context.get_resource_usage() | ||||||
|             db_txn_count = context.db_txn_count |             db_txn_count = context.db_txn_count | ||||||
|             db_txn_duration_ms = context.db_txn_duration_ms |             db_txn_duration_ms = context.db_txn_duration_ms | ||||||
|  |             db_sched_duration_ms = context.db_sched_duration_ms | ||||||
|         except Exception: |         except Exception: | ||||||
|             ru_utime, ru_stime = (0, 0) |             ru_utime, ru_stime = (0, 0) | ||||||
|             db_txn_count, db_txn_duration_ms = (0, 0) |             db_txn_count, db_txn_duration_ms = (0, 0) | ||||||
| 
 | 
 | ||||||
|         self.site.access_logger.info( |         self.site.access_logger.info( | ||||||
|             "%s - %s - {%s}" |             "%s - %s - {%s}" | ||||||
|             " Processed request: %dms (%dms, %dms) (%dms/%d)" |             " Processed request: %dms (%dms, %dms) (%dms/%dms/%d)" | ||||||
|             " %sB %s \"%s %s %s\" \"%s\"", |             " %sB %s \"%s %s %s\" \"%s\"", | ||||||
|             self.getClientIP(), |             self.getClientIP(), | ||||||
|             self.site.site_tag, |             self.site.site_tag, | ||||||
| @ -81,6 +82,7 @@ class SynapseRequest(Request): | |||||||
|             int(time.time() * 1000) - self.start_time, |             int(time.time() * 1000) - self.start_time, | ||||||
|             int(ru_utime * 1000), |             int(ru_utime * 1000), | ||||||
|             int(ru_stime * 1000), |             int(ru_stime * 1000), | ||||||
|  |             db_sched_duration_ms, | ||||||
|             db_txn_duration_ms, |             db_txn_duration_ms, | ||||||
|             int(db_txn_count), |             int(db_txn_count), | ||||||
|             self.sentLength, |             self.sentLength, | ||||||
|  | |||||||
| @ -347,7 +347,9 @@ class SQLBaseStore(object): | |||||||
| 
 | 
 | ||||||
|         def inner_func(conn, *args, **kwargs): |         def inner_func(conn, *args, **kwargs): | ||||||
|             with LoggingContext("runWithConnection") as context: |             with LoggingContext("runWithConnection") as context: | ||||||
|                 sql_scheduling_timer.inc_by(time.time() * 1000 - start_time) |                 sched_duration_ms = time.time() * 1000 - start_time | ||||||
|  |                 sql_scheduling_timer.inc_by(sched_duration_ms) | ||||||
|  |                 current_context.add_database_scheduled(sched_duration_ms) | ||||||
| 
 | 
 | ||||||
|                 if self.database_engine.is_connection_closed(conn): |                 if self.database_engine.is_connection_closed(conn): | ||||||
|                     logger.debug("Reconnecting closed database connection") |                     logger.debug("Reconnecting closed database connection") | ||||||
|  | |||||||
| @ -59,7 +59,8 @@ class LoggingContext(object): | |||||||
| 
 | 
 | ||||||
|     __slots__ = [ |     __slots__ = [ | ||||||
|         "previous_context", "name", "ru_stime", "ru_utime", |         "previous_context", "name", "ru_stime", "ru_utime", | ||||||
|         "db_txn_count", "db_txn_duration_ms", "usage_start", "usage_end", |         "db_txn_count", "db_txn_duration_ms", "db_sched_duration_ms", | ||||||
|  |         "usage_start", "usage_end", | ||||||
|         "main_thread", "alive", |         "main_thread", "alive", | ||||||
|         "request", "tag", |         "request", "tag", | ||||||
|     ] |     ] | ||||||
| @ -86,6 +87,9 @@ class LoggingContext(object): | |||||||
|         def add_database_transaction(self, duration_ms): |         def add_database_transaction(self, duration_ms): | ||||||
|             pass |             pass | ||||||
| 
 | 
 | ||||||
|  |         def add_database_scheduled(self, sched_ms): | ||||||
|  |             pass | ||||||
|  | 
 | ||||||
|         def __nonzero__(self): |         def __nonzero__(self): | ||||||
|             return False |             return False | ||||||
| 
 | 
 | ||||||
| @ -101,6 +105,9 @@ class LoggingContext(object): | |||||||
|         # ms spent waiting for db txns, excluding scheduling time |         # ms spent waiting for db txns, excluding scheduling time | ||||||
|         self.db_txn_duration_ms = 0 |         self.db_txn_duration_ms = 0 | ||||||
| 
 | 
 | ||||||
|  |         # ms spent waiting for db txns to be scheduled | ||||||
|  |         self.db_sched_duration_ms = 0 | ||||||
|  | 
 | ||||||
|         self.usage_start = None |         self.usage_start = None | ||||||
|         self.usage_end = None |         self.usage_end = None | ||||||
|         self.main_thread = threading.current_thread() |         self.main_thread = threading.current_thread() | ||||||
| @ -210,6 +217,15 @@ class LoggingContext(object): | |||||||
|         self.db_txn_count += 1 |         self.db_txn_count += 1 | ||||||
|         self.db_txn_duration_ms += duration_ms |         self.db_txn_duration_ms += duration_ms | ||||||
| 
 | 
 | ||||||
|  |     def add_database_scheduled(self, sched_ms): | ||||||
|  |         """Record a use of the database pool | ||||||
|  | 
 | ||||||
|  |         Args: | ||||||
|  |             sched_ms (int): number of milliseconds it took us to get a | ||||||
|  |                 connection | ||||||
|  |         """ | ||||||
|  |         self.db_sched_duration_ms += sched_ms | ||||||
|  | 
 | ||||||
| 
 | 
 | ||||||
| class LoggingContextFilter(logging.Filter): | class LoggingContextFilter(logging.Filter): | ||||||
|     """Logging filter that adds values from the current logging context to each |     """Logging filter that adds values from the current logging context to each | ||||||
|  | |||||||
| @ -80,6 +80,11 @@ block_db_txn_duration = metrics.register_counter( | |||||||
|     ), |     ), | ||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
|  | # seconds spent waiting for a db connection, in this block | ||||||
|  | block_db_sched_duration = metrics.register_counter( | ||||||
|  |     "block_db_sched_duration_seconds", labels=["block_name"], | ||||||
|  | ) | ||||||
|  | 
 | ||||||
| 
 | 
 | ||||||
| def measure_func(name): | def measure_func(name): | ||||||
|     def wrapper(func): |     def wrapper(func): | ||||||
| @ -96,7 +101,9 @@ def measure_func(name): | |||||||
| class Measure(object): | class Measure(object): | ||||||
|     __slots__ = [ |     __slots__ = [ | ||||||
|         "clock", "name", "start_context", "start", "new_context", "ru_utime", |         "clock", "name", "start_context", "start", "new_context", "ru_utime", | ||||||
|         "ru_stime", "db_txn_count", "db_txn_duration_ms", "created_context" |         "ru_stime", | ||||||
|  |         "db_txn_count", "db_txn_duration_ms", "db_sched_duration_ms", | ||||||
|  |         "created_context", | ||||||
|     ] |     ] | ||||||
| 
 | 
 | ||||||
|     def __init__(self, clock, name): |     def __init__(self, clock, name): | ||||||
| @ -117,6 +124,7 @@ class Measure(object): | |||||||
|         self.ru_utime, self.ru_stime = self.start_context.get_resource_usage() |         self.ru_utime, self.ru_stime = self.start_context.get_resource_usage() | ||||||
|         self.db_txn_count = self.start_context.db_txn_count |         self.db_txn_count = self.start_context.db_txn_count | ||||||
|         self.db_txn_duration_ms = self.start_context.db_txn_duration_ms |         self.db_txn_duration_ms = self.start_context.db_txn_duration_ms | ||||||
|  |         self.db_sched_duration_ms = self.start_context.db_sched_duration_ms | ||||||
| 
 | 
 | ||||||
|     def __exit__(self, exc_type, exc_val, exc_tb): |     def __exit__(self, exc_type, exc_val, exc_tb): | ||||||
|         if isinstance(exc_type, Exception) or not self.start_context: |         if isinstance(exc_type, Exception) or not self.start_context: | ||||||
| @ -149,6 +157,10 @@ class Measure(object): | |||||||
|             (context.db_txn_duration_ms - self.db_txn_duration_ms) / 1000., |             (context.db_txn_duration_ms - self.db_txn_duration_ms) / 1000., | ||||||
|             self.name |             self.name | ||||||
|         ) |         ) | ||||||
|  |         block_db_sched_duration.inc_by( | ||||||
|  |             (context.db_sched_duration_ms - self.db_sched_duration_ms) / 1000., | ||||||
|  |             self.name | ||||||
|  |         ) | ||||||
| 
 | 
 | ||||||
|         if self.created_context: |         if self.created_context: | ||||||
|             self.start_context.__exit__(exc_type, exc_val, exc_tb) |             self.start_context.__exit__(exc_type, exc_val, exc_tb) | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user