BUG/MEDIUM: threads/time: maintain a common time reference between all threads

During high loads it becomes visible that the time drifts between threads, sometimes showing tens of seconds after several minutes. The root cause is the per-thread correction which is performed based on a local offset and local time. But we can't use a unique global time either as we need the thread-local time to be stable between two poll() calls. This commit takes a stab at this problem by proceeding this way : - a global "global_now" date is monotonous and common between all threads. - each thread has its own local <now> which is resynced with <global_now> on each invocation of tv_update_date() - each thread detects its own drift based on its poll() timeout and its local <now>, and recalculates its adjusted local time - each thread then ensures its new local time is no older than the current global time, otherwise it readjusts its local time to match this one - finally threads do atomically update the global time to match its own local one This guarantees a monotonous global time and a monotonous+stable local time. It is still possible by definition for two threads to report a minor time variation on subsequent events but that variation will only be caused by the moment they watched the time and are very small. When a common global time is needed between all threads, global_now could be used as a reference (with care). The wallclock time used in logs is still <date> anyway.
2025-11-24 12:20:59 +01:00 · 2017-11-23 14:52:28 +01:00 · 2017-11-23 14:52:28 +01:00 · 9fefc51c56
commit 9fefc51c56
parent 7649aacf7f
1 changed files with 49 additions and 31 deletions
--- a/src/time.c
+++ b/src/time.c
@ -28,6 +28,9 @@ struct timeval start_date;      /* the process's start date */
 THREAD_LOCAL struct timeval before_poll;     /* system date before calling poll() */
 THREAD_LOCAL struct timeval after_poll;      /* system date after leaving poll() */

+static THREAD_LOCAL struct timeval tv_offset;  /* per-thread time ofsset relative to global time */
+volatile unsigned long long global_now;        /* common date between all threads (32:32) */
+
 /*
 * adds <ms> ms to <from>, set the result to <tv> and returns a pointer <tv>
 */
@ -170,55 +173,70 @@ REGPRM2 int _tv_isgt(const struct timeval *tv1, const struct timeval *tv2)
 */
 REGPRM2 void tv_update_date(int max_wait, int interrupted)
 {
-	volatile static long long offset = 0;
-	struct timeval adjusted, deadline;
+	struct timeval adjusted, deadline, tmp_now;
 	unsigned int   curr_sec_ms;     /* millisecond of current second (0..999) */
-	long long new_ofs;
+	unsigned long long old_now;
+	unsigned long long new_now;

 	gettimeofday(&date, NULL);
 	if (unlikely(max_wait < 0)) {
-		HA_ATOMIC_STORE(&offset, 0);
+		tv_zero(&tv_offset);
 		adjusted = date;
 		after_poll = date;
 		samp_time = idle_time = 0;
 		idle_pct = 100;
+		global_now = (((unsigned long long)adjusted.tv_sec) << 32) +
+		             (unsigned int)adjusted.tv_usec;
 		goto to_ms;
 	}

-	new_ofs = offset;
+	__tv_add(&adjusted, &date, &tv_offset);

-	adjusted.tv_sec  = date.tv_sec  + (int)(new_ofs >> 32);
-	adjusted.tv_usec = date.tv_usec + (int)(new_ofs & 0xFFFFFFFFU);
-	if (adjusted.tv_usec > 999999) {
-		adjusted.tv_usec -= 1000000;
-		adjusted.tv_sec  += 1;
-	}
-
-	if (unlikely(__tv_islt(&adjusted, &now))) {
-		goto fixup; /* jump in the past */
-	}
-
-	/* OK we did not jump backwards, let's see if we have jumped too far
-	 * forwards. The poll value was in <max_wait>, we accept that plus
-	 * MAX_DELAY_MS to cover additional time.
+	/* compute the minimum and maximum local date we may have reached based
+	 * on our past date and the associated timeout.
 	 */
 	_tv_ms_add(&deadline, &now, max_wait + MAX_DELAY_MS);
-	if (likely(__tv_islt(&adjusted, &deadline)))
-		goto to_ms; /* OK time is within expected range */
- fixup:
-	/* Large jump. If the poll was interrupted, we consider that the date
-	 * has not changed (immediate wake-up), otherwise we add the poll
-	 * time-out to the previous date. The new offset is recomputed.
+
+	if (unlikely(__tv_islt(&adjusted, &now) || __tv_islt(&deadline, &adjusted))) {
+		/* Large jump. If the poll was interrupted, we consider that the
+		 * date has not changed (immediate wake-up), otherwise we add
+		 * the poll time-out to the previous date. The new offset is
+		 * recomputed.
 		 */
 		_tv_ms_add(&adjusted, &now, interrupted ? 0 : max_wait);
+	}

-	new_ofs = (((long)(adjusted.tv_sec  - date.tv_sec)) << 32) +
-	          (unsigned int)(adjusted.tv_usec - date.tv_usec);
+	/* now that we have bounded the local time, let's check if it's
+	 * realistic regarding the global date, which only moves forward,
+	 * otherwise catch up.
+	 */
+	old_now = global_now;

-	if ((int)(new_ofs & 0xFFFFFFFFU) < 0)
-		new_ofs = new_ofs + 1000000 - 0x100000000UL;
+	do {
+		tmp_now.tv_sec  = (unsigned int)(old_now >> 32);
+		tmp_now.tv_usec = old_now & 0xFFFFFFFFU;
+
+		if (__tv_islt(&adjusted, &tmp_now))
+			adjusted = tmp_now;
+
+		/* now <adjusted> is expected to be the most accurate date,
+		 * equal to <global_now> or newer.
+		 */
+		new_now = (((unsigned long long)adjusted.tv_sec) << 32) + (unsigned int)adjusted.tv_usec;
+
+		/* let's try to update the global <now> or loop again */
+	} while (!HA_ATOMIC_CAS(&global_now, &old_now, new_now));
+
+	/* the new global date when we looked was old_now, and the new one is
+	 * new_now == adjusted. We can recompute our local offset.
+	 */
+	tv_offset.tv_sec  = adjusted.tv_sec  - date.tv_sec;
+	tv_offset.tv_usec = adjusted.tv_usec - date.tv_usec;
+	if (tv_offset.tv_usec < 0) {
+		tv_offset.tv_usec += 1000000;
+		tv_offset.tv_sec--;
+	}

-	HA_ATOMIC_STORE(&offset, new_ofs);
 to_ms:
 	now = adjusted;
 	curr_sec_ms = now.tv_usec / 1000;            /* ms of current second */