[MEDIUM] further improve monotonic clock by checking for forward jumps

The first implementation of the monotonic clock did not detect
forward jumps. As a consequence, a clock that suddenly jumps
forward may expire a lot of tasks at once. While this may seem
minor, it is in fact problematic because most machines which boot
with a wrong date start in the past and then suddenly see their
time jump several years into the future.

The solution is to check if we spent more apparent time in
a poller than allowed (with a margin applied). The margin
is currently set to 1000 ms. It should be large enough for
any poll() to complete.

Tests with a randomly jumping clock show that the result is quite
accurate (error of less than 1 second at every change of more than
one second).
This commit is contained in:
Willy Tarreau 2008-06-23 14:00:57 +02:00
parent b7f694f20e
commit b0b37bcd65
10 changed files with 129 additions and 51 deletions

View File

@ -44,6 +44,11 @@
#define TIME_ETERNITY (TV_ETERNITY_MS) #define TIME_ETERNITY (TV_ETERNITY_MS)
/* we want to be able to detect time jumps. Fix the maximum wait time to a low
* value so that we know the time has changed if we wait longer.
*/
#define MAX_DELAY_MS 1000
/* returns the lowest delay amongst <old> and <new>, and respects TIME_ETERNITY */ /* returns the lowest delay amongst <old> and <new>, and respects TIME_ETERNITY */
#define MINTIME(old, new) (((new)<0)?(old):(((old)<0||(new)<(old))?(new):(old))) #define MINTIME(old, new) (((new)<0)?(old):(((old)<0||(new)<(old))?(new):(old)))
@ -84,13 +89,15 @@ REGPRM1 static inline struct timeval *tv_now(struct timeval *tv)
return tv; return tv;
} }
/* tv_now_mono: sets <date> to the current time (wall clock), <mono> to a value /* tv_update_date: sets <date> to system time, and sets <now> to something as
* following a monotonic function, and applies any required correction if the * close as possible to real time, following a monotonic function. The main
* time goes backwards. Note that while we could improve it a bit by checking * principle consists in detecting backwards and forwards time jumps and adjusting
* that the new date is not too far in the future, it is not much necessary to * an offset to correct them. This function should be called only once after
* do so. * each poll. The poll's timeout should be passed in <max_wait>, and the return
* value in <interrupted> (a non-zero value means that we have not expired the
* timeout).
*/ */
REGPRM2 struct timeval *tv_now_mono(struct timeval *mono, struct timeval *wall); REGPRM2 void tv_update_date(int max_wait, int interrupted);
/* /*
* sets a struct timeval to its highest value so that it can never happen * sets a struct timeval to its highest value so that it can never happen

View File

@ -2831,7 +2831,7 @@ int readcfgfile(const char *file)
*/ */
/* will be needed further to delay some tasks */ /* will be needed further to delay some tasks */
tv_now_mono(&now, &date); tv_update_date(0,1);
if ((curproxy = proxy) == NULL) { if ((curproxy = proxy) == NULL) {
Alert("parsing %s : no <listen> line. Nothing to do !\n", Alert("parsing %s : no <listen> line. Nothing to do !\n",

View File

@ -235,15 +235,18 @@ REGPRM2 static void _do_poll(struct poller *p, struct timeval *exp)
if (run_queue) if (run_queue)
wait_time = 0; wait_time = 0;
else if (tv_iseternity(exp)) else if (tv_iseternity(exp))
wait_time = -1; wait_time = MAX_DELAY_MS;
else if (tv_isge(&now, exp)) else if (tv_isge(&now, exp))
wait_time = 0; wait_time = 0;
else else {
wait_time = __tv_ms_elapsed(&now, exp) + 1; wait_time = __tv_ms_elapsed(&now, exp) + 1;
if (wait_time > MAX_DELAY_MS)
wait_time = MAX_DELAY_MS;
}
fd = MIN(maxfd, global.tune.maxpollevents); fd = MIN(maxfd, global.tune.maxpollevents);
status = epoll_wait(epoll_fd, epoll_events, fd, wait_time); status = epoll_wait(epoll_fd, epoll_events, fd, wait_time);
tv_now_mono(&now, &date); tv_update_date(wait_time, status);
for (count = 0; count < status; count++) { for (count = 0; count < status; count++) {
fd = epoll_events[count].data.fd; fd = epoll_events[count].data.fd;

View File

@ -102,25 +102,41 @@ REGPRM1 static void __fd_clo(int fd)
REGPRM2 static void _do_poll(struct poller *p, struct timeval *exp) REGPRM2 static void _do_poll(struct poller *p, struct timeval *exp)
{ {
int status; int status;
int count, fd; int count, fd, delta_ms;
struct timespec timeout, *to_ptr; struct timespec timeout;
to_ptr = NULL; // no timeout
if (run_queue) { if (run_queue) {
timeout.tv_sec = timeout.tv_nsec = 0; timeout.tv_sec = timeout.tv_nsec = 0;
to_ptr = &timeout; delta_ms = 0;
} }
else if (tv_isset(exp)) { else if (tv_isset(exp)) {
const struct timeval max_delay = {
.tv_sec = MAX_DELAY_MS / 1000,
.tv_usec = (MAX_DELAY_MS % 1000) * 1000
};
struct timeval delta; struct timeval delta;
if (tv_isge(&now, exp)) if (tv_isge(&now, exp)) {
delta.tv_sec = delta.tv_usec = 0; delta.tv_sec = delta.tv_usec = 0;
else delta_ms = 0;
}
else {
tv_remain(&now, exp, &delta); tv_remain(&now, exp, &delta);
if (__tv_isgt(&delta, &max_delay)) {
delta = max_delay;
delta_ms = MAX_DELAY_MS;
} else {
delta_ms = delta.tv_sec * 1000 + delta.tv_usec / 1000;
}
}
timeout.tv_sec = delta.tv_sec; timeout.tv_sec = delta.tv_sec;
timeout.tv_nsec = delta.tv_usec * 1000; timeout.tv_nsec = delta.tv_usec * 1000;
to_ptr = &timeout; }
else {
delta_ms = MAX_DELAY_MS;
timeout.tv_sec = MAX_DELAY_MS / 1000;
timeout.tv_nsec = (MAX_DELAY_MS % 1000) * 1000000;
} }
fd = MIN(maxfd, global.tune.maxpollevents); fd = MIN(maxfd, global.tune.maxpollevents);
@ -129,8 +145,8 @@ REGPRM2 static void _do_poll(struct poller *p, struct timeval *exp)
0, // int nchanges 0, // int nchanges
kev, // struct kevent *eventlist kev, // struct kevent *eventlist
fd, // int nevents fd, // int nevents
to_ptr); // const struct timespec *timeout &timeout); // const struct timespec *timeout
tv_now_mono(&now, &date); tv_update_date(delta_ms, status);
for (count = 0; count < status; count++) { for (count = 0; count < status; count++) {
fd = kev[count].ident; fd = kev[count].ident;

View File

@ -127,14 +127,17 @@ REGPRM2 static void _do_poll(struct poller *p, struct timeval *exp)
if (run_queue) if (run_queue)
wait_time = 0; wait_time = 0;
else if (tv_iseternity(exp)) else if (tv_iseternity(exp))
wait_time = -1; wait_time = MAX_DELAY_MS;
else if (tv_isge(&now, exp)) else if (tv_isge(&now, exp))
wait_time = 0; wait_time = 0;
else else {
wait_time = __tv_ms_elapsed(&now, exp) + 1; wait_time = __tv_ms_elapsed(&now, exp) + 1;
if (wait_time > MAX_DELAY_MS)
wait_time = MAX_DELAY_MS;
}
status = poll(poll_events, nbfd, wait_time); status = poll(poll_events, nbfd, wait_time);
tv_now_mono(&now, &date); tv_update_date(wait_time, status);
for (count = 0; status > 0 && count < nbfd; count++) { for (count = 0; status > 0 && count < nbfd; count++) {
fd = poll_events[count].fd; fd = poll_events[count].fd;

View File

@ -80,17 +80,27 @@ REGPRM1 static void __fd_rem(int fd)
*/ */
REGPRM2 static void _do_poll(struct poller *p, struct timeval *exp) REGPRM2 static void _do_poll(struct poller *p, struct timeval *exp)
{ {
const struct timeval max_delay = {
.tv_sec = MAX_DELAY_MS / 1000,
.tv_usec = (MAX_DELAY_MS % 1000) * 1000
};
int status; int status;
int fd, i; int fd, i;
struct timeval delta; struct timeval delta;
int delta_ms;
int readnotnull, writenotnull; int readnotnull, writenotnull;
int fds; int fds;
char count; char count;
/* allow select to return immediately when needed */ /* allow select to return immediately when needed */
delta.tv_sec = delta.tv_usec = 0; delta.tv_sec = delta.tv_usec = 0;
if (!run_queue && tv_isset(exp)) { delta_ms = 0;
if (tv_islt(&now, exp)) { if (!run_queue) {
if (!tv_isset(exp)) {
delta = max_delay;
delta_ms = MAX_DELAY_MS;
}
else if (tv_islt(&now, exp)) {
tv_remain(&now, exp, &delta); tv_remain(&now, exp, &delta);
/* To avoid eventual select loops due to timer precision */ /* To avoid eventual select loops due to timer precision */
delta.tv_usec += SCHEDULER_RESOLUTION * 1000; delta.tv_usec += SCHEDULER_RESOLUTION * 1000;
@ -98,6 +108,12 @@ REGPRM2 static void _do_poll(struct poller *p, struct timeval *exp)
delta.tv_usec -= 1000000; delta.tv_usec -= 1000000;
delta.tv_sec ++; delta.tv_sec ++;
} }
if (__tv_isge(&delta, &max_delay)) {
delta = max_delay;
delta_ms = MAX_DELAY_MS;
} else {
delta_ms = delta.tv_sec * 1000 + delta.tv_usec / 1000;
}
} }
} }
@ -122,9 +138,9 @@ REGPRM2 static void _do_poll(struct poller *p, struct timeval *exp)
readnotnull ? tmp_evts[DIR_RD] : NULL, readnotnull ? tmp_evts[DIR_RD] : NULL,
writenotnull ? tmp_evts[DIR_WR] : NULL, writenotnull ? tmp_evts[DIR_WR] : NULL,
NULL, NULL,
tv_isset(exp) ? &delta : NULL); &delta);
tv_now_mono(&now, &date); tv_update_date(delta_ms, status);
if (status <= 0) if (status <= 0)
return; return;

View File

@ -418,7 +418,7 @@ REGPRM2 static void _do_poll(struct poller *p, struct timeval *exp)
* returning now without checking epoll_wait(). * returning now without checking epoll_wait().
*/ */
if (++last_skipped <= 1) { if (++last_skipped <= 1) {
tv_now_mono(&now, &date); tv_update_date(0, 1);
return; return;
} }
} }
@ -435,11 +435,14 @@ REGPRM2 static void _do_poll(struct poller *p, struct timeval *exp)
} }
else { else {
if (tv_iseternity(exp)) if (tv_iseternity(exp))
wait_time = -1; wait_time = MAX_DELAY_MS;
else if (tv_isge(&now, exp)) else if (tv_isge(&now, exp))
wait_time = 0; wait_time = 0;
else else {
wait_time = __tv_ms_elapsed(&now, exp) + 1; wait_time = __tv_ms_elapsed(&now, exp) + 1;
if (wait_time > MAX_DELAY_MS)
wait_time = MAX_DELAY_MS;
}
} }
/* now let's wait for real events. We normally use maxpollevents as a /* now let's wait for real events. We normally use maxpollevents as a
@ -451,8 +454,7 @@ REGPRM2 static void _do_poll(struct poller *p, struct timeval *exp)
fd = MIN(maxfd, fd); fd = MIN(maxfd, fd);
spec_processed = 0; spec_processed = 0;
status = epoll_wait(epoll_fd, epoll_events, fd, wait_time); status = epoll_wait(epoll_fd, epoll_events, fd, wait_time);
tv_update_date(wait_time, status);
tv_now_mono(&now, &date);
for (count = 0; count < status; count++) { for (count = 0; count < status; count++) {
int e = epoll_events[count].events; int e = epoll_events[count].events;

View File

@ -415,7 +415,7 @@ void init(int argc, char **argv)
global.rlimit_memmax = HAPROXY_MEMMAX; global.rlimit_memmax = HAPROXY_MEMMAX;
#endif #endif
tv_now_mono(&now, &date); tv_update_date(-1,-1);
start_date = now; start_date = now;
init_task(); init_task();
@ -897,7 +897,7 @@ void run_poll_loop()
{ {
struct timeval next; struct timeval next;
tv_now_mono(&now, &date); tv_update_date(0,1);
while (1) { while (1) {
process_runnable_tasks(&next); process_runnable_tasks(&next);

View File

@ -385,7 +385,7 @@ void soft_stop(void)
stopping = 1; stopping = 1;
p = proxy; p = proxy;
tv_now_mono(&now, &date); /* else, the old time before select will be used */ tv_update_date(0,1); /* else, the old time before select will be used */
while (p) { while (p) {
if (p->state != PR_STSTOPPED) { if (p->state != PR_STSTOPPED) {
Warning("Stopping proxy %s in %d ms.\n", p->id, p->grace); Warning("Stopping proxy %s in %d ms.\n", p->id, p->grace);
@ -434,7 +434,7 @@ void pause_proxies(void)
err = 0; err = 0;
p = proxy; p = proxy;
tv_now_mono(&now, &date); /* else, the old time before select will be used */ tv_update_date(0,1); /* else, the old time before select will be used */
while (p) { while (p) {
if (p->state != PR_STERROR && if (p->state != PR_STERROR &&
p->state != PR_STSTOPPED && p->state != PR_STSTOPPED &&
@ -469,7 +469,7 @@ void listen_proxies(void)
struct listener *l; struct listener *l;
p = proxy; p = proxy;
tv_now_mono(&now, &date); /* else, the old time before select will be used */ tv_update_date(0,1); /* else, the old time before select will be used */
while (p) { while (p) {
if (p->state == PR_STPAUSED) { if (p->state == PR_STPAUSED) {
Warning("Enabling proxy %s.\n", p->id); Warning("Enabling proxy %s.\n", p->id);

View File

@ -143,25 +143,56 @@ REGPRM2 int _tv_isgt(const struct timeval *tv1, const struct timeval *tv2)
return __tv_isgt(tv1, tv2); return __tv_isgt(tv1, tv2);
} }
/* tv_now_mono: sets <date> to the current time (wall clock), <mono> to a value /* tv_update_date: sets <date> to system time, and sets <now> to something as
* following a monotonic function, and applies any required correction if the * close as possible to real time, following a monotonic function. The main
* time goes backwards. Note that while we could improve it a bit by checking * principle consists in detecting backwards and forwards time jumps and adjusting
* that the new date is not too far in the future, it is not much necessary to * an offset to correct them. This function should be called once after each
* do so. * poll, and never farther apart than MAX_DELAY_MS*2. The poll's timeout should
* be passed in <max_wait>, and the return value in <interrupted> (a non-zero
* value means that we have not expired the timeout). Calling it with (-1,*)
* sets both <date> and <now> to current date, and calling it with (0,1) simply
* updates the values.
*/ */
REGPRM2 struct timeval *tv_now_mono(struct timeval *mono, struct timeval *wall) REGPRM2 void tv_update_date(int max_wait, int interrupted)
{ {
static struct timeval tv_offset; static struct timeval tv_offset; /* warning: signed offset! */
struct timeval adjusted; struct timeval adjusted, deadline;
gettimeofday(wall, NULL); gettimeofday(&date, NULL);
__tv_add(&adjusted, wall, &tv_offset); if (unlikely(max_wait < 0)) {
if (unlikely(__tv_islt(&adjusted, mono))) { tv_zero(&tv_offset);
__tv_remain(wall, mono, &tv_offset); now = date;
return mono; return;
} }
*mono = adjusted; __tv_add(&adjusted, &date, &tv_offset);
return mono; if (unlikely(__tv_islt(&adjusted, &now))) {
goto fixup; /* jump in the past */
}
/* OK we did not jump backwards, let's see if we have jumped too far
* forwards. The poll value was in <max_wait>, we accept that plus
* MAX_DELAY_MS to cover additional time.
*/
_tv_ms_add(&deadline, &now, max_wait + MAX_DELAY_MS);
if (unlikely(__tv_isge(&adjusted, &deadline))) {
goto fixup; /* jump in the future */
}
now = adjusted;
return;
fixup:
/* Large jump. If the poll was interrupted, we consider that the date
* has not changed (immediate wake-up), otherwise we add the poll
* time-out to the previous date. The new offset is recomputed.
*/
if (!interrupted)
_tv_ms_add(&now, &now, max_wait);
tv_offset.tv_sec = now.tv_sec - date.tv_sec;
tv_offset.tv_usec = now.tv_usec - date.tv_usec;
if (tv_offset.tv_usec < 0) {
tv_offset.tv_usec += 1000000;
tv_offset.tv_sec--;
}
return;
} }
char *human_time(int t, short hz_div) { char *human_time(int t, short hz_div) {