MINOR: checks: add on-marked-up option

This implements the feature discussed in the earlier thread of killing
connections on backup servers when a non-backup server comes back up. For
example, you can use this to route to a mysql master & slave and ensure
clients don't stay on the slave after the master goes from down->up. I've done
some minimal testing and it seems to work.

[WT: added session flag & doc, moved the killing after logging the server UP,
 and ensured that the new server is really usable]
This commit is contained in:
Justin Karneges 2012-05-24 15:28:52 -07:00 committed by Willy Tarreau
parent 39b0665bc7
commit eb2c24ae2a
7 changed files with 77 additions and 8 deletions

View File

@ -6970,7 +6970,30 @@ on-error <mode>
on-marked-down <action> on-marked-down <action>
Modify what occurs when a server is marked down. Modify what occurs when a server is marked down.
Currently one action is available: Currently one action is available:
- shutdown-sessions: Shutdown peer sessions - shutdown-sessions: Shutdown peer sessions. When this setting is enabled,
all connections to the server are immediately terminated when the server
goes down. It might be used if the health check detects more complex cases
than a simple connection status, and long timeouts would cause the service
to remain unresponsive for too long a time. For instance, a health check
might detect that a database is stuck and that there's no chance to reuse
existing connections anymore. Connections killed this way are logged with
a 'D' termination code (for "Down").
Actions are disabled by default
Supported in default-server: Yes
on-marked-up <action>
Modify what occurs when a server is marked up.
Currently one action is available:
- shutdown-backup-sessions: Shutdown sessions on all backup servers. This is
done only if the server is not in backup state and if it is not disabled
(it must have an effective weight > 0). This can be used sometimes to force
an active server to take all the traffic back after recovery when dealing
with long sessions (eg: LDAP, SQL, ...). Doing this can cause more trouble
than it tries to solve (eg: incomplete transactions), so use this feature
with extreme care. Sessions killed because a server comes up are logged
with a 'U' termination code (for "Up").
Actions are disabled by default Actions are disabled by default
@ -9407,6 +9430,10 @@ each of which has a special meaning :
D : the session was killed by haproxy because the server was detected D : the session was killed by haproxy because the server was detected
as down and was configured to kill all connections when going down. as down and was configured to kill all connections when going down.
U : the session was killed by haproxy on this backup server because an
active server was detected as up and was configured to kill all
backup connections when going up.
K : the session was actively killed by an admin operating on haproxy. K : the session was actively killed by an admin operating on haproxy.
c : the client-side timeout expired while waiting for the client to c : the client-side timeout expired while waiting for the client to

View File

@ -75,10 +75,14 @@ enum {
enum { enum {
HANA_ONMARKEDDOWN_NONE = 0, HANA_ONMARKEDDOWN_NONE = 0,
HANA_ONMARKEDDOWN_SHUTDOWNSESSIONS, /* Shutdown peer sessions */ HANA_ONMARKEDDOWN_SHUTDOWNSESSIONS, /* Shutdown peer sessions */
}; };
enum {
HANA_ONMARKEDUP_NONE = 0,
HANA_ONMARKEDUP_SHUTDOWNBACKUPSESSIONS, /* Shutdown peer sessions */
};
enum { enum {
HANA_OBS_NONE = 0, HANA_OBS_NONE = 0,

View File

@ -120,7 +120,8 @@ struct server {
int rise, fall; /* time in iterations */ int rise, fall; /* time in iterations */
int consecutive_errors_limit; /* number of consecutive errors that triggers an event */ int consecutive_errors_limit; /* number of consecutive errors that triggers an event */
short observe, onerror; /* observing mode: one of HANA_OBS_*; what to do on error: on of ANA_ONERR_* */ short observe, onerror; /* observing mode: one of HANA_OBS_*; what to do on error: on of ANA_ONERR_* */
short onmarkeddown; /* what to do when marked down: on of HANA_ONMARKEDDOWN_* */ short onmarkeddown; /* what to do when marked down: one of HANA_ONMARKEDDOWN_* */
short onmarkedup; /* what to do when marked up: one of HANA_ONMARKEDUP_* */
int inter, fastinter, downinter; /* checks: time in milliseconds */ int inter, fastinter, downinter; /* checks: time in milliseconds */
int slowstart; /* slowstart time in seconds (ms in the conf) */ int slowstart; /* slowstart time in seconds (ms in the conf) */
int result; /* health-check result : SRV_CHK_* */ int result; /* health-check result : SRV_CHK_* */

View File

@ -67,6 +67,7 @@
#define SN_ERR_INTERNAL 0x00007000 /* the proxy encountered an internal error */ #define SN_ERR_INTERNAL 0x00007000 /* the proxy encountered an internal error */
#define SN_ERR_DOWN 0x00008000 /* the proxy killed a session because the backend became unavailable */ #define SN_ERR_DOWN 0x00008000 /* the proxy killed a session because the backend became unavailable */
#define SN_ERR_KILLED 0x00009000 /* the proxy killed a session because it was asked to do so */ #define SN_ERR_KILLED 0x00009000 /* the proxy killed a session because it was asked to do so */
#define SN_ERR_UP 0x0000a000 /* the proxy killed a session because a preferred backend became available */
#define SN_ERR_MASK 0x0000f000 /* mask to get only session error flags */ #define SN_ERR_MASK 0x0000f000 /* mask to get only session error flags */
#define SN_ERR_SHIFT 12 /* bit shift */ #define SN_ERR_SHIFT 12 /* bit shift */

View File

@ -4406,6 +4406,18 @@ int cfg_parse_listen(const char *file, int linenum, char **args, int kwm)
cur_arg += 2; cur_arg += 2;
} }
else if (!strcmp(args[cur_arg], "on-marked-up")) {
if (!strcmp(args[cur_arg + 1], "shutdown-backup-sessions"))
newsrv->onmarkedup = HANA_ONMARKEDUP_SHUTDOWNBACKUPSESSIONS;
else {
Alert("parsing [%s:%d]: '%s' expects 'shutdown-backup-sessions' but got '%s'\n",
file, linenum, args[cur_arg], args[cur_arg + 1]);
err_code |= ERR_ALERT | ERR_FATAL;
goto out;
}
cur_arg += 2;
}
else if (!strcmp(args[cur_arg], "error-limit")) { else if (!strcmp(args[cur_arg], "error-limit")) {
if (!*args[cur_arg + 1]) { if (!*args[cur_arg + 1]) {
Alert("parsing [%s:%d]: '%s' expects an integer argument.\n", Alert("parsing [%s:%d]: '%s' expects an integer argument.\n",

View File

@ -358,15 +358,30 @@ static int check_for_pending(struct server *s)
return xferred; return xferred;
} }
/* Shutdown connections when their server goes down. /* Shutdown all connections of a server. The caller must pass a termination
* code in <why>, which must be one of SN_ERR_* indicating the reason for the
* shutdown.
*/ */
static void shutdown_sessions(struct server *srv) static void shutdown_sessions(struct server *srv, int why)
{ {
struct session *session, *session_bck; struct session *session, *session_bck;
list_for_each_entry_safe(session, session_bck, &srv->actconns, by_srv) list_for_each_entry_safe(session, session_bck, &srv->actconns, by_srv)
if (session->srv_conn == srv) if (session->srv_conn == srv)
session_shutdown(session, SN_ERR_DOWN); session_shutdown(session, why);
}
/* Shutdown all connections of all backup servers of a proxy. The caller must
* pass a termination code in <why>, which must be one of SN_ERR_* indicating
* the reason for the shutdown.
*/
static void shutdown_backup_sessions(struct proxy *px, int why)
{
struct server *srv;
for (srv = px->srv; srv != NULL; srv = srv->next)
if (srv->state & SRV_BACKUP)
shutdown_sessions(srv, why);
} }
/* Sets server <s> down, notifies by all available means, recounts the /* Sets server <s> down, notifies by all available means, recounts the
@ -394,7 +409,7 @@ void set_server_down(struct server *s)
s->proxy->lbprm.set_server_status_down(s); s->proxy->lbprm.set_server_status_down(s);
if (s->onmarkeddown & HANA_ONMARKEDDOWN_SHUTDOWNSESSIONS) if (s->onmarkeddown & HANA_ONMARKEDDOWN_SHUTDOWNSESSIONS)
shutdown_sessions(s); shutdown_sessions(s, SN_ERR_DOWN);
/* we might have sessions queued on this server and waiting for /* we might have sessions queued on this server and waiting for
* a connection. Those which are redispatchable will be queued * a connection. Those which are redispatchable will be queued
@ -480,6 +495,15 @@ void set_server_up(struct server *s) {
if (s->proxy->lbprm.set_server_status_up) if (s->proxy->lbprm.set_server_status_up)
s->proxy->lbprm.set_server_status_up(s); s->proxy->lbprm.set_server_status_up(s);
/* If the server is set with "on-marked-up shutdown-backup-sessions",
* and it's not a backup server and its effective weight is > 0,
* then it can accept new connections, so we shut down all sessions
* on all backup servers.
*/
if ((s->onmarkedup & HANA_ONMARKEDUP_SHUTDOWNBACKUPSESSIONS) &&
!(s->state & SRV_BACKUP) && s->eweight)
shutdown_backup_sessions(s->proxy, SN_ERR_UP);
/* check if we can handle some connections queued at the proxy. We /* check if we can handle some connections queued at the proxy. We
* will take as many as we can handle. * will take as many as we can handle.
*/ */

View File

@ -50,7 +50,7 @@ const char *log_levels[NB_LOG_LEVELS] = {
"warning", "notice", "info", "debug" "warning", "notice", "info", "debug"
}; };
const char sess_term_cond[10] = "-cCsSPRIDK"; /* normal, CliTo, CliErr, SrvTo, SrvErr, PxErr, Resource, Internal, Down, Killed */ const char sess_term_cond[16] = "-cCsSPRIDKUIIIII"; /* normal, CliTo, CliErr, SrvTo, SrvErr, PxErr, Resource, Internal, Down, Killed, Up, -- */
const char sess_fin_state[8] = "-RCHDLQT"; /* cliRequest, srvConnect, srvHeader, Data, Last, Queue, Tarpit */ const char sess_fin_state[8] = "-RCHDLQT"; /* cliRequest, srvConnect, srvHeader, Data, Last, Queue, Tarpit */