mirror of
https://git.haproxy.org/git/haproxy.git/
synced 2025-08-06 07:07:04 +02:00
MINOR: glitches: add global setting "tune.glitches.kill.cpu-usage"
It was mentioned during the development of glitches that it would be nice to support not killing misbehaving connections below a certain CPU usage so that poor implementations that routinely misbehave without impact are not killed. This is now possible by setting a CPU usage threshold under which we don't kill them via this parameter. It defaults to zero so that we continue to kill them by default.
This commit is contained in:
parent
eee57b4d3f
commit
a1577a89a0
@ -4016,6 +4016,25 @@ tune.fd.edge-triggered { on | off } [ EXPERIMENTAL ]
|
||||
certain scenarios. This is still experimental, it may result in frozen
|
||||
connections if bugs are still present, and is disabled by default.
|
||||
|
||||
tune.glitches.kill.cpu-usage <number>
|
||||
Sets the minimum CPU usage between 0 and 100, at which connections showing
|
||||
too many glitches will be killed. This applies to connections that have
|
||||
reached their glitches-threshold limit. In environments where very long
|
||||
connections often behave badly without causing any performance impact, it
|
||||
might be desirable to keep them regardless of their misbehavior as long as
|
||||
they do not hurt, and to only start to kill such connections when the CPU is
|
||||
getting busy. This parameter allows specifying that a connection reaching
|
||||
its glitches threshold will be actively killed when the CPU usage is at this
|
||||
level or above, but never when it's below. Note that the CPU usage is
|
||||
measured per thread, so a single misbehaving connection might be killed. The
|
||||
default is zero, meaning that a connection reaching its glitches-threshold
|
||||
will automatically get killed. A rule of thumb would be to set this value to
|
||||
twice the usually observed CPU usage, or the commonly observed CPU usage plus
|
||||
half the idle one (i.e. if CPU commonly reaches 60%, setting 80 here can make
|
||||
sense). This parameter has no effect without tune.h2.fe.glitches-threshold or
|
||||
tune.quic.frontend.glitches-threshold. See also the global parameters
|
||||
"tune.h2.fe.glitches-threshold" and "tune.quic.frontend.glitches-threshold".
|
||||
|
||||
tune.h1.zero-copy-fwd-recv { on | off }
|
||||
Enables ('on') or disables ('off') the zero-copy receives of data for the H1
|
||||
multiplexer. It is enabled by default.
|
||||
@ -4036,9 +4055,12 @@ tune.h2.be.glitches-threshold <number>
|
||||
event will cause a connection to be closed. Beware that some H2 servers may
|
||||
occasionally cause a few glitches over long-lasting connections, so any non-
|
||||
zero value here should probably be in the hundreds or thousands to be
|
||||
effective without affecting slightly bogus servers.
|
||||
effective without affecting slightly bogus servers. It is also possible to
|
||||
only kill connections when the CPU usage crosses a certain level, by using
|
||||
"tune.glitches.kill.cpu-usage".
|
||||
|
||||
See also: tune.h2.fe.glitches-threshold, bc_glitches
|
||||
See also: tune.h2.fe.glitches-threshold, bc_glitches, and
|
||||
tune.glitches.kill.cpu-usage
|
||||
|
||||
tune.h2.be.initial-window-size <number>
|
||||
Sets the HTTP/2 initial window size for outgoing connections, which is the
|
||||
@ -4090,9 +4112,12 @@ tune.h2.fe.glitches-threshold <number>
|
||||
event will cause a connection to be closed. Beware that some H2 clients may
|
||||
occasionally cause a few glitches over long-lasting connections, so any non-
|
||||
zero value here should probably be in the hundreds or thousands to be
|
||||
effective without affecting slightly bogus clients.
|
||||
effective without affecting slightly bogus clients. It is also possible to
|
||||
only kill connections when the CPU usage crosses a certain level, by using
|
||||
"tune.glitches.kill.cpu-usage".
|
||||
|
||||
See also: tune.h2.be.glitches-threshold, fc_glitches
|
||||
See also: tune.h2.be.glitches-threshold, fc_glitches, and
|
||||
tune.glitches.kill.cpu-usage
|
||||
|
||||
tune.h2.fe.initial-window-size <number>
|
||||
Sets the HTTP/2 initial window size for incoming connections, which is the
|
||||
@ -4663,9 +4688,11 @@ tune.quic.frontend.glitches-threshold <number>
|
||||
event will cause a connection to be closed. Beware that some QUIC clients may
|
||||
occasionally cause a few glitches over long-lasting connections, so any non-
|
||||
zero value here should probably be in the hundreds or thousands to be
|
||||
effective without affecting slightly bogus clients.
|
||||
effective without affecting slightly bogus clients. It is also possible to
|
||||
only kill connections when the CPU usage crosses a certain level, by using
|
||||
"tune.glitches.kill.cpu-usage".
|
||||
|
||||
See also: fc_glitches
|
||||
See also: fc_glitches, tune.glitches.kill.cpu-usage
|
||||
|
||||
tune.quic.frontend.max-data-size <size>
|
||||
This setting is the hard limit for the number of data bytes in flight over a
|
||||
|
@ -197,6 +197,7 @@ struct global {
|
||||
int pattern_cache; /* max number of entries in the pattern cache. */
|
||||
int sslcachesize; /* SSL cache size in session, defaults to 20000 */
|
||||
int comp_maxlevel; /* max HTTP compression level */
|
||||
uint glitch_kill_maxidle; /* have glitches kill only below this level of idle */
|
||||
int pool_low_ratio; /* max ratio of FDs used before we stop using new idle connections */
|
||||
int pool_high_ratio; /* max ratio of FDs used before we start killing idle connections when creating new connections */
|
||||
int pool_low_count; /* max number of opened fd before we stop using new idle connections */
|
||||
|
@ -1404,6 +1404,18 @@ static int cfg_parse_global_tune_opts(char **args, int section_type,
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
else if (strcmp(args[0], "tune.glitches.kill.cpu-usage") == 0) {
|
||||
if (*(args[1]) == 0) {
|
||||
memprintf(err, "'%s' expects a numeric value between 0 and 100", args[0]);
|
||||
return -1;
|
||||
}
|
||||
global.tune.glitch_kill_maxidle = 100 - atoi(args[1]);
|
||||
if (global.tune.glitch_kill_maxidle > 100) {
|
||||
memprintf(err, "'%s' expects a numeric value between 0 and 100", args[0]);
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
else {
|
||||
BUG_ON(1, "Triggered in cfg_parse_global_tune_opts() by unsupported keyword.");
|
||||
return -1;
|
||||
@ -1768,6 +1780,7 @@ static struct cfg_kw_list cfg_kws = {ILH, {
|
||||
{ CFG_GLOBAL, "tune.comp.maxlevel", cfg_parse_global_tune_opts },
|
||||
{ CFG_GLOBAL, "tune.disable-fast-forward", cfg_parse_global_tune_forward_opts },
|
||||
{ CFG_GLOBAL, "tune.disable-zero-copy-forwarding", cfg_parse_global_tune_forward_opts },
|
||||
{ CFG_GLOBAL, "tune.glitches.kill.cpu-usage", cfg_parse_global_tune_opts },
|
||||
{ CFG_GLOBAL, "tune.http.cookielen", cfg_parse_global_tune_opts },
|
||||
{ CFG_GLOBAL, "tune.http.logurilen", cfg_parse_global_tune_opts },
|
||||
{ CFG_GLOBAL, "tune.http.maxhdr", cfg_parse_global_tune_opts },
|
||||
|
@ -187,6 +187,7 @@ struct global global = {
|
||||
.sslcachesize = SSLCACHESIZE,
|
||||
#endif
|
||||
.comp_maxlevel = 1,
|
||||
.glitch_kill_maxidle = 100,
|
||||
#ifdef DEFAULT_IDLE_TIMER
|
||||
.idle_timer = DEFAULT_IDLE_TIMER,
|
||||
#else
|
||||
|
@ -1682,7 +1682,8 @@ static inline int _h2c_report_glitch(struct h2c *h2c, int increment)
|
||||
h2_be_glitches_threshold : h2_fe_glitches_threshold;
|
||||
|
||||
h2c->glitches += increment;
|
||||
if (thres && h2c->glitches >= thres) {
|
||||
if (thres && h2c->glitches >= thres &&
|
||||
(th_ctx->idle_pct <= global.tune.glitch_kill_maxidle)) {
|
||||
h2c_error(h2c, H2_ERR_ENHANCE_YOUR_CALM);
|
||||
return 1;
|
||||
}
|
||||
|
@ -769,7 +769,8 @@ int _qcc_report_glitch(struct qcc *qcc, int inc)
|
||||
const int max = global.tune.quic_frontend_glitches_threshold;
|
||||
|
||||
qcc->glitches += inc;
|
||||
if (max && qcc->glitches >= max && !(qcc->flags & QC_CF_ERRL)) {
|
||||
if (max && qcc->glitches >= max && !(qcc->flags & QC_CF_ERRL) &&
|
||||
(th_ctx->idle_pct <= global.tune.glitch_kill_maxidle)) {
|
||||
if (qcc->app_ops->report_susp) {
|
||||
qcc->app_ops->report_susp(qcc->ctx);
|
||||
qcc_set_error(qcc, qcc->err.code, 1);
|
||||
|
Loading…
Reference in New Issue
Block a user