mirror of
https://git.haproxy.org/git/haproxy.git/
synced 2025-09-24 23:31:40 +02:00
BUG/MEDIUM: connections: Set the tid for the old tasklet on takeover.
In the various takeover() methods, make sure we schedule the old tasklet on the old thread, as we don't want it to run on our own thread! This was causing a very rare crash when building with DEBUG_STRICT, seeing that either an FD's thread mask didn't match the thread ID in h1_io_cb(), or that stream_int_notify() would try to queue a task with the wrong tid_bit. In order to reproduce this, it is necessary to maintain many connections (typically 30k) at a high request rate flowing over H1+SSL between two proxies, the second of which would randomly reject ~1% of the incoming connection and randomly killing some idle ones using a very short client timeout. The request rate must be adjusted so that the CPUs are nearly saturated, but never reach 100%. It's easier to reproduce this by skipping local connections and always picking from other threads. The issue should happen in less than 20s otherwise it's necessary to restart to reset the idle connections lists. No backport is needed, takeover() is 2.2 only.
This commit is contained in:
parent
43079e0731
commit
1662cdb0c6
@ -390,7 +390,7 @@ struct mux_ops {
|
||||
void (*reset)(struct connection *conn); /* Reset the mux, because we're re-trying to connect */
|
||||
const struct cs_info *(*get_cs_info)(struct conn_stream *cs); /* Return info on the specified conn_stream or NULL if not defined */
|
||||
int (*ctl)(struct connection *conn, enum mux_ctl_type mux_ctl, void *arg); /* Provides information about the mux */
|
||||
int (*takeover)(struct connection *conn); /* Attempts to migrate the connection to the current thread */
|
||||
int (*takeover)(struct connection *conn, int orig_tid); /* Attempts to migrate the connection to the current thread */
|
||||
unsigned int flags; /* some flags characterizing the mux's capabilities (MX_FL_*) */
|
||||
char name[8]; /* mux layer name, zero-terminated */
|
||||
};
|
||||
|
@ -1135,7 +1135,7 @@ static struct connection *conn_backend_get(struct server *srv, int is_safe)
|
||||
|
||||
HA_SPIN_LOCK(OTHER_LOCK, &idle_conns[i].takeover_lock);
|
||||
mt_list_for_each_entry_safe(conn, &mt_list[i], list, elt1, elt2) {
|
||||
if (conn->mux->takeover && conn->mux->takeover(conn) == 0) {
|
||||
if (conn->mux->takeover && conn->mux->takeover(conn, i) == 0) {
|
||||
MT_LIST_DEL_SAFE(elt1);
|
||||
_HA_ATOMIC_ADD(&activity[tid].fd_takeover, 1);
|
||||
found = 1;
|
||||
@ -1145,7 +1145,7 @@ static struct connection *conn_backend_get(struct server *srv, int is_safe)
|
||||
|
||||
if (!found && !is_safe && srv->curr_safe_nb > 0) {
|
||||
mt_list_for_each_entry_safe(conn, &srv->safe_conns[i], list, elt1, elt2) {
|
||||
if (conn->mux->takeover && conn->mux->takeover(conn) == 0) {
|
||||
if (conn->mux->takeover && conn->mux->takeover(conn, i) == 0) {
|
||||
MT_LIST_DEL_SAFE(elt1);
|
||||
_HA_ATOMIC_ADD(&activity[tid].fd_takeover, 1);
|
||||
found = 1;
|
||||
|
@ -4084,7 +4084,7 @@ static void fcgi_show_fd(struct buffer *msg, struct connection *conn)
|
||||
* Return 0 if successful, non-zero otherwise.
|
||||
* Expected to be called with the old thread lock held.
|
||||
*/
|
||||
static int fcgi_takeover(struct connection *conn)
|
||||
static int fcgi_takeover(struct connection *conn, int orig_tid)
|
||||
{
|
||||
struct fcgi_conn *fcgi = conn->ctx;
|
||||
struct task *task;
|
||||
@ -4098,7 +4098,7 @@ static int fcgi_takeover(struct connection *conn)
|
||||
* set its context to NULL;
|
||||
*/
|
||||
fcgi->wait_event.tasklet->context = NULL;
|
||||
tasklet_wakeup(fcgi->wait_event.tasklet);
|
||||
tasklet_wakeup_on(fcgi->wait_event.tasklet, orig_tid);
|
||||
|
||||
task = fcgi->task;
|
||||
if (task) {
|
||||
|
@ -2922,7 +2922,7 @@ static int add_hdr_case_adjust(const char *from, const char *to, char **err)
|
||||
* Return 0 if successful, non-zero otherwise.
|
||||
* Expected to be called with the old thread lock held.
|
||||
*/
|
||||
static int h1_takeover(struct connection *conn)
|
||||
static int h1_takeover(struct connection *conn, int orig_tid)
|
||||
{
|
||||
struct h1c *h1c = conn->ctx;
|
||||
struct task *task;
|
||||
@ -2936,7 +2936,7 @@ static int h1_takeover(struct connection *conn)
|
||||
* set its context to NULL.
|
||||
*/
|
||||
h1c->wait_event.tasklet->context = NULL;
|
||||
tasklet_wakeup(h1c->wait_event.tasklet);
|
||||
tasklet_wakeup_on(h1c->wait_event.tasklet, orig_tid);
|
||||
|
||||
task = h1c->task;
|
||||
if (task) {
|
||||
|
@ -6055,7 +6055,7 @@ static void h2_show_fd(struct buffer *msg, struct connection *conn)
|
||||
* Return 0 if successful, non-zero otherwise.
|
||||
* Expected to be called with the old thread lock held.
|
||||
*/
|
||||
static int h2_takeover(struct connection *conn)
|
||||
static int h2_takeover(struct connection *conn, int orig_tid)
|
||||
{
|
||||
struct h2c *h2c = conn->ctx;
|
||||
struct task *task;
|
||||
@ -6069,7 +6069,7 @@ static int h2_takeover(struct connection *conn)
|
||||
* set its context to NULL.
|
||||
*/
|
||||
h2c->wait_event.tasklet->context = NULL;
|
||||
tasklet_wakeup(h2c->wait_event.tasklet);
|
||||
tasklet_wakeup_on(h2c->wait_event.tasklet, orig_tid);
|
||||
|
||||
task = h2c->task;
|
||||
if (task) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user