haproxy/src/quic_cid.c
Amaury Denoyelle a5801e542d MINOR: quic: split global CID tree between FE and BE sides
QUIC CIDs are stored in a global tree. Prior to this patch, CIDs used on
both frontend and backend sides were mixed together.

This patch implements CID storage separation between FE and BE sides. The
original tree quic_cid_trees is split into
quic_fe_cid_trees/quic_be_cid_trees.

This patch should reduce contention between frontend and backend usages.
Also, it should reduce the risk of random CID collision.
2025-11-25 14:30:18 +01:00

536 lines
15 KiB
C

#include <stdlib.h>
#include <import/eb64tree.h>
#include <import/ebmbtree.h>
#include <haproxy/bug.h>
#include <haproxy/errors.h>
#include <haproxy/pool.h>
#include <haproxy/quic_cid.h>
#include <haproxy/quic_conn.h>
#include <haproxy/quic_rx-t.h>
#include <haproxy/quic_trace.h>
#include <haproxy/trace.h>
#include <haproxy/xxhash.h>
/* *** QUIC CID handling general principles
*
* . CID global storage
* CIDs generated by haproxy and reused by the peer as DCID are stored in a
* global tree. Tree access must only be done under lock protection. Separate
* trees are used on the frontend and backend sides.
*
* . CID global tree splitting
* To reduce thread contention, a global CID tree is in reality split into
* 256 distinct instances. Each CID is assigned to a single tree instance
* based on its content. Use quic_cid_tree_idx() to retrieve the expected tree
* location for a CID.
*
* . ODCID handling
* ODCIDs are never stored in the global CID tree. This reduces the tree size
* as clients are expected to switch quickly to a new haproxy-assigned CID.
* This new CID value is derived by haproxy from the ODCID plus a bunch of
* other parameters. If the ODCID is reused by the client, the first lookup in
* the global CID tree won't be successful. In this case, the derive operation
* is performed again before a new tree lookup.
*/
/* Number of tree instances allocated for each global CID tree array. */
#define QUIC_CID_TREES_CNT 256
/* Global CID trees used on the frontend side. The array holds
 * QUIC_CID_TREES_CNT entries and is allocated by
 * quic_alloc_global_fe_cid_tree().
 */
struct quic_cid_tree *quic_fe_cid_trees;
/* Global CID trees used on the backend side. The array holds
 * QUIC_CID_TREES_CNT entries and is allocated by
 * quic_alloc_global_be_cid_tree().
 */
struct quic_cid_tree *quic_be_cid_trees;
/* Compute the stateless reset token of <conn_id> and store it directly into
 * the connection ID object. The CID value itself is used as salt.
 *
 * Returns 1 on success, 0 on failure.
 */
static int quic_stateless_reset_token_init(struct quic_connection_id *conn_id)
{
	/* Output buffer is the token field, salt is the CID content. */
	return quic_stateless_reset_token_cpy(conn_id->stateless_reset_token,
	                                      sizeof(conn_id->stateless_reset_token),
	                                      conn_id->cid.data,
	                                      conn_id->cid.len);
}
/* Generate a CID directly derived from <orig> CID and <addr> address.
*
* This function is used to calculate the first connection CID derived from
* client ODCID. This allows to optimize CID global tree by not inserting ODCID
* as client is expected to replace it early.
*
* Returns the derived CID.
*/
static struct quic_cid quic_derive_cid(const struct quic_cid *orig,
const struct sockaddr_storage *addr)
{
struct quic_cid cid;
const struct sockaddr_in *in;
const struct sockaddr_in6 *in6;
char *pos = trash.area;
size_t idx = 0;
uint64_t hash;
int i;
/* Prepare buffer for hash using original CID first. */
memcpy(pos, orig->data, orig->len);
idx += orig->len;
/* Concatenate client address. */
switch (addr->ss_family) {
case AF_INET:
in = (struct sockaddr_in *)addr;
memcpy(&pos[idx], &in->sin_addr, sizeof(in->sin_addr));
idx += sizeof(in->sin_addr);
memcpy(&pos[idx], &in->sin_port, sizeof(in->sin_port));
idx += sizeof(in->sin_port);
break;
case AF_INET6:
in6 = (struct sockaddr_in6 *)addr;
memcpy(&pos[idx], &in6->sin6_addr, sizeof(in6->sin6_addr));
idx += sizeof(in6->sin6_addr);
memcpy(&pos[idx], &in6->sin6_port, sizeof(in6->sin6_port));
idx += sizeof(in6->sin6_port);
break;
default:
/* TODO to implement */
ABORT_NOW();
}
/* Avoid similar values between multiple haproxy process. */
memcpy(&pos[idx], boot_seed, sizeof(boot_seed));
idx += sizeof(boot_seed);
/* Hash the final buffer content. */
hash = XXH64(pos, idx, 0);
for (i = 0; i < sizeof(hash); ++i)
cid.data[i] = hash >> ((sizeof(hash) * 7) - (8 * i));
cid.len = sizeof(hash);
return cid;
}
/* Allocate a quic_connection_id object from its pool and bind it to the
 * current thread. <side> is recorded in the object. The new CID is neither
 * attached to a connection nor inserted in any tree storage yet.
 *
 * Returns the CID or NULL on allocation failure.
 */
struct quic_connection_id *quic_cid_alloc(enum quic_cid_side side)
{
	struct quic_connection_id *cid_obj;

	/* TODO use a better trace scope */
	TRACE_ENTER(QUIC_EV_CONN_TXPKT);

	cid_obj = pool_alloc(pool_head_quic_connection_id);
	if (cid_obj) {
		HA_ATOMIC_STORE(&cid_obj->tid, tid);
		cid_obj->qc = NULL;
		cid_obj->side = side;
	}
	else {
		TRACE_ERROR("cid allocation failed", QUIC_EV_CONN_TXPKT);
	}

	/* <cid_obj> is NULL here if the allocation failed. */
	TRACE_LEAVE(QUIC_EV_CONN_TXPKT);
	return cid_obj;
}
/* Fill <conn_id> with a CID value of QUIC_HAP_CID_LEN bytes taken from the
 * random generator, then compute its associated stateless reset token.
 *
 * Returns 0 on success else non-zero.
 */
int quic_cid_generate_random(struct quic_connection_id *conn_id)
{
	int ret = 1;

	/* TODO use a better trace scope */
	TRACE_ENTER(QUIC_EV_CONN_TXPKT);

	conn_id->cid.len = QUIC_HAP_CID_LEN;

	TRACE_DEVEL("generate CID value from random generator", QUIC_EV_CONN_TXPKT);
	if (RAND_bytes(conn_id->cid.data, conn_id->cid.len) != 1) {
		/* TODO: RAND_bytes() should be replaced */
		TRACE_ERROR("RAND_bytes() failed", QUIC_EV_CONN_TXPKT);
		goto leave;
	}

	if (quic_stateless_reset_token_init(conn_id) != 1) {
		TRACE_ERROR("quic_stateless_reset_token_init() failed", QUIC_EV_CONN_TXPKT);
		goto leave;
	}

	ret = 0;

 leave:
	TRACE_LEAVE(QUIC_EV_CONN_TXPKT);
	return ret;
}
/* Fill <conn_id> with a CID value of QUIC_HAP_CID_LEN bytes computed by the
 * quic_newcid_from_hash64() external callback, using <hash64> and the
 * cluster secret as input, then compute its associated stateless reset token.
 *
 * Returns 0 on success else non-zero.
 */
int quic_cid_generate_from_hash(struct quic_connection_id *conn_id, uint64_t hash64)
{
	int ret = 1;

	/* TODO use a better trace scope */
	TRACE_ENTER(QUIC_EV_CONN_TXPKT);

	conn_id->cid.len = QUIC_HAP_CID_LEN;

	TRACE_DEVEL("calculate CID value from conn hash", QUIC_EV_CONN_TXPKT);
	quic_newcid_from_hash64(conn_id->cid.data, conn_id->cid.len, hash64,
	                        global.cluster_secret, sizeof(global.cluster_secret));

	if (quic_stateless_reset_token_init(conn_id) == 1) {
		ret = 0;
	}
	else {
		TRACE_ERROR("quic_stateless_reset_token_init() failed", QUIC_EV_CONN_TXPKT);
	}

	TRACE_LEAVE(QUIC_EV_CONN_TXPKT);
	return ret;
}
/* Fill <conn_id> with a CID value derived from client <orig> ODCID and
 * <addr> address, then compute its associated stateless reset token. This
 * method is an alternative to the other CID generation functions and is used
 * for the first CID of a server connection in response to a client INITIAL
 * packet.
 *
 * Thanks to this derivation, ODCIDs do not have to be stored in the global
 * CIDs tree, which reduces contention on it given that ODCIDs are only
 * briefly used during a connection lifetime. The client address is part of
 * the derivation to reduce the collision risk.
 *
 * Returns 0 on success else non-zero.
 */
int quic_cid_derive_from_odcid(struct quic_connection_id *conn_id,
                               const struct quic_cid *orig,
                               const struct sockaddr_storage *addr)
{
	int ret = 1;

	/* TODO use a better trace scope */
	TRACE_ENTER(QUIC_EV_CONN_TXPKT);

	conn_id->cid.len = QUIC_HAP_CID_LEN;

	TRACE_DEVEL("derive CID value from a client ODCID", QUIC_EV_CONN_TXPKT);

	/* The whole CID (data and length) is replaced by the derived one. */
	conn_id->cid = quic_derive_cid(orig, addr);

	if (quic_stateless_reset_token_init(conn_id) == 1) {
		ret = 0;
	}
	else {
		TRACE_ERROR("quic_stateless_reset_token_init() failed", QUIC_EV_CONN_TXPKT);
	}

	TRACE_LEAVE(QUIC_EV_CONN_TXPKT);
	return ret;
}
/* Attach <conn_id> CID to <qc> connection: the CID takes the next available
 * sequence number of the connection and is inserted into the connection CID
 * tree. The CID should already be stored in the global tree to ensure there
 * is no value collision.
 */
void quic_cid_register_seq_num(struct quic_connection_id *conn_id,
                               struct quic_conn *qc)
{
	/* TODO use a better trace scope */
	TRACE_ENTER(QUIC_EV_CONN_TXPKT, qc);

	conn_id->qc = qc;
	conn_id->retire_prior_to = 0;

	/* Consume the next sequence number of the connection. */
	conn_id->seq_num.key = qc->next_cid_seq_num++;

	/* insert the allocated CID in the quic_conn tree */
	eb64_insert(qc->cids, &conn_id->seq_num);

	TRACE_LEAVE(QUIC_EV_CONN_TXPKT, qc);
}
/* Insert <conn_id> in its global CID tree under write lock. Insertion fails
 * if an identical CID value is already stored.
 *
 * If <new_tid> is non null, it is used as an output parameter. It is first
 * set to -1. In case of a CID collision, it is set to the thread ID of the
 * CID already stored in the tree.
 *
 * Returns 0 on insert success else non-zero.
 */
int quic_cid_insert(struct quic_connection_id *conn_id, int *new_tid)
{
	struct quic_cid_tree *tree;
	struct ebmb_node *stored;
	int ret = 0;

	if (new_tid)
		*new_tid = -1;

	tree = quic_cid_get_tree(conn_id);

	HA_RWLOCK_WRLOCK(QC_CID_LOCK, &tree->lock);
	stored = ebmb_insert(&tree->root, &conn_id->node, conn_id->cid.len);
	if (stored != &conn_id->node) {
		/* Another entry with the same value is already present. */
		ret = -1;
		if (new_tid) {
			struct quic_connection_id *other;

			other = ebmb_entry(stored, struct quic_connection_id, node);
			*new_tid = HA_ATOMIC_LOAD(&other->tid);
		}
	}
	HA_RWLOCK_WRUNLOCK(QC_CID_LOCK, &tree->lock);

	return ret;
}
/* Lookup CID in global CID tree equal to <cid> data with <cid_len> length. If
* found, ensure CID instance is linked to <qc> connection.
*
* Returns a boolean value.
*/
int quic_cmp_cid_conn(const unsigned char *cid, size_t cid_len,
struct quic_conn *qc)
{
struct quic_cid_tree *tree;
struct quic_connection_id *conn_id;
struct ebmb_node *node;
int ret = 0;
/* This function is only used on frontend side. */
tree = &quic_fe_cid_trees[_quic_cid_tree_idx(cid)];
HA_RWLOCK_RDLOCK(QC_CID_LOCK, &tree->lock);
node = ebmb_lookup(&tree->root, cid, cid_len);
if (node) {
conn_id = ebmb_entry(node, struct quic_connection_id, node);
if (qc == conn_id->qc)
ret = 1;
}
HA_RWLOCK_RDUNLOCK(QC_CID_LOCK, &tree->lock);
return ret;
}
/* Retrieve the thread ID associated to QUIC connection ID <cid> of length
 * <cid_len>. The CID may not be found in the frontend global CID tree because
 * it is an ODCID. In this case, it will be derived using client address
 * <cli_addr> as hash parameter, and a second lookup is performed using the
 * derived CID value. However, this is done only if <pos> points to an INITIAL
 * or 0RTT packet of length <len>.
 *
 * Returns the thread ID or a negative error code.
 */
int quic_get_cid_tid(const unsigned char *cid, size_t cid_len,
                     const struct sockaddr_storage *cli_addr,
                     unsigned char *pos, size_t len)
{
	struct quic_cid_tree *tree;
	struct quic_connection_id *conn_id;
	struct ebmb_node *node;
	int cid_tid = -1;

	/* This function is only used on frontend side. */
	tree = &quic_fe_cid_trees[_quic_cid_tree_idx(cid)];
	HA_RWLOCK_RDLOCK(QC_CID_LOCK, &tree->lock);
	node = ebmb_lookup(&tree->root, cid, cid_len);
	if (node) {
		conn_id = ebmb_entry(node, struct quic_connection_id, node);
		cid_tid = HA_ATOMIC_LOAD(&conn_id->tid);
	}
	HA_RWLOCK_RDUNLOCK(QC_CID_LOCK, &tree->lock);

	/* If CID not found, it may be an ODCID, thus not stored in global CID
	 * tree. Derive it to its associated DCID value and reperform a lookup.
	 */
	if (cid_tid < 0) {
		struct quic_cid orig, derive_cid;
		struct quic_rx_packet pkt;

		if (!qc_parse_hd_form(&pkt, &pos, pos + len))
			goto out;

		/* ODCID are only used in INITIAL or 0-RTT packets */
		if (pkt.type != QUIC_PACKET_TYPE_INITIAL &&
		    pkt.type != QUIC_PACKET_TYPE_0RTT) {
			goto out;
		}

		/* Never write past the local CID storage. */
		if (cid_len > sizeof(orig.data))
			goto out;

		memcpy(orig.data, cid, cid_len);
		orig.len = cid_len;
		derive_cid = quic_derive_cid(&orig, cli_addr);

		tree = &quic_fe_cid_trees[quic_cid_tree_idx(&derive_cid)];
		HA_RWLOCK_RDLOCK(QC_CID_LOCK, &tree->lock);
		/* The ODCID itself is never stored in the tree: the lookup key
		 * must be the derived CID, matching the tree selected above.
		 */
		node = ebmb_lookup(&tree->root, derive_cid.data, derive_cid.len);
		if (node) {
			conn_id = ebmb_entry(node, struct quic_connection_id, node);
			cid_tid = HA_ATOMIC_LOAD(&conn_id->tid);
		}
		HA_RWLOCK_RDUNLOCK(QC_CID_LOCK, &tree->lock);
	}

 out:
	return cid_tid;
}
/* Find the quic_conn instance matching the DCID field of <pkt> in the
 * frontend global CID tree. If the packet is an INITIAL or 0RTT type and the
 * DCID is not found, the DCID may be an ODCID: a second lookup is performed
 * with the CID derived from it and client address <saddr>.
 *
 * <new_tid> is first set to -1. If the CID is found but is bound to another
 * thread, <new_tid> is set to that thread ID and no connection is returned.
 *
 * Returns the instance or NULL if not found.
 */
struct quic_conn *retrieve_qc_conn_from_cid(struct quic_rx_packet *pkt,
                                            struct sockaddr_storage *saddr,
                                            int *new_tid)
{
	struct quic_conn *found = NULL;
	struct quic_connection_id *conn_id;
	struct quic_cid_tree *tree;
	struct ebmb_node *node;
	uint owner_tid;

	TRACE_ENTER(QUIC_EV_CONN_RXPKT);

	*new_tid = -1;

	/* First look into DCID tree.
	 * This function is only used on frontend side.
	 */
	tree = &quic_fe_cid_trees[_quic_cid_tree_idx(pkt->dcid.data)];
	HA_RWLOCK_RDLOCK(QC_CID_LOCK, &tree->lock);
	node = ebmb_lookup(&tree->root, pkt->dcid.data, pkt->dcid.len);

	/* A miss on an Initial/0-RTT packet may be caused by an ODCID reused
	 * by the client. Compute the derived CID value and search for it in
	 * its own tree instance, after having released the first one.
	 */
	if (!node && (pkt->type == QUIC_PACKET_TYPE_INITIAL ||
	              pkt->type == QUIC_PACKET_TYPE_0RTT)) {
		const struct quic_cid derive_cid = quic_derive_cid(&pkt->dcid, saddr);

		HA_RWLOCK_RDUNLOCK(QC_CID_LOCK, &tree->lock);

		tree = &quic_fe_cid_trees[quic_cid_tree_idx(&derive_cid)];
		HA_RWLOCK_RDLOCK(QC_CID_LOCK, &tree->lock);
		node = ebmb_lookup(&tree->root, derive_cid.data, derive_cid.len);
	}

	if (node) {
		conn_id = ebmb_entry(node, struct quic_connection_id, node);
		owner_tid = HA_ATOMIC_LOAD(&conn_id->tid);
		if (owner_tid == tid) {
			/* Ensures that conn is always set if CID is found on
			 * its thread. Necessary to prevent loop of re-enqueued
			 * Initial packets.
			 */
			BUG_ON(!conn_id->qc);
			found = conn_id->qc;
			TRACE_DEVEL("found connection", QUIC_EV_CONN_RXPKT, found);
		}
		else {
			/* CID owned by another thread: report it to caller. */
			*new_tid = owner_tid;
		}
	}

	/* <tree> is the last tree instance locked, whatever the path. */
	HA_RWLOCK_RDUNLOCK(QC_CID_LOCK, &tree->lock);
	TRACE_LEAVE(QUIC_EV_CONN_RXPKT);
	return found;
}
/* Allocate a NEW_CONNECTION_ID frame for <conn_id> CID and append it to the
 * list of frames to send of the packet number space of <qc> connection
 * application encryption level.
 *
 * Returns 1 on success else 0.
 */
int qc_build_new_connection_id_frm(struct quic_conn *qc,
                                   struct quic_connection_id *conn_id)
{
	struct quic_frame *frm;
	int ret = 0;

	TRACE_ENTER(QUIC_EV_CONN_PRSHPKT, qc);

	frm = qc_frm_alloc(QUIC_FT_NEW_CONNECTION_ID);
	if (frm) {
		quic_connection_id_to_frm_cpy(frm, conn_id);
		LIST_APPEND(&qc->ael->pktns->tx.frms, &frm->list);
		ret = 1;
	}
	else {
		TRACE_ERROR("frame allocation error", QUIC_EV_CONN_IO_CB, qc);
	}

	TRACE_LEAVE(QUIC_EV_CONN_PRSHPKT, qc);
	return ret;
}
static int quic_alloc_global_fe_cid_tree(void)
{
int i;
quic_fe_cid_trees = calloc(QUIC_CID_TREES_CNT, sizeof(*quic_fe_cid_trees));
if (!quic_fe_cid_trees) {
ha_alert("Failed to allocate global quic CIDs trees.\n");
return 0;
}
for (i = 0; i < QUIC_CID_TREES_CNT; ++i) {
HA_RWLOCK_INIT(&quic_fe_cid_trees[i].lock);
quic_fe_cid_trees[i].root = EB_ROOT_UNIQUE;
}
return 1;
}
REGISTER_POST_CHECK(quic_alloc_global_fe_cid_tree);
/* Release the array of frontend global CID trees via ha_free(). Registered
 * as a post-deinit callback.
 *
 * Always returns 1.
 */
static int quic_deallocate_global_fe_cid_tree(void)
{
ha_free(&quic_fe_cid_trees);
return 1;
}
REGISTER_POST_DEINIT(quic_deallocate_global_fe_cid_tree);
static int quic_alloc_global_be_cid_tree(void)
{
int i;
quic_be_cid_trees = calloc(QUIC_CID_TREES_CNT, sizeof(*quic_be_cid_trees));
if (!quic_be_cid_trees) {
ha_alert("Failed to allocate global quic CIDs trees.\n");
return 0;
}
for (i = 0; i < QUIC_CID_TREES_CNT; ++i) {
HA_RWLOCK_INIT(&quic_be_cid_trees[i].lock);
quic_be_cid_trees[i].root = EB_ROOT_UNIQUE;
}
return 1;
}
REGISTER_POST_CHECK(quic_alloc_global_be_cid_tree);
/* Release the array of backend global CID trees via ha_free(). Registered
 * as a post-deinit callback.
 *
 * Always returns 1.
 */
static int quic_deallocate_global_be_cid_tree(void)
{
ha_free(&quic_be_cid_trees);
return 1;
}
REGISTER_POST_DEINIT(quic_deallocate_global_be_cid_tree);