haproxy/src/hpack-dec.c
Willy Tarreau 59a10fb53d MEDIUM: h2: change hpack_decode_headers() to only provide a list of headers
The current H2 to H1 protocol conversion presents some issues which will
require some processing to be performed on certain headers before writing
them, so it is not possible to convert HPACK to H1 on the fly.

This commit modifies the headers decoding so that it now works in two
phases : hpack_decode_headers() only decodes the HPACK stream in the
HEADERS frame and puts the result into a list. Headers which require
storage (huffman-compressed or from the dynamic table) are stored in
a chunk allocated by the H2 demuxer. Then once the headers are properly
decoded into this list, h2_make_h1_request() is called with this list
to produce the HTTP/1.1 request into the destination buffer. The list
necessarily enforces a limit. Here we use 2*MAX_HTTP_HDR, which means
that we can have as many individual cookies as we have regular headers
if a client decides to break their cookies into multiple values. This
seems reasonable and will allow the H1 parser to decide whether it's
too much or not.

Thus the output stream is not produced on the fly anymore, which will
make it possible to deal with certain corner cases such as repairing the
Cookie header (which is not done for now).

In order to limit header duplication and parsing, the known pseudo headers
continue to be passed by their index : the name element in the list then
has a NULL pointer and the value is the pseudo header's index. Given that
these ones represent about half of the incoming requests and need to be
found quickly, it maintains an acceptable level of performance.

The code was significantly reduced by doing this because the original code
had to deal with HPACK and H1 combinations (e.g. indexed vs not indexed, etc)
and now the HPACK decoding is totally focused on the decompression, and
the H1 encoding doesn't have to deal with the issue of wrapping input for
example.

One bug was addressed here (though it couldn't happen at the moment). The
H2 demuxer used to detect a failure to write the request into the H1 buffer
and would then detect if the output buffer wraps, realign it and try again.
The problem by doing so was that the HPACK context was already modified and
not rewindable. Thus the size check is now performed first and a failure is
reported if it doesn't fit.
2017-11-21 21:13:36 +01:00

398 lines
10 KiB
C

/*
* HPACK decompressor (RFC7541)
*
* Copyright (C) 2014-2017 Willy Tarreau <willy@haproxy.org>
* Copyright (C) 2017 HAProxy Technologies
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <common/hpack-dec.h>
#include <common/hpack-huff.h>
#include <common/hpack-tbl.h>
#include <common/chunk.h>
#include <common/ist.h>
#include <types/global.h>
#if defined(DEBUG_HPACK)
#define hpack_debug_printf printf
#else
#define hpack_debug_printf(...) do { } while (0)
#endif
/* Reads an HPACK variable-length integer (RFC7541#5.1) whose prefix occupies
 * the lowest <b> bits of the first byte at <*raw_in>, consuming at most
 * <*len_in> bytes (first byte included). On success, returns the decoded
 * 32-bit value after advancing <*raw_in> past the integer and subtracting the
 * number of consumed bytes from <*len_in>. On failure, returns 0 and forces
 * <*len_in> to (uint32_t)-1 without touching <*raw_in>. Failures are :
 *   - a truncated input (an empty input included) ;
 *   - an encoding using more than 5 bytes after the prefix, or whose value
 *     does not fit in 32 bits. RFC7541#5.1 requires such encodings to be
 *     treated as decoding errors, and accepting them would shift a 32-bit
 *     value by 32 bits or more (undefined behaviour) and silently wrap.
 */
static uint32_t get_var_int(const uint8_t **raw_in, uint32_t *len_in, int b)
{
	const uint32_t prefix_max = (1U << b) - 1;
	const uint8_t *raw = *raw_in;
	uint32_t len = *len_in;
	uint64_t ret;
	unsigned int shift = 0;
	uint8_t byte;

	if (!len)
		goto fail;

	len--;
	ret = *raw++ & prefix_max;
	if (ret != prefix_max)
		goto end;

	/* The prefix is saturated : the remainder follows as 7-bit groups,
	 * least significant group first, the last group being the one whose
	 * high bit is cleared.
	 */
	do {
		if (!len)
			goto fail;

		/* a 6th group could only carry bits beyond the 32nd one */
		if (shift > 28)
			goto fail;

		byte = *raw++;
		len--;
		ret += (uint64_t)(byte & 127) << shift;
		shift += 7;
	} while (byte & 128);

	/* the 64-bit accumulator cannot wrap here, so this check is exact */
	if (ret > UINT32_MAX)
		goto fail;

 end:
	*raw_in = raw;
	*len_in = len;
	return (uint32_t)ret;

 fail:
	*len_in = (uint32_t)-1;
	return 0;
}
/* returns the pseudo-header <idx> corresponds to among the following values :
 *   - 0 = unknown, the header's string needs to be used instead
 *   - 1 = ":authority"
 *   - 2 = ":method"
 *   - 3 = ":path"
 *   - 4 = ":scheme"
 *   - 5 = ":status"
 *
 * Indexes 1 to 14 map to 1, 2, 2, 3, 3, 4, 4 and then 5 for 8 to 14. Index 0
 * and any index above 14 yield 0 : index 0 is not a usable HPACK index and
 * must not be mistaken for ":authority".
 */
static inline int hpack_idx_to_phdr(uint32_t idx)
{
	if (idx == 0 || idx > 14)
		return 0;

	/* Two consecutive indexes share one nibble of the constant below :
	 * idx/2 selects the nibble, hence the shift by 4 * (idx / 2) bits.
	 */
	idx >>= 1;
	idx <<= 2;
	return (0x55554321U >> idx) & 0xF;
}
/* If <idx> designates a static header, returns <in>. Otherwise allocates some
* room from chunk <store> to duplicate <in> into it and returns the string
* allocated there. In case of allocation failure, returns a string whose
* pointer is NULL.
*/
static inline struct ist hpack_alloc_string(struct chunk *store, int idx, struct ist in)
{
struct ist out;
if (idx < HPACK_SHT_SIZE)
return in;
out.len = in.len;
out.ptr = chunk_newstr(store);
if (unlikely(!out.ptr))
return out;
if (unlikely(store->len + out.len > store->size)) {
out.ptr = NULL;
return out;
}
store->len += out.len;
memcpy(out.ptr, in.ptr, out.len);
return out;
}
/* decode an HPACK frame starting at <raw> for <len> bytes, using the dynamic
 * headers table <dht>, produces the output into list <list> of <list_size>
 * entries max, and uses pre-allocated buffer <tmp> for temporary storage (some
 * list elements will point to it, others may point into <raw> itself). <tmp>
 * is reset on entry. Some <list> name entries may be made of a NULL pointer
 * and a len, in which case they will designate a pseudo header index according
 * to the values returned by hpack_idx_to_phdr() above.
 *
 * On success, the number of <list> entries written is returned. A last element
 * is always zeroed and appended to the list, and it IS included in the returned
 * count (an empty frame thus returns 1). This way the caller can use
 * list[].n.len == 0 as a marker for the end of list. On failure, a negative
 * value is returned, which is the opposite of one of the HPACK_ERR_* codes;
 * <dht> may already have been modified by previously decoded fields.
 *
 * Strings coming from the dynamic table are always duplicated into <tmp>
 * before being referenced from <list> or passed to hpack_dht_insert(), so that
 * the list never points into <dht>.
 *
 * Note: a dynamic table size update is parsed and skipped, the advertised size
 * is not acted upon here.
 * NOTE(review): <idx> is not range-checked here; this presumably relies on
 * hpack_idx_to_name() / hpack_idx_to_value() coping with invalid indexes --
 * to be confirmed against their definitions.
 */
int hpack_decode_frame(struct hpack_dht *dht, const uint8_t *raw, uint32_t len,
                       struct http_hdr *list, int list_size, struct chunk *tmp)
{
	uint32_t idx;
	uint32_t nlen;
	uint32_t vlen;
	uint8_t huff;
	struct ist name;
	struct ist value;
	int must_index;
	int ret;

	chunk_reset(tmp);
	ret = 0;
	while (len) {
		int __maybe_unused code = *raw; /* first byte, only for debugging */

		must_index = 0;
		if (*raw >= 0x80) {
			/* indexed header field */
			if (*raw == 0x80) {
				hpack_debug_printf("unhandled code 0x%02x (raw=%p, len=%d)\n", *raw, raw, len);
				ret = -HPACK_ERR_UNKNOWN_OPCODE;
				goto leave;
			}

			hpack_debug_printf("%02x: p14: indexed header field : ", code);

			idx = get_var_int(&raw, &len, 7);
			if (len == (uint32_t)-1) { // truncated
				ret = -HPACK_ERR_TRUNCATED;
				goto leave;
			}

			value = hpack_alloc_string(tmp, idx, hpack_idx_to_value(dht, idx));
			if (!value.ptr) {
				ret = -HPACK_ERR_TOO_LARGE;
				goto leave;
			}

			/* here we don't index so we can always keep the pseudo header number */
			name = ist2(NULL, hpack_idx_to_phdr(idx));

			if (!name.len) {
				name = hpack_alloc_string(tmp, idx, hpack_idx_to_name(dht, idx));
				if (!name.ptr) {
					ret = -HPACK_ERR_TOO_LARGE;
					goto leave;
				}
			}
			/* <name> and <value> are now set and point to stable values */
		}
		else if (*raw >= 0x20 && *raw <= 0x3f) {
			/* max dyn table size change : the new size is decoded
			 * in order to skip it, but it is not used.
			 */
			idx = get_var_int(&raw, &len, 5);
			if (len == (uint32_t)-1) { // truncated
				ret = -HPACK_ERR_TRUNCATED;
				goto leave;
			}
			continue;
		}
		else if (!(*raw & (*raw - 0x10))) {
			/* 0x00, 0x10, and 0x40 (0x20 and 0x80 were already handled above) */

			/* literal header field without/never/with incremental indexing -- literal name */
			if (*raw == 0x00)
				hpack_debug_printf("%02x: p17: literal without indexing : ", code);
			else if (*raw == 0x10)
				hpack_debug_printf("%02x: p18: literal never indexed : ", code);
			else if (*raw == 0x40)
				hpack_debug_printf("%02x: p16: literal with indexing : ", code);

			if (*raw == 0x40)
				must_index = 1;

			raw++; len--;

			/* retrieve name */
			if (!len) { // truncated
				ret = -HPACK_ERR_TRUNCATED;
				goto leave;
			}

			huff = *raw & 0x80;
			nlen = get_var_int(&raw, &len, 7);
			if (len == (uint32_t)-1 || len < nlen) { // truncated
				ret = -HPACK_ERR_TRUNCATED;
				goto leave;
			}

			name = ist2(raw, nlen);

			raw += nlen;
			len -= nlen;

			if (huff) {
				char *ntrash = chunk_newstr(tmp);
				if (!ntrash) {
					ret = -HPACK_ERR_TOO_LARGE;
					goto leave;
				}

				nlen = huff_dec((const uint8_t *)name.ptr, name.len, ntrash, tmp->size - tmp->len);
				if (nlen == (uint32_t)-1) {
					hpack_debug_printf("2: can't decode huffman.\n");
					ret = -HPACK_ERR_HUFFMAN;
					goto leave;
				}

				tmp->len += nlen; // make room for the value
				name = ist2(ntrash, nlen);
			}

			/* retrieve value */
			if (!len) { // truncated
				ret = -HPACK_ERR_TRUNCATED;
				goto leave;
			}

			huff = *raw & 0x80;
			vlen = get_var_int(&raw, &len, 7);
			if (len == (uint32_t)-1 || len < vlen) { // truncated
				ret = -HPACK_ERR_TRUNCATED;
				goto leave;
			}

			value = ist2(raw, vlen);
			raw += vlen;
			len -= vlen;

			if (huff) {
				char *vtrash = chunk_newstr(tmp);
				if (!vtrash) {
					ret = -HPACK_ERR_TOO_LARGE;
					goto leave;
				}

				vlen = huff_dec((const uint8_t *)value.ptr, value.len, vtrash, tmp->size - tmp->len);
				if (vlen == (uint32_t)-1) {
					hpack_debug_printf("3: can't decode huffman.\n");
					ret = -HPACK_ERR_HUFFMAN;
					goto leave;
				}

				tmp->len += vlen; // make room for the value
				value = ist2(vtrash, vlen);
			}

			/* <name> and <value> are correctly filled here */
		}
		else {
			/* 0x01..0x0f : literal header field without indexing -- indexed name */
			/* 0x11..0x1f : literal header field never indexed -- indexed name */
			/* 0x41..0x7f : literal header field with incremental indexing -- indexed name */

			if (*raw <= 0x0f)
				hpack_debug_printf("%02x: p16: literal without indexing -- indexed name : ", code);
			else if (*raw >= 0x41)
				hpack_debug_printf("%02x: p15: literal with indexing -- indexed name : ", code);
			else
				hpack_debug_printf("%02x: p16: literal never indexed -- indexed name : ", code);

			/* retrieve name index : 6-bit prefix when indexing, 4-bit otherwise */
			if (*raw >= 0x41) {
				must_index = 1;
				idx = get_var_int(&raw, &len, 6);
			}
			else
				idx = get_var_int(&raw, &len, 4);

			if (len == (uint32_t)-1 || !len) { // truncated
				ret = -HPACK_ERR_TRUNCATED;
				goto leave;
			}

			/* retrieve value */
			huff = *raw & 0x80;
			vlen = get_var_int(&raw, &len, 7);
			if (len == (uint32_t)-1 || len < vlen) { // truncated
				ret = -HPACK_ERR_TRUNCATED;
				goto leave;
			}

			value = ist2(raw, vlen);
			raw += vlen;
			len -= vlen;

			if (huff) {
				char *vtrash = chunk_newstr(tmp);
				if (!vtrash) {
					ret = -HPACK_ERR_TOO_LARGE;
					goto leave;
				}

				vlen = huff_dec((const uint8_t *)value.ptr, value.len, vtrash, tmp->size - tmp->len);
				if (vlen == (uint32_t)-1) {
					hpack_debug_printf("1: can't decode huffman.\n");
					ret = -HPACK_ERR_HUFFMAN;
					goto leave;
				}

				tmp->len += vlen; // make room for the value
				value = ist2(vtrash, vlen);
			}

			/* the pseudo header number may only replace the name
			 * when the field is not going to be indexed.
			 */
			name = ist2(NULL, 0);
			if (!must_index)
				name.len = hpack_idx_to_phdr(idx);

			if (!name.len) {
				/* Like for indexed header fields above, a name
				 * taken from the dynamic table is duplicated
				 * into <tmp> so that neither <list> nor the
				 * insertion below references <dht>'s own
				 * storage. NOTE(review): this assumes that
				 * hpack_dht_insert() may move or evict entries.
				 */
				name = hpack_alloc_string(tmp, idx, hpack_idx_to_name(dht, idx));
				if (!name.ptr) {
					ret = -HPACK_ERR_TOO_LARGE;
					goto leave;
				}
			}
			/* <name> and <value> are correctly filled here */
		}

		/* here's what we have here :
		 *   - name.len > 0
		 *   - value is filled with either const data or data allocated from tmp
		 *   - name.ptr == NULL && !must_index : known pseudo-header #name.len
		 *   - name.ptr != NULL || must_index  : general header, unknown pseudo-header or index needed
		 */
		if (ret >= list_size) {
			ret = -HPACK_ERR_TOO_LARGE;
			goto leave;
		}

		list[ret].n = name;
		list[ret].v = value;
		ret++;

		if (must_index && hpack_dht_insert(dht, name, value) < 0) {
			hpack_debug_printf("failed to find some room in the dynamic table\n");
			ret = -HPACK_ERR_DHT_INSERT_FAIL;
			goto leave;
		}

		hpack_debug_printf("\e[1;34m%s\e[0m: ",
		                   istpad(trash.str, name).ptr);

		hpack_debug_printf("\e[1;35m%s\e[0m [idx=%d, used=%d]\n",
		                   istpad(trash.str, value).ptr,
		                   must_index, dht->used);
	}

	/* one more slot is needed for the end marker */
	if (ret >= list_size) {
		ret = -HPACK_ERR_TOO_LARGE;
		goto leave;
	}

	/* put an end marker, which is part of the returned count */
	list[ret].n = list[ret].v = ist2(NULL, 0);
	ret++;

 leave:
	return ret;
}