diff --git a/Makefile b/Makefile index e6c94c894..b38964476 100644 --- a/Makefile +++ b/Makefile @@ -750,7 +750,8 @@ OBJS = src/haproxy.o src/base64.o src/protocol.o \ src/session.o src/stream.o src/hdr_idx.o src/ev_select.o src/signal.o \ src/acl.o src/sample.o src/memory.o src/freq_ctr.o src/auth.o src/proto_udp.o \ src/compression.o src/payload.o src/hash.o src/pattern.o src/map.o \ - src/namespace.o src/mailers.o src/dns.o src/vars.o src/filters.o + src/namespace.o src/mailers.o src/dns.o src/vars.o src/filters.o \ + src/flt_http_comp.o EBTREE_OBJS = $(EBTREE_DIR)/ebtree.o \ $(EBTREE_DIR)/eb32tree.o $(EBTREE_DIR)/eb64tree.o \ diff --git a/include/proto/compression.h b/include/proto/compression.h index 5c7c8cb98..7b60472c7 100644 --- a/include/proto/compression.h +++ b/include/proto/compression.h @@ -30,11 +30,6 @@ extern unsigned int compress_min_idle; int comp_append_type(struct comp *comp, const char *type); int comp_append_algo(struct comp *comp, const char *algo); -int http_emit_chunk_size(char *end, unsigned int chksz); -int http_compression_buffer_init(struct stream *s, struct buffer *in, struct buffer *out); -int http_compression_buffer_add_data(struct stream *s, struct buffer *in, struct buffer *out); -int http_compression_buffer_end(struct stream *s, struct buffer **in, struct buffer **out, int end); - #ifdef USE_ZLIB extern long zlib_used_memory; #endif /* USE_ZLIB */ diff --git a/include/proto/flt_http_comp.h b/include/proto/flt_http_comp.h new file mode 100644 index 000000000..587db0d09 --- /dev/null +++ b/include/proto/flt_http_comp.h @@ -0,0 +1,38 @@ +/* + * include/proto/flt_http_comp.h + * This file defines function prototypes for the compression filter. + * + * Copyright (C) 2015 Qualys Inc., Christopher Faulet + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef _PROTO_FLT_HTTP_COMP_H +#define _PROTO_FLT_HTTP_COMP_H + +/* NOTE: This is a temporary header file. It will be removed when the + * compression filter will be added */ + +#include +#include + +int select_compression_request_header(struct stream *s, struct buffer *req); +int select_compression_response_header(struct stream *s, struct buffer *res); + +int http_compression_buffer_init(struct stream *s, struct buffer *in, struct buffer *out); +int http_compression_buffer_add_data(struct stream *s, struct buffer *in, struct buffer *out); +int http_compression_buffer_end(struct stream *s, struct buffer **in, struct buffer **out, int end); + + +#endif /* _PROTO_FLT_HTTP_COMP_H */ diff --git a/include/proto/proto_http.h b/include/proto/proto_http.h index 9317a551e..4d8f5365b 100644 --- a/include/proto/proto_http.h +++ b/include/proto/proto_http.h @@ -154,6 +154,8 @@ enum act_return http_action_req_capture_by_id(struct act_rule *rule, struct prox enum act_return http_action_res_capture_by_id(struct act_rule *rule, struct proxy *px, struct session *sess, struct stream *s, int flags); +int parse_qvalue(const char *qvalue, const char **end); + /* Note: these functions *do* modify the sample. Even in case of success, at * least the type and uint value are modified. 
*/ diff --git a/src/cfgparse.c b/src/cfgparse.c index 22ff6af4a..d343f0515 100644 --- a/src/cfgparse.c +++ b/src/cfgparse.c @@ -6536,70 +6536,6 @@ stats_error_parsing: free(err); } } - else if (!strcmp(args[0], "compression")) { - struct comp *comp; - if (curproxy->comp == NULL) { - comp = calloc(1, sizeof(struct comp)); - curproxy->comp = comp; - } else { - comp = curproxy->comp; - } - - if (!strcmp(args[1], "algo")) { - int cur_arg; - struct comp_ctx *ctx; - - cur_arg = 2; - if (!*args[cur_arg]) { - Alert("parsing [%s:%d] : '%s' expects \n", - file, linenum, args[0]); - err_code |= ERR_ALERT | ERR_FATAL; - goto out; - } - while (*(args[cur_arg])) { - if (comp_append_algo(comp, args[cur_arg]) < 0) { - Alert("parsing [%s:%d] : '%s' : '%s' is not a supported algorithm.\n", - file, linenum, args[0], args[cur_arg]); - err_code |= ERR_ALERT | ERR_FATAL; - goto out; - } - if (curproxy->comp->algos->init(&ctx, 9) == 0) { - curproxy->comp->algos->end(&ctx); - } else { - Alert("parsing [%s:%d] : '%s' : Can't init '%s' algorithm.\n", - file, linenum, args[0], args[cur_arg]); - err_code |= ERR_ALERT | ERR_FATAL; - goto out; - } - cur_arg ++; - continue; - } - } - else if (!strcmp(args[1], "offload")) { - comp->offload = 1; - } - else if (!strcmp(args[1], "type")) { - int cur_arg; - cur_arg = 2; - if (!*args[cur_arg]) { - Alert("parsing [%s:%d] : '%s' expects \n", - file, linenum, args[0]); - err_code |= ERR_ALERT | ERR_FATAL; - goto out; - } - while (*(args[cur_arg])) { - comp_append_type(comp, args[cur_arg]); - cur_arg ++; - continue; - } - } - else { - Alert("parsing [%s:%d] : '%s' expects 'algo', 'type' or 'offload'\n", - file, linenum, args[0]); - err_code |= ERR_ALERT | ERR_FATAL; - goto out; - } - } else { struct cfg_kw_list *kwl; int index; diff --git a/src/compression.c b/src/compression.c index 471c102b1..7b5a93915 100644 --- a/src/compression.c +++ b/src/compression.c @@ -137,220 +137,6 @@ int comp_append_algo(struct comp *comp, const char *algo) return -1; } -/* 
emit the chunksize followed by a CRLF on the output and return the number of - * bytes written. It goes backwards and starts with the byte before . It - * returns the number of bytes written which will not exceed 10 (8 digits, CR, - * and LF). The caller is responsible for ensuring there is enough room left in - * the output buffer for the string. - */ -int http_emit_chunk_size(char *end, unsigned int chksz) -{ - char *beg = end; - - *--beg = '\n'; - *--beg = '\r'; - do { - *--beg = hextab[chksz & 0xF]; - } while (chksz >>= 4); - return end - beg; -} - -/* - * Init HTTP compression - */ -int http_compression_buffer_init(struct stream *s, struct buffer *in, struct buffer *out) -{ - /* output stream requires at least 10 bytes for the gzip header, plus - * at least 8 bytes for the gzip trailer (crc+len), plus a possible - * plus at most 5 bytes per 32kB block and 2 bytes to close the stream. - */ - if (in->size - buffer_len(in) < 20 + 5 * ((in->i + 32767) >> 15)) - return -1; - - /* prepare an empty output buffer in which we reserve enough room for - * copying the output bytes from , plus 10 extra bytes to write - * the chunk size. We don't copy the bytes yet so that if we have to - * cancel the operation later, it's cheap. - */ - b_reset(out); - out->o = in->o; - out->p += out->o; - out->i = 10; - return 0; -} - -/* - * Add data to compress - */ -int http_compression_buffer_add_data(struct stream *s, struct buffer *in, struct buffer *out) -{ - struct http_msg *msg = &s->txn->rsp; - int consumed_data = 0; - int data_process_len; - int block1, block2; - - /* - * Temporarily skip already parsed data and chunks to jump to the - * actual data block. It is fixed before leaving. - */ - b_adv(in, msg->next); - - /* - * select the smallest size between the announced chunk size, the input - * data, and the available output buffer size. The compressors are - * assumed to be able to process all the bytes we pass to them at once. 
- */ - data_process_len = MIN(in->i, msg->chunk_len); - data_process_len = MIN(out->size - buffer_len(out), data_process_len); - - block1 = data_process_len; - if (block1 > bi_contig_data(in)) - block1 = bi_contig_data(in); - block2 = data_process_len - block1; - - /* compressors return < 0 upon error or the amount of bytes read */ - consumed_data = s->comp_algo->add_data(s->comp_ctx, bi_ptr(in), block1, out); - if (consumed_data >= 0 && block2 > 0) { - consumed_data = s->comp_algo->add_data(s->comp_ctx, in->data, block2, out); - if (consumed_data >= 0) - consumed_data += block1; - } - - /* restore original buffer pointer */ - b_rew(in, msg->next); - return consumed_data; -} - -/* - * Flush data in process, and write the header and footer of the chunk. Upon - * success, in and out buffers are swapped to avoid a copy. - */ -int http_compression_buffer_end(struct stream *s, struct buffer **in, struct buffer **out, int end) -{ - int to_forward; - int left; - struct http_msg *msg = &s->txn->rsp; - struct buffer *ib = *in, *ob = *out; - char *tail; - -#if defined(USE_SLZ) || defined(USE_ZLIB) - int ret; - - /* flush data here */ - - if (end) - ret = s->comp_algo->finish(s->comp_ctx, ob); /* end of data */ - else - ret = s->comp_algo->flush(s->comp_ctx, ob); /* end of buffer */ - - if (ret < 0) - return -1; /* flush failed */ - -#endif /* USE_ZLIB */ - - if (ob->i == 10) { - /* No data were appended, let's drop the output buffer and - * keep the input buffer unchanged. - */ - return 0; - } - - /* OK so at this stage, we have an output buffer looking like this : - * - * <-- o --> <------ i -----> - * +---------+---+------------+-----------+ - * | out | c | comp_in | empty | - * +---------+---+------------+-----------+ - * data p size - * - * is the room reserved to copy ib->o. It starts at ob->data and - * has not yet been filled. is the room reserved to write the chunk - * size (10 bytes). is the compressed equivalent of the data - * part of ib->i. 
is the amount of empty bytes at the end of - * the buffer, into which we may have to copy the remaining bytes from - * ib->i after the data (chunk size, trailers, ...). - */ - - /* Write real size at the begining of the chunk, no need of wrapping. - * We write the chunk using a dynamic length and adjust ob->p and ob->i - * accordingly afterwards. That will move away from . - */ - left = 10 - http_emit_chunk_size(ob->p + 10, ob->i - 10); - ob->p += left; - ob->i -= left; - - /* Copy previous data from ib->o into ob->o */ - if (ib->o > 0) { - left = bo_contig_data(ib); - memcpy(ob->p - ob->o, bo_ptr(ib), left); - if (ib->o - left) /* second part of the buffer */ - memcpy(ob->p - ob->o + left, ib->data, ib->o - left); - } - - /* chunked encoding requires CRLF after data */ - tail = ob->p + ob->i; - *tail++ = '\r'; - *tail++ = '\n'; - - /* At the end of data, we must write the empty chunk 0, - * and terminate the trailers section with a last . If - * we're forwarding a chunked-encoded response, we'll have a - * trailers section after the empty chunk which needs to be - * forwarded and which will provide the last CRLF. Otherwise - * we write it ourselves. - */ - if (msg->msg_state >= HTTP_MSG_TRAILERS) { - memcpy(tail, "0\r\n", 3); - tail += 3; - if (msg->msg_state >= HTTP_MSG_ENDING) { - memcpy(tail, "\r\n", 2); - tail += 2; - } - } - ob->i = tail - ob->p; - - to_forward = ob->i; - - /* update input rate */ - if (s->comp_ctx && s->comp_ctx->cur_lvl > 0) { - update_freq_ctr(&global.comp_bps_in, msg->next); - strm_fe(s)->fe_counters.comp_in += msg->next; - s->be->be_counters.comp_in += msg->next; - } else { - strm_fe(s)->fe_counters.comp_byp += msg->next; - s->be->be_counters.comp_byp += msg->next; - } - - /* copy the remaining data in the tmp buffer. 
*/ - b_adv(ib, msg->next); - msg->next = 0; - - if (ib->i > 0) { - left = bi_contig_data(ib); - memcpy(ob->p + ob->i, bi_ptr(ib), left); - ob->i += left; - if (ib->i - left) { - memcpy(ob->p + ob->i, ib->data, ib->i - left); - ob->i += ib->i - left; - } - } - - /* swap the buffers */ - *in = ob; - *out = ib; - - if (s->comp_ctx && s->comp_ctx->cur_lvl > 0) { - update_freq_ctr(&global.comp_bps_out, to_forward); - strm_fe(s)->fe_counters.comp_out += to_forward; - s->be->be_counters.comp_out += to_forward; - } - - /* forward the new chunk without remaining data */ - b_adv(ob, to_forward); - - return to_forward; -} - /* * Alloc the comp_ctx */ @@ -831,41 +617,6 @@ static int deflate_end(struct comp_ctx **comp_ctx) #endif /* USE_ZLIB */ -/* boolean, returns true if compression is used (either gzip or deflate) in the response */ -static int -smp_fetch_res_comp(const struct arg *args, struct sample *smp, const char *kw, void *private) -{ - smp->data.type = SMP_T_BOOL; - smp->data.u.sint = (smp->strm->comp_algo != NULL); - return 1; -} - -/* string, returns algo */ -static int -smp_fetch_res_comp_algo(const struct arg *args, struct sample *smp, const char *kw, void *private) -{ - if (!smp->strm->comp_algo) - return 0; - - smp->data.type = SMP_T_STR; - smp->flags = SMP_F_CONST; - smp->data.u.str.str = smp->strm->comp_algo->cfg_name; - smp->data.u.str.len = smp->strm->comp_algo->cfg_name_len; - return 1; -} - -/* Note: must not be declared as its list will be overwritten */ -static struct acl_kw_list acl_kws = {ILH, { - { /* END */ }, -}}; - -/* Note: must not be declared as its list will be overwritten */ -static struct sample_fetch_kw_list sample_fetch_keywords = {ILH, { - { "res.comp", smp_fetch_res_comp, 0, NULL, SMP_T_BOOL, SMP_USE_HRSHP }, - { "res.comp_algo", smp_fetch_res_comp_algo, 0, NULL, SMP_T_STR, SMP_USE_HRSHP }, - { /* END */ }, -}}; - __attribute__((constructor)) static void __comp_fetch_init(void) { @@ -873,6 +624,4 @@ static void __comp_fetch_init(void) 
slz_make_crc_table(); slz_prepare_dist_table(); #endif - acl_register_keywords(&acl_kws); - sample_register_fetches(&sample_fetch_keywords); } diff --git a/src/flt_http_comp.c b/src/flt_http_comp.c new file mode 100644 index 000000000..5eaf0c9ee --- /dev/null +++ b/src/flt_http_comp.c @@ -0,0 +1,589 @@ +/* + * Stream filters related variables and functions. + * + * Copyright (C) 2015 Qualys Inc., Christopher Faulet + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + + +/***********************************************************************/ +/* + * Selects a compression algorithm depending on the client request. + */ +int +select_compression_request_header(struct stream *s, struct buffer *req) +{ + struct http_txn *txn = s->txn; + struct http_msg *msg = &txn->req; + struct hdr_ctx ctx; + struct comp_algo *comp_algo = NULL; + struct comp_algo *comp_algo_back = NULL; + + /* Disable compression for older user agents announcing themselves as "Mozilla/4" + * unless they are known good (MSIE 6 with XP SP2, or MSIE 7 and later). + * See http://zoompf.com/2012/02/lose-the-wait-http-compression for more details. 
+ */ + ctx.idx = 0; + if (http_find_header2("User-Agent", 10, req->p, &txn->hdr_idx, &ctx) && + ctx.vlen >= 9 && + memcmp(ctx.line + ctx.val, "Mozilla/4", 9) == 0 && + (ctx.vlen < 31 || + memcmp(ctx.line + ctx.val + 25, "MSIE ", 5) != 0 || + ctx.line[ctx.val + 30] < '6' || + (ctx.line[ctx.val + 30] == '6' && + (ctx.vlen < 54 || memcmp(ctx.line + 51, "SV1", 3) != 0)))) { + s->comp_algo = NULL; + return 0; + } + + /* search for the algo in the backend in priority or the frontend */ + if ((s->be->comp && (comp_algo_back = s->be->comp->algos)) || (strm_fe(s)->comp && (comp_algo_back = strm_fe(s)->comp->algos))) { + int best_q = 0; + + ctx.idx = 0; + while (http_find_header2("Accept-Encoding", 15, req->p, &txn->hdr_idx, &ctx)) { + const char *qval; + int q; + int toklen; + + /* try to isolate the token from the optional q-value */ + toklen = 0; + while (toklen < ctx.vlen && http_is_token[(unsigned char)*(ctx.line + ctx.val + toklen)]) + toklen++; + + qval = ctx.line + ctx.val + toklen; + while (1) { + while (qval < ctx.line + ctx.val + ctx.vlen && http_is_lws[(unsigned char)*qval]) + qval++; + + if (qval >= ctx.line + ctx.val + ctx.vlen || *qval != ';') { + qval = NULL; + break; + } + qval++; + + while (qval < ctx.line + ctx.val + ctx.vlen && http_is_lws[(unsigned char)*qval]) + qval++; + + if (qval >= ctx.line + ctx.val + ctx.vlen) { + qval = NULL; + break; + } + if (strncmp(qval, "q=", MIN(ctx.line + ctx.val + ctx.vlen - qval, 2)) == 0) + break; + + while (qval < ctx.line + ctx.val + ctx.vlen && *qval != ';') + qval++; + } + + /* here we have qval pointing to the first "q=" attribute or NULL if not found */ + q = qval ? 
parse_qvalue(qval + 2, NULL) : 1000; + + if (q <= best_q) + continue; + + for (comp_algo = comp_algo_back; comp_algo; comp_algo = comp_algo->next) { + if (*(ctx.line + ctx.val) == '*' || + word_match(ctx.line + ctx.val, toklen, comp_algo->ua_name, comp_algo->ua_name_len)) { + s->comp_algo = comp_algo; + best_q = q; + break; + } + } + } + } + + /* remove all occurrences of the header when "compression offload" is set */ + if (s->comp_algo) { + if ((s->be->comp && s->be->comp->offload) || (strm_fe(s)->comp && strm_fe(s)->comp->offload)) { + http_remove_header2(msg, &txn->hdr_idx, &ctx); + ctx.idx = 0; + while (http_find_header2("Accept-Encoding", 15, req->p, &txn->hdr_idx, &ctx)) { + http_remove_header2(msg, &txn->hdr_idx, &ctx); + } + } + return 1; + } + + /* identity is implicit and does not require headers */ + if ((s->be->comp && (comp_algo_back = s->be->comp->algos)) || (strm_fe(s)->comp && (comp_algo_back = strm_fe(s)->comp->algos))) { + for (comp_algo = comp_algo_back; comp_algo; comp_algo = comp_algo->next) { + if (comp_algo->cfg_name_len == 8 && memcmp(comp_algo->cfg_name, "identity", 8) == 0) { + s->comp_algo = comp_algo; + return 1; + } + } + } + + s->comp_algo = NULL; + return 0; +} + +/* + * Selects a compression algorithm depending on the server response. 
+ */ +int +select_compression_response_header(struct stream *s, struct buffer *res) +{ + struct http_txn *txn = s->txn; + struct http_msg *msg = &txn->rsp; + struct hdr_ctx ctx; + struct comp_type *comp_type; + + /* no common compression algorithm was found in request header */ + if (s->comp_algo == NULL) + goto fail; + + /* HTTP < 1.1 should not be compressed */ + if (!(msg->flags & HTTP_MSGF_VER_11) || !(txn->req.flags & HTTP_MSGF_VER_11)) + goto fail; + + /* compress 200,201,202,203 responses only */ + if ((txn->status != 200) && + (txn->status != 201) && + (txn->status != 202) && + (txn->status != 203)) + goto fail; + + + /* Content-Length is null */ + if (!(msg->flags & HTTP_MSGF_TE_CHNK) && msg->body_len == 0) + goto fail; + + /* content is already compressed */ + ctx.idx = 0; + if (http_find_header2("Content-Encoding", 16, res->p, &txn->hdr_idx, &ctx)) + goto fail; + + /* no compression when Cache-Control: no-transform is present in the message */ + ctx.idx = 0; + while (http_find_header2("Cache-Control", 13, res->p, &txn->hdr_idx, &ctx)) { + if (word_match(ctx.line + ctx.val, ctx.vlen, "no-transform", 12)) + goto fail; + } + + comp_type = NULL; + + /* we don't want to compress multipart content-types, nor content-types that are + * not listed in the "compression type" directive if any. If no content-type was + * found but configuration requires one, we don't compress either. Backend has + * the priority. 
+ */ + ctx.idx = 0; + if (http_find_header2("Content-Type", 12, res->p, &txn->hdr_idx, &ctx)) { + if (ctx.vlen >= 9 && strncasecmp("multipart", ctx.line+ctx.val, 9) == 0) + goto fail; + + if ((s->be->comp && (comp_type = s->be->comp->types)) || + (strm_fe(s)->comp && (comp_type = strm_fe(s)->comp->types))) { + for (; comp_type; comp_type = comp_type->next) { + if (ctx.vlen >= comp_type->name_len && + strncasecmp(ctx.line+ctx.val, comp_type->name, comp_type->name_len) == 0) + /* this Content-Type should be compressed */ + break; + } + /* this Content-Type should not be compressed */ + if (comp_type == NULL) + goto fail; + } + } + else { /* no content-type header */ + if ((s->be->comp && s->be->comp->types) || (strm_fe(s)->comp && strm_fe(s)->comp->types)) + goto fail; /* a content-type was required */ + } + + /* limit compression rate */ + if (global.comp_rate_lim > 0) + if (read_freq_ctr(&global.comp_bps_in) > global.comp_rate_lim) + goto fail; + + /* limit cpu usage */ + if (idle_pct < compress_min_idle) + goto fail; + + /* initialize compression */ + if (s->comp_algo->init(&s->comp_ctx, global.tune.comp_maxlevel) < 0) + goto fail; + + s->flags |= SF_COMP_READY; + + /* remove Content-Length header */ + ctx.idx = 0; + if ((msg->flags & HTTP_MSGF_CNT_LEN) && http_find_header2("Content-Length", 14, res->p, &txn->hdr_idx, &ctx)) + http_remove_header2(msg, &txn->hdr_idx, &ctx); + + /* add Transfer-Encoding header */ + if (!(msg->flags & HTTP_MSGF_TE_CHNK)) + http_header_add_tail2(&txn->rsp, &txn->hdr_idx, "Transfer-Encoding: chunked", 26); + + /* + * Add Content-Encoding header when it's not identity encoding. + * RFC 2616 : Identity encoding: This content-coding is used only in the + * Accept-Encoding header, and SHOULD NOT be used in the Content-Encoding + * header. 
+ */ + if (s->comp_algo->cfg_name_len != 8 || memcmp(s->comp_algo->cfg_name, "identity", 8) != 0) { + trash.len = 18; + memcpy(trash.str, "Content-Encoding: ", trash.len); + memcpy(trash.str + trash.len, s->comp_algo->ua_name, s->comp_algo->ua_name_len); + trash.len += s->comp_algo->ua_name_len; + trash.str[trash.len] = '\0'; + http_header_add_tail2(&txn->rsp, &txn->hdr_idx, trash.str, trash.len); + } + return 1; + +fail: + s->comp_algo = NULL; + return 0; +} + +/***********************************************************************/ +/* emit the chunksize followed by a CRLF on the output and return the number of + * bytes written. It goes backwards and starts with the byte before <end>. It + * returns the number of bytes written which will not exceed 10 (8 digits, CR, + * and LF). The caller is responsible for ensuring there is enough room left in + * the output buffer for the string. + */ +static int +http_emit_chunk_size(char *end, unsigned int chksz) +{ + char *beg = end; + + *--beg = '\n'; + *--beg = '\r'; + do { + *--beg = hextab[chksz & 0xF]; + } while (chksz >>= 4); + return end - beg; +} + +/* + * Init HTTP compression + */ +int +http_compression_buffer_init(struct stream *s, struct buffer *in, struct buffer *out) +{ + /* output stream requires at least 10 bytes for the gzip header, plus + * at least 8 bytes for the gzip trailer (crc+len), + * plus at most 5 bytes per 32kB block and 2 bytes to close the stream. + */ + if (in->size - buffer_len(in) < 20 + 5 * ((in->i + 32767) >> 15)) + return -1; + + /* prepare an empty output buffer in which we reserve enough room for + * copying the output bytes from <in>, plus 10 extra bytes to write + * the chunk size. We don't copy the bytes yet so that if we have to + * cancel the operation later, it's cheap. 
+ */ + b_reset(out); + out->o = in->o; + out->p += out->o; + out->i = 10; + return 0; +} + +/* + * Add data to compress + */ +int +http_compression_buffer_add_data(struct stream *s, struct buffer *in, struct buffer *out) +{ + struct http_msg *msg = &s->txn->rsp; + int consumed_data = 0; + int data_process_len; + int block1, block2; + + /* + * Temporarily skip already parsed data and chunks to jump to the + * actual data block. It is fixed before leaving. + */ + b_adv(in, msg->next); + + /* + * select the smallest size between the announced chunk size, the input + * data, and the available output buffer size. The compressors are + * assumed to be able to process all the bytes we pass to them at once. + */ + data_process_len = MIN(in->i, msg->chunk_len); + data_process_len = MIN(out->size - buffer_len(out), data_process_len); + + block1 = data_process_len; + if (block1 > bi_contig_data(in)) + block1 = bi_contig_data(in); + block2 = data_process_len - block1; + + /* compressors return < 0 upon error or the amount of bytes read */ + consumed_data = s->comp_algo->add_data(s->comp_ctx, bi_ptr(in), block1, out); + if (consumed_data >= 0 && block2 > 0) { + consumed_data = s->comp_algo->add_data(s->comp_ctx, in->data, block2, out); + if (consumed_data >= 0) + consumed_data += block1; + } + + /* restore original buffer pointer */ + b_rew(in, msg->next); + return consumed_data; +} + +/* + * Flush data in process, and write the header and footer of the chunk. Upon + * success, in and out buffers are swapped to avoid a copy. 
+ */ +int +http_compression_buffer_end(struct stream *s, struct buffer **in, struct buffer **out, int end) +{ + int to_forward; + int left; + struct http_msg *msg = &s->txn->rsp; + struct buffer *ib = *in, *ob = *out; + char *tail; + +#if defined(USE_SLZ) || defined(USE_ZLIB) + int ret; + + /* flush data here */ + + if (end) + ret = s->comp_algo->finish(s->comp_ctx, ob); /* end of data */ + else + ret = s->comp_algo->flush(s->comp_ctx, ob); /* end of buffer */ + + if (ret < 0) + return -1; /* flush failed */ + +#endif /* USE_SLZ || USE_ZLIB */ + + if (ob->i == 10) { + /* No data were appended, let's drop the output buffer and + * keep the input buffer unchanged. + */ + return 0; + } + + /* OK so at this stage, we have an output buffer <ob> looking like this : + * + * <-- o --> <------ i -----> + * +---------+---+------------+-----------+ + * | out | c | comp_in | empty | + * +---------+---+------------+-----------+ + * data p size + * + * <out> is the room reserved to copy ib->o. It starts at ob->data and + * has not yet been filled. <c> is the room reserved to write the chunk + * size (10 bytes). <comp_in> is the compressed equivalent of the data + * part of ib->i. <empty> is the amount of empty bytes at the end of + * the buffer, into which we may have to copy the remaining bytes from + * ib->i after the data (chunk size, trailers, ...). + */ + + /* Write real size at the beginning of the chunk, no need of wrapping. + * We write the chunk using a dynamic length and adjust ob->p and ob->i + * accordingly afterwards. That will move <out> away from <data>. 
+ */ + left = 10 - http_emit_chunk_size(ob->p + 10, ob->i - 10); + ob->p += left; + ob->i -= left; + + /* Copy previous data from ib->o into ob->o */ + if (ib->o > 0) { + left = bo_contig_data(ib); + memcpy(ob->p - ob->o, bo_ptr(ib), left); + if (ib->o - left) /* second part of the buffer */ + memcpy(ob->p - ob->o + left, ib->data, ib->o - left); + } + + /* chunked encoding requires CRLF after data */ + tail = ob->p + ob->i; + *tail++ = '\r'; + *tail++ = '\n'; + + /* At the end of data, we must write the empty chunk 0<CRLF>, + * and terminate the trailers section with a last <CRLF>. If + * we're forwarding a chunked-encoded response, we'll have a + * trailers section after the empty chunk which needs to be + * forwarded and which will provide the last CRLF. Otherwise + * we write it ourselves. + */ + if (msg->msg_state >= HTTP_MSG_TRAILERS) { + memcpy(tail, "0\r\n", 3); + tail += 3; + if (msg->msg_state >= HTTP_MSG_ENDING) { + memcpy(tail, "\r\n", 2); + tail += 2; + } + } + ob->i = tail - ob->p; + + to_forward = ob->i; + + /* update input rate */ + if (s->comp_ctx && s->comp_ctx->cur_lvl > 0) { + update_freq_ctr(&global.comp_bps_in, msg->next); + strm_fe(s)->fe_counters.comp_in += msg->next; + s->be->be_counters.comp_in += msg->next; + } else { + strm_fe(s)->fe_counters.comp_byp += msg->next; + s->be->be_counters.comp_byp += msg->next; + } + + /* copy the remaining data in the tmp buffer. 
*/ + b_adv(ib, msg->next); + msg->next = 0; + + if (ib->i > 0) { + left = bi_contig_data(ib); + memcpy(ob->p + ob->i, bi_ptr(ib), left); + ob->i += left; + if (ib->i - left) { + memcpy(ob->p + ob->i, ib->data, ib->i - left); + ob->i += ib->i - left; + } + } + + /* swap the buffers */ + *in = ob; + *out = ib; + + if (s->comp_ctx && s->comp_ctx->cur_lvl > 0) { + update_freq_ctr(&global.comp_bps_out, to_forward); + strm_fe(s)->fe_counters.comp_out += to_forward; + s->be->be_counters.comp_out += to_forward; + } + + /* forward the new chunk without remaining data */ + b_adv(ob, to_forward); + + return to_forward; +} + + +/***********************************************************************/ +static int +parse_compression_options(char **args, int section, struct proxy *proxy, + struct proxy *defpx, const char *file, int line, + char **err) +{ + struct comp *comp; + + if (proxy->comp == NULL) { + comp = calloc(1, sizeof(struct comp)); + proxy->comp = comp; + } + else + comp = proxy->comp; + + if (!strcmp(args[1], "algo")) { + struct comp_ctx *ctx; + int cur_arg = 2; + + if (!*args[cur_arg]) { + memprintf(err, "parsing [%s:%d] : '%s' expects \n", + file, line, args[0]); + return -1; + } + while (*(args[cur_arg])) { + if (comp_append_algo(comp, args[cur_arg]) < 0) { + memprintf(err, "'%s' : '%s' is not a supported algorithm.\n", + args[0], args[cur_arg]); + return -1; + } + if (proxy->comp->algos->init(&ctx, 9) == 0) + proxy->comp->algos->end(&ctx); + else { + memprintf(err, "'%s' : Can't init '%s' algorithm.\n", + args[0], args[cur_arg]); + return -1; + } + cur_arg++; + continue; + } + } + else if (!strcmp(args[1], "offload")) + comp->offload = 1; + else if (!strcmp(args[1], "type")) { + int cur_arg = 2; + + if (!*args[cur_arg]) { + memprintf(err, "'%s' expects \n", args[0]); + return -1; + } + while (*(args[cur_arg])) { + comp_append_type(comp, args[cur_arg]); + cur_arg++; + continue; + } + } + else { + memprintf(err, "'%s' expects 'algo', 'type' or 'offload'\n", + 
args[0]); + return -1; + } + + return 0; +} + +/* boolean, returns true if compression is used (either gzip or deflate) in the response */ +static int +smp_fetch_res_comp(const struct arg *args, struct sample *smp, const char *kw, void *private) +{ + smp->data.type = SMP_T_BOOL; + smp->data.u.sint = (smp->strm->comp_algo != NULL); + return 1; +} + +/* string, returns algo */ +static int +smp_fetch_res_comp_algo(const struct arg *args, struct sample *smp, const char *kw, void *private) +{ + if (!smp->strm->comp_algo) + return 0; + + smp->data.type = SMP_T_STR; + smp->flags = SMP_F_CONST; + smp->data.u.str.str = smp->strm->comp_algo->cfg_name; + smp->data.u.str.len = smp->strm->comp_algo->cfg_name_len; + return 1; +} + +/* Declare the config parser for "compression" keyword */ +static struct cfg_kw_list cfg_kws = {ILH, { + { CFG_LISTEN, "compression", parse_compression_options }, + { 0, NULL, NULL }, + } +}; + +/* Note: must not be declared <const> as its list will be overwritten */ +static struct sample_fetch_kw_list sample_fetch_keywords = {ILH, { + { "res.comp", smp_fetch_res_comp, 0, NULL, SMP_T_BOOL, SMP_USE_HRSHP }, + { "res.comp_algo", smp_fetch_res_comp_algo, 0, NULL, SMP_T_STR, SMP_USE_HRSHP }, + { /* END */ }, +}}; + +__attribute__((constructor)) +static void __flt_http_comp_init(void) +{ + cfg_register_keywords(&cfg_kws); + sample_register_fetches(&sample_fetch_keywords); +} diff --git a/src/proto_http.c b/src/proto_http.c index 79b21f902..3cb3b412b 100644 --- a/src/proto_http.c +++ b/src/proto_http.c @@ -69,6 +69,8 @@ #include #include +#include /* NOTE: temporary include, will be removed very soon */ + const char HTTP_100[] = "HTTP/1.1 100 Continue\r\n\r\n"; @@ -2340,234 +2342,6 @@ int parse_qvalue(const char *qvalue, const char **end) return q; } -/* - * Selects a compression algorithm depending on the client request. 
- */ -int select_compression_request_header(struct stream *s, struct buffer *req) -{ - struct http_txn *txn = s->txn; - struct http_msg *msg = &txn->req; - struct hdr_ctx ctx; - struct comp_algo *comp_algo = NULL; - struct comp_algo *comp_algo_back = NULL; - - /* Disable compression for older user agents announcing themselves as "Mozilla/4" - * unless they are known good (MSIE 6 with XP SP2, or MSIE 7 and later). - * See http://zoompf.com/2012/02/lose-the-wait-http-compression for more details. - */ - ctx.idx = 0; - if (http_find_header2("User-Agent", 10, req->p, &txn->hdr_idx, &ctx) && - ctx.vlen >= 9 && - memcmp(ctx.line + ctx.val, "Mozilla/4", 9) == 0 && - (ctx.vlen < 31 || - memcmp(ctx.line + ctx.val + 25, "MSIE ", 5) != 0 || - ctx.line[ctx.val + 30] < '6' || - (ctx.line[ctx.val + 30] == '6' && - (ctx.vlen < 54 || memcmp(ctx.line + 51, "SV1", 3) != 0)))) { - s->comp_algo = NULL; - return 0; - } - - /* search for the algo in the backend in priority or the frontend */ - if ((s->be->comp && (comp_algo_back = s->be->comp->algos)) || (strm_fe(s)->comp && (comp_algo_back = strm_fe(s)->comp->algos))) { - int best_q = 0; - - ctx.idx = 0; - while (http_find_header2("Accept-Encoding", 15, req->p, &txn->hdr_idx, &ctx)) { - const char *qval; - int q; - int toklen; - - /* try to isolate the token from the optional q-value */ - toklen = 0; - while (toklen < ctx.vlen && http_is_token[(unsigned char)*(ctx.line + ctx.val + toklen)]) - toklen++; - - qval = ctx.line + ctx.val + toklen; - while (1) { - while (qval < ctx.line + ctx.val + ctx.vlen && http_is_lws[(unsigned char)*qval]) - qval++; - - if (qval >= ctx.line + ctx.val + ctx.vlen || *qval != ';') { - qval = NULL; - break; - } - qval++; - - while (qval < ctx.line + ctx.val + ctx.vlen && http_is_lws[(unsigned char)*qval]) - qval++; - - if (qval >= ctx.line + ctx.val + ctx.vlen) { - qval = NULL; - break; - } - if (strncmp(qval, "q=", MIN(ctx.line + ctx.val + ctx.vlen - qval, 2)) == 0) - break; - - while (qval < ctx.line + 
ctx.val + ctx.vlen && *qval != ';') - qval++; - } - - /* here we have qval pointing to the first "q=" attribute or NULL if not found */ - q = qval ? parse_qvalue(qval + 2, NULL) : 1000; - - if (q <= best_q) - continue; - - for (comp_algo = comp_algo_back; comp_algo; comp_algo = comp_algo->next) { - if (*(ctx.line + ctx.val) == '*' || - word_match(ctx.line + ctx.val, toklen, comp_algo->ua_name, comp_algo->ua_name_len)) { - s->comp_algo = comp_algo; - best_q = q; - break; - } - } - } - } - - /* remove all occurrences of the header when "compression offload" is set */ - if (s->comp_algo) { - if ((s->be->comp && s->be->comp->offload) || (strm_fe(s)->comp && strm_fe(s)->comp->offload)) { - http_remove_header2(msg, &txn->hdr_idx, &ctx); - ctx.idx = 0; - while (http_find_header2("Accept-Encoding", 15, req->p, &txn->hdr_idx, &ctx)) { - http_remove_header2(msg, &txn->hdr_idx, &ctx); - } - } - return 1; - } - - /* identity is implicit does not require headers */ - if ((s->be->comp && (comp_algo_back = s->be->comp->algos)) || (strm_fe(s)->comp && (comp_algo_back = strm_fe(s)->comp->algos))) { - for (comp_algo = comp_algo_back; comp_algo; comp_algo = comp_algo->next) { - if (comp_algo->cfg_name_len == 8 && memcmp(comp_algo->cfg_name, "identity", 8) == 0) { - s->comp_algo = comp_algo; - return 1; - } - } - } - - s->comp_algo = NULL; - return 0; -} - -/* - * Selects a comression algorithm depending of the server response. 
- */ -int select_compression_response_header(struct stream *s, struct buffer *res) -{ - struct http_txn *txn = s->txn; - struct http_msg *msg = &txn->rsp; - struct hdr_ctx ctx; - struct comp_type *comp_type; - - /* no common compression algorithm was found in request header */ - if (s->comp_algo == NULL) - goto fail; - - /* HTTP < 1.1 should not be compressed */ - if (!(msg->flags & HTTP_MSGF_VER_11) || !(txn->req.flags & HTTP_MSGF_VER_11)) - goto fail; - - /* compress 200,201,202,203 responses only */ - if ((txn->status != 200) && - (txn->status != 201) && - (txn->status != 202) && - (txn->status != 203)) - goto fail; - - - /* Content-Length is null */ - if (!(msg->flags & HTTP_MSGF_TE_CHNK) && msg->body_len == 0) - goto fail; - - /* content is already compressed */ - ctx.idx = 0; - if (http_find_header2("Content-Encoding", 16, res->p, &txn->hdr_idx, &ctx)) - goto fail; - - /* no compression when Cache-Control: no-transform is present in the message */ - ctx.idx = 0; - while (http_find_header2("Cache-Control", 13, res->p, &txn->hdr_idx, &ctx)) { - if (word_match(ctx.line + ctx.val, ctx.vlen, "no-transform", 12)) - goto fail; - } - - comp_type = NULL; - - /* we don't want to compress multipart content-types, nor content-types that are - * not listed in the "compression type" directive if any. If no content-type was - * found but configuration requires one, we don't compress either. Backend has - * the priority. 
- */ - ctx.idx = 0; - if (http_find_header2("Content-Type", 12, res->p, &txn->hdr_idx, &ctx)) { - if (ctx.vlen >= 9 && strncasecmp("multipart", ctx.line+ctx.val, 9) == 0) - goto fail; - - if ((s->be->comp && (comp_type = s->be->comp->types)) || - (strm_fe(s)->comp && (comp_type = strm_fe(s)->comp->types))) { - for (; comp_type; comp_type = comp_type->next) { - if (ctx.vlen >= comp_type->name_len && - strncasecmp(ctx.line+ctx.val, comp_type->name, comp_type->name_len) == 0) - /* this Content-Type should be compressed */ - break; - } - /* this Content-Type should not be compressed */ - if (comp_type == NULL) - goto fail; - } - } - else { /* no content-type header */ - if ((s->be->comp && s->be->comp->types) || (strm_fe(s)->comp && strm_fe(s)->comp->types)) - goto fail; /* a content-type was required */ - } - - /* limit compression rate */ - if (global.comp_rate_lim > 0) - if (read_freq_ctr(&global.comp_bps_in) > global.comp_rate_lim) - goto fail; - - /* limit cpu usage */ - if (idle_pct < compress_min_idle) - goto fail; - - /* initialize compression */ - if (s->comp_algo->init(&s->comp_ctx, global.tune.comp_maxlevel) < 0) - goto fail; - - s->flags |= SF_COMP_READY; - - /* remove Content-Length header */ - ctx.idx = 0; - if ((msg->flags & HTTP_MSGF_CNT_LEN) && http_find_header2("Content-Length", 14, res->p, &txn->hdr_idx, &ctx)) - http_remove_header2(msg, &txn->hdr_idx, &ctx); - - /* add Transfer-Encoding header */ - if (!(msg->flags & HTTP_MSGF_TE_CHNK)) - http_header_add_tail2(&txn->rsp, &txn->hdr_idx, "Transfer-Encoding: chunked", 26); - - /* - * Add Content-Encoding header when it's not identity encoding. - * RFC 2616 : Identity encoding: This content-coding is used only in the - * Accept-Encoding header, and SHOULD NOT be used in the Content-Encoding - * header. 
- */ - if (s->comp_algo->cfg_name_len != 8 || memcmp(s->comp_algo->cfg_name, "identity", 8) != 0) { - trash.len = 18; - memcpy(trash.str, "Content-Encoding: ", trash.len); - memcpy(trash.str + trash.len, s->comp_algo->ua_name, s->comp_algo->ua_name_len); - trash.len += s->comp_algo->ua_name_len; - trash.str[trash.len] = '\0'; - http_header_add_tail2(&txn->rsp, &txn->hdr_idx, trash.str, trash.len); - } - return 1; - -fail: - s->comp_algo = NULL; - return 0; -} - void http_adjust_conn_mode(struct stream *s, struct http_txn *txn, struct http_msg *msg) { struct proxy *fe = strm_fe(s);