From e138dda1e0de087fa8ad3c2ce548b8cea3e4d02f Mon Sep 17 00:00:00 2001 From: Baptiste Assmann Date: Thu, 22 Oct 2020 15:39:03 +0200 Subject: [PATCH] MINOR: sample: Add converters to parse FIX messages This patch implements a couple of converters to validate and extract tag value from a FIX (Financial Information eXchange) message. The validation consists in a few checks such as mandatory fields and checksum computation. The extraction can get any tag value based on a tag string or tag id. This patch requires the istend() function. Thus it depends on "MINOR: ist: Add istend() function to return a pointer to the end of the string". Reviewed and Fixed by Christopher Faulet --- Makefile | 2 +- doc/configuration.txt | 46 +++++++ include/haproxy/fix-t.h | 70 +++++++++++ include/haproxy/fix.h | 97 +++++++++++++++ src/fix.c | 264 ++++++++++++++++++++++++++++++++++++++++ src/sample.c | 86 +++++++++++++ 6 files changed, 564 insertions(+), 1 deletion(-) create mode 100644 include/haproxy/fix-t.h create mode 100644 include/haproxy/fix.h create mode 100644 src/fix.c diff --git a/Makefile b/Makefile index 27aac3389..11c3ebfb9 100644 --- a/Makefile +++ b/Makefile @@ -843,7 +843,7 @@ OBJS += src/mux_h2.o src/mux_fcgi.o src/http_ana.o src/stream.o \ src/ebimtree.o src/uri_auth.o src/freq_ctr.o src/ebsttree.o \ src/ebistree.o src/auth.o src/wdt.o src/http_acl.o \ src/hpack-enc.o src/hpack-huff.o src/ebtree.o src/base64.o \ - src/hash.o src/dgram.o src/version.o + src/hash.o src/dgram.o src/version.o src/fix.o ifneq ($(TRACE),) OBJS += src/calltrace.o diff --git a/doc/configuration.txt b/doc/configuration.txt index e85c525ce..a7dc4b69a 100644 --- a/doc/configuration.txt +++ b/doc/configuration.txt @@ -15133,6 +15133,52 @@ field(,[,]) str(f1_f2_f3__f5),field(-2,_,3) # f2_f3_ str(f1_f2_f3__f5),field(-3,_,0) # f1_f2_f3 +fix_is_valid + Parses a binary payload and performs sanity checks regarding FIX (Financial + Information eXchange): + + - checks that all tag IDs and values are not 
empty and the tag IDs are + numeric + - checks the BeginString tag is the first tag with a valid FIX version + - checks the BodyLength tag is the second one with the right body length + - checks the MsgType tag is the third tag. + - checks that the last tag in the message is the CheckSum tag with a valid + checksum + + Due to current HAProxy design, only the first message sent by the client and + the server can be parsed. + + This converter returns a boolean, true if the payload contains a valid FIX + message, false if not. + + See also the fix_tag_value converter. + + Example: + tcp-request inspect-delay 10s + tcp-request content reject unless { req.payload(0,0),fix_is_valid } + +fix_tag_value(<tag>) + Parses a FIX (Financial Information eXchange) message and extracts the value + from the tag <tag>. <tag> can be a string or an integer pointing to the + desired tag. Any integer value is accepted, but only the following strings + are translated into their integer equivalent: BeginString, BodyLength, + MsgType, SenderComID, TargetComID, CheckSum. More tag names can be easily + added. + + Due to current HAProxy design, only the first message sent by the client and + the server can be parsed. No message validation is performed by this + converter. It is highly recommended to validate the message first using + the fix_is_valid converter. + + See also the fix_is_valid converter. + + Example: + tcp-request inspect-delay 10s + tcp-request content reject unless { req.payload(0,0),fix_is_valid } + # MsgType tag ID is 35, so both lines below will return the same content + tcp-request content set-var(txn.foo) req.payload(0,0),fix_tag_value(35) + tcp-request content set-var(txn.bar) req.payload(0,0),fix_tag_value(MsgType) + hex Converts a binary input sample to a hex string containing two hex digits per input byte. 
It is used to log or transfer hex dumps of some binary input data diff --git a/include/haproxy/fix-t.h b/include/haproxy/fix-t.h new file mode 100644 index 000000000..bba9bd61a --- /dev/null +++ b/include/haproxy/fix-t.h @@ -0,0 +1,70 @@ +/* + * include/haproxy/fix-t.h + * This file contains structure declarations for FIX protocol. + * + * Copyright 2020 Baptiste Assmann + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_FIX_T_H +#define _HAPROXY_FIX_T_H + +#include + +/* + * FIX messages are composed by a list of Tag=Value separated by a 'delimiter' + */ +#define FIX_DELIMITER 0x01 + +/* + * know FIX version strings + */ +#define FIX_4_0 (ist("FIX.4.0")) +#define FIX_4_1 (ist("FIX.4.1")) +#define FIX_4_2 (ist("FIX.4.2")) +#define FIX_4_3 (ist("FIX.4.3")) +#define FIX_4_4 (ist("FIX.4.4")) +#define FIX_5_0 (ist("FIXT.1.1")) +/* FIX_5_0SP1 and FIX_5_0SP2 have the same version string than FIX5_0 */ + +/* + * Supported FIX tag ID + */ +#define FIX_TAG_BeginString 8 +#define FIX_TAG_BodyLength 9 +#define FIX_TAG_CheckSum 10 +#define FIX_TAG_MsgType 35 +#define FIX_TAG_SenderComID 49 +#define FIX_TAG_TargetComID 56 + + +#define FIX_MSG_MINSIZE 26 /* Minimal length for a FIX Message */ +#define FIX_CHKSUM_SIZE 7 /* Length of the CheckSum tag (10=NNN) */ +/* + * return code when parsing / validating FIX 
messages + */ +#define FIX_INVALID_MESSAGE -1 +#define FIX_NEED_MORE_DATA 0 +#define FIX_VALID_MESSAGE 1 + +#endif /* _HAPROXY_FIX_T_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/fix.h b/include/haproxy/fix.h new file mode 100644 index 000000000..1d242e7ff --- /dev/null +++ b/include/haproxy/fix.h @@ -0,0 +1,97 @@ +/* + * include/haproxy/fix.h + * This file contains functions and macros declarations for FIX protocol decoding. + * + * Copyright 2020 Baptiste Assmann + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_FIX_H +#define _HAPROXY_FIX_H + +#include + +#include +#include + +unsigned int fix_check_id(const struct ist str, const struct ist version); +int fix_validate_message(const struct ist msg); +struct ist fix_tag_value(const struct ist msg, unsigned int tagid); + +/* + * Return the FIX version string (one of FIX_X_Y macros) corresponding to + * <str> or IST_NULL if not found. 
+ */ +static inline struct ist fix_version(const struct ist str) +{ + /* every known version string is at least 7 bytes long, anything shorter cannot match */ + if (istlen(str) < 7) + return IST_NULL; + + if (isteq(FIX_4_0, str)) + return FIX_4_0; + else if (isteq(FIX_4_1, str)) + return FIX_4_1; + else if (isteq(FIX_4_2, str)) + return FIX_4_2; + else if (isteq(FIX_4_3, str)) + return FIX_4_3; + else if (isteq(FIX_4_4, str)) + return FIX_4_4; + else if (isteq(FIX_5_0, str)) + return FIX_5_0; + + return IST_NULL; +} + +/* + * Return the FIX tag ID corresponding to <tag> if one is found, or 0 if not. + * + * The full list of tag IDs is available here, just in case we need to support + * more "string" equivalent in the future: + * https://www.onixs.biz/fix-dictionary/4.2/fields_by_tag.html + */ +static inline unsigned int fix_tagid(const struct ist tag) +{ + unsigned id = fix_check_id(tag, IST_NULL); + + if (id) + return id; + + else if (isteqi(tag, ist("MsgType"))) + return FIX_TAG_MsgType; + else if (isteqi(tag, ist("CheckSum"))) + return FIX_TAG_CheckSum; + else if (isteqi(tag, ist("BodyLength"))) + return FIX_TAG_BodyLength; + else if (isteqi(tag, ist("TargetComID"))) + return FIX_TAG_TargetComID; + else if (isteqi(tag, ist("BeginString"))) + return FIX_TAG_BeginString; + else if (isteqi(tag, ist("SenderComID"))) + return FIX_TAG_SenderComID; + + return 0; +} + +#endif /* _HAPROXY_FIX_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/src/fix.c b/src/fix.c new file mode 100644 index 000000000..82af1c0b9 --- /dev/null +++ b/src/fix.c @@ -0,0 +1,264 @@ +/* + * Financial Information eXchange Protocol + * + * Copyright 2020 Baptiste Assmann + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + */ + +#include +#include +/* + * Return the corresponding numerical tag id if <str> looks like a valid FIX + * protocol tag ID. Otherwise, 0 is returned (0 is an invalid id). + * + * If <version> is given, it must be one of the defined FIX version strings (see + * FIX_X_Y macros). In this case, the function will also check tag ID ranges. If + * no <version> is provided, any strictly positive integer is valid. + * + * tag ID range depends on FIX protocol version: + * - FIX.4.0: 1-140 + * - FIX.4.1: 1-211 + * - FIX.4.2: 1-446 + * - FIX.4.3: 1-659 + * - FIX.4.4: 1-956 + * - FIX.5.0: 1-1139 + * - FIX.5.0SP1: 1-1426 + * - FIX.5.0SP2: 1-1621 + * range 10000 to 19999 is for "user defined tags" + */ +unsigned int fix_check_id(const struct ist str, const struct ist version) { + const char *s, *end; + unsigned int ret; + + s = istptr(str); + end = istend(str); + ret = read_uint(&s, end); + + /* we did not consume all characters from <str>, this is an error */ + if (s != end) + return 0; + + /* field ID can't be 0 */ + if (ret == 0) + return 0; + + /* we can leave now if <version> was not provided */ + if (!isttest(version)) + return ret; + + /* we can leave now if this is a "user defined tag id" */ + if (ret >= 10000 && ret <= 19999) + return ret; + + /* now check the upper bound of the range for each FIX version */ + if (istissame(FIX_4_0, version) && (ret <= 140)) + return ret; + else if (istissame(FIX_4_1, version) && (ret <= 211)) + return ret; + else if (istissame(FIX_4_2, version) && (ret <= 446)) + return ret; + else if (istissame(FIX_4_3, version) && (ret <= 659)) + return ret; + else if (istissame(FIX_4_4, version) && (ret <= 956)) + return ret; + /* version string is the same for all 5.0 versions, so we can only take + * into consideration the biggest range + */ + else if (istissame(FIX_5_0, version) && (ret <= 1621)) + return ret; + + return 0; +} + +/* + * Parse a FIX message and performs following sanity checks: + * + * - checks tag ids and values are not empty + * - checks tag ids are numerical value + * - checks 
the first tag is BeginString with a valid version + * - checks the second tag is BodyLength with the right body length + * - checks the third tag is MsgType + * - checks the last tag is CheckSum with a valid checksum + * + * Returns: + * FIX_INVALID_MESSAGE if the message is invalid + * FIX_NEED_MORE_DATA if we need more data to fully validate the message + * FIX_VALID_MESSAGE if the message looks valid + */ +int fix_validate_message(const struct ist msg) +{ + struct ist parser, version; + unsigned int tagnum, bodylen; + unsigned char checksum; + char *body; + int ret = FIX_INVALID_MESSAGE; + + if (istlen(msg) < FIX_MSG_MINSIZE) { + ret = FIX_NEED_MORE_DATA; + goto end; + } + + /* parsing the whole message to compute the checksum and check all tag + * ids are properly set. Here we are sure to have the 2 first tags. Thus + * the version and the body length can be checked. + */ + parser = msg; + version = IST_NULL; + checksum = tagnum = bodylen = 0; + body = NULL; + while (istlen(parser) > 0) { + struct ist tag, value; + unsigned int tagid; + const char *p, *end; + + /* parse the tag ID and its value and perform first sanity checks */ + value = iststop(istfind(parser, '='), FIX_DELIMITER); + + /* end of value (delimiter) not found: the tag is not complete yet */ + if (istend(value) == istend(parser)) { + ret = FIX_NEED_MORE_DATA; + goto end; + } + /* empty tag or empty value are forbidden */ + if (istptr(parser) == istptr(value) ||!istlen(value)) + goto end; + + /* value points on '='. 
get the tag and skip '=' */ + tag = ist2(istptr(parser), istptr(value) - istptr(parser)); + value = istnext(value); + + /* Check the tag id (range-checked against the version once it is known) */ + tagid = fix_check_id(tag, version); + if (!tagid) + goto end; + tagnum++; + + if (tagnum == 1) { + /* the first tag must be BeginString */ + if (tagid != FIX_TAG_BeginString) + goto end; + + version = fix_version(value); + if (!isttest(version)) + goto end; + } + else if (tagnum == 2) { + /* the second tag must be BodyLength */ + if (tagid != FIX_TAG_BodyLength) + goto end; + + p = istptr(value); + end = istend(value); + bodylen = read_uint(&p, end); + + /* we did not consume all characters from <value> or no body, this is an error. + * There is at least the message type in the body. + */ + if (p != end || !bodylen) + goto end; + + body = istend(value) + 1; + } + else if (tagnum == 3) { + /* the third tag must be MsgType */ + if (tagid != FIX_TAG_MsgType) + goto end; + } + else if (tagnum > 3 && tagid == FIX_TAG_CheckSum) { + /* CheckSum tag should be the last one and is not taken into account + * to compute the checksum itself and the body length. The value is + * a three-octet representation of the checksum decimal value. + */ + if (bodylen != istptr(parser) - body) + goto end; + + if (istlen(value) != 3) + goto end; + if (checksum != strl2ui(istptr(value), istlen(value))) + goto end; + + /* End of the message, exit from the loop */ + ret = FIX_VALID_MESSAGE; + goto end; + } + + /* add tag=value and its delimiter to the checksum; checksum is an unsigned char, so the sum wraps around on overflow */ + for (p = istptr(tag) ; p < istend(tag) ; ++p) + checksum += *p; + checksum += '='; + for (p = istptr(value) ; p < istend(value) ; ++p) + checksum += *p; + checksum += FIX_DELIMITER; + + /* move the parser after the value and its delimiter (+2 for the equal sign and the delimiter) */ + parser = istadv(parser, istlen(tag) + istlen(value) + 2); + } + + if (body) { + /* We started to read the body but did not reach the checksum tag yet */ + ret = FIX_NEED_MORE_DATA; + } + + end: + return ret; +} + + +/* + * Iterate over a FIX message and return the value of <tagid>. 
+ * + * Returns the corresponding value if <tagid> is found. If <tagid> is not found + * because more data are required, the message with a length set to 0 is + * returned. If <tagid> is not found in the message or if the message is + * invalid, IST_NULL is returned. + * + * Note: Only simple sanity checks are performed on tags and values (not empty). + * + * the tag looks like + * <tag>=<value>FIX_DELIMITER with <tag> and <value> not empty + */ +struct ist fix_tag_value(const struct ist msg, unsigned int tagid) +{ + struct ist parser, t, v; + unsigned int id; + + parser = msg; + while (istlen(parser) > 0) { + v = iststop(istfind(parser, '='), FIX_DELIMITER); + + /* delimiter not found, need more data */ + if (istend(v) == istend(parser)) + break; + + /* empty tag or empty value, invalid */ + if (istptr(parser) == istptr(v) || !istlen(v)) + goto not_found_or_invalid; + + t = ist2(istptr(parser), istptr(v) - istptr(parser)); + v = istnext(v); + + id = fix_check_id(t, IST_NULL); + if (!id) + goto not_found_or_invalid; + if (id == tagid) { + /* found, return the corresponding value */ + return v; + } + + /* CheckSum tag is the last one, <tagid> was not found */ + if (id == FIX_TAG_CheckSum) + goto not_found_or_invalid; + + parser = istadv(parser, istlen(t) + istlen(v) + 2); + } + /* not enough data to find <tagid> */ + return ist2(istptr(msg), 0); + + not_found_or_invalid: + return IST_NULL; +} diff --git a/src/sample.c b/src/sample.c index a9c08ef54..7c5951791 100644 --- a/src/sample.c +++ b/src/sample.c @@ -26,9 +26,11 @@ #include #include #include +#include #include #include #include +#include #include #include #include @@ -3214,6 +3216,86 @@ static int sample_conv_protobuf_check(struct arg *args, struct sample_conv *conv return 1; } +/* + * Extract the tag value of an input binary sample. Takes a mandatory argument: + * the FIX protocol tag identifier. + * Return 1 if the tag was found, 0 if not. 
+ */ +static int sample_conv_fix_tag_value(const struct arg *arg_p, struct sample *smp, void *private) +{ + struct ist value; + + smp->flags &= ~SMP_F_MAY_CHANGE; + value = fix_tag_value(ist2(smp->data.u.str.area, smp->data.u.str.data), + arg_p[0].data.sint); + if (!istlen(value)) { + if (isttest(value)) { + /* empty but not IST_NULL: fix_tag_value() needs more data, the result may change */ + smp->flags |= SMP_F_MAY_CHANGE; + } + return 0; + } + + smp->data.u.str = ist2buf(value); + smp->flags |= SMP_F_CONST; + + return 1; +} + +/* This function checks the "fix_tag_value" converter configuration. + * It expects a "known" (by HAProxy) tag name or ID. + * Tag string names are converted to their ID counterpart because this is the + * format they are sent over the wire. + */ +static int sample_conv_fix_value_check(struct arg *args, struct sample_conv *conv, + const char *file, int line, char **err) +{ + struct ist str; + unsigned int tag; + + str = ist2(args[0].data.str.area, args[0].data.str.data); + tag = fix_tagid(str); + if (!tag) { + memprintf(err, "Unknown FIX tag name '%s'", args[0].data.str.area); + return 0; + } + + chunk_destroy(&args[0].data.str); + args[0].type = ARGT_SINT; + args[0].data.sint = tag; + + return 1; +} + +/* + * Checks that a buffer contains a valid FIX message + * + * Return 1 if the check could be run, 0 if not. 
+ * The result of the analysis itself is stored in <smp> as a boolean + */ +static int sample_conv_fix_is_valid(const struct arg *arg_p, struct sample *smp, void *private) +{ + struct ist msg; + + msg = ist2(smp->data.u.str.area, smp->data.u.str.data); + + smp->flags &= ~SMP_F_MAY_CHANGE; + switch (fix_validate_message(msg)) { + case FIX_VALID_MESSAGE: + smp->data.type = SMP_T_BOOL; + smp->data.u.sint = 1; + return 1; + case FIX_NEED_MORE_DATA: + smp->flags |= SMP_F_MAY_CHANGE; + return 0; + case FIX_INVALID_MESSAGE: + smp->data.type = SMP_T_BOOL; + smp->data.u.sint = 0; + return 1; + } + return 0; +} + /* This function checks the "strcmp" converter's arguments and extracts the * variable name and its scope. */ @@ -3802,6 +3884,10 @@ static struct sample_conv_kw_list sample_conv_kws = {ILH, { { "ungrpc", sample_conv_ungrpc, ARG2(1,PBUF_FNUM,STR), sample_conv_protobuf_check, SMP_T_BIN, SMP_T_BIN }, { "protobuf", sample_conv_protobuf, ARG2(1,PBUF_FNUM,STR), sample_conv_protobuf_check, SMP_T_BIN, SMP_T_BIN }, + /* FIX converters */ + { "fix_is_valid", sample_conv_fix_is_valid, 0, NULL, SMP_T_BIN, SMP_T_BOOL }, + { "fix_tag_value", sample_conv_fix_tag_value, ARG1(1,STR), sample_conv_fix_value_check, SMP_T_BIN, SMP_T_BIN }, + { "iif", sample_conv_iif, ARG2(2, STR, STR), NULL, SMP_T_BOOL, SMP_T_STR }, { "and", sample_conv_binary_and, ARG1(1,STR), check_operator, SMP_T_SINT, SMP_T_SINT },