From e3eb6a90352d2cfcbd03fb7b54b944c922f12421 Mon Sep 17 00:00:00 2001
From: Aurelien DARRAGON <adarragon@haproxy.com>
Date: Wed, 4 Sep 2024 15:03:46 +0200
Subject: [PATCH] MEDIUM: log: consider log-steps proxy setting for existing
 log origins

During tcp/http transaction processing, haproxy may produce logs at
different steps during the processing (accept, connect, request,
response, close). But the behavior is hardly configurable because
haproxy will only emit a single log per transaction, and by default
it will try to produce the log once all log aliases or fetches used
in the logformat could be satisfied, which means the log is often
emitted during connection teardown, unless "option logasap" is used.

We were often asked to have a way to emit multiple logs for a single
transaction, like for instance emit log during accept, then request,
response and close for instance, see GH #401 for more context.

Thanks to "log-steps" keyword introduced by commit "MINOR: log:
introduce "log-steps" proxy keyword", it is now possible to explictly
configure when logs should be generated by haproxy when processing a
transaction. This commit adds the required checks so that log-steps
proxy option is properly considered for existing logs generated by
haproxy. If "log-steps" is not specified on the proxy, the old behavior
is preserved.

Note: a slight cpu overhead should only be visible when "log-steps"
keyword will be used due to the implementation relying on eb32 lookup
instead of basic bitfield check as described in "MINOR: proxy: add
log_steps struct member". However, the default behavior shouldn't be
affected.

When combining log-steps with log-profiles, user has the ability to
explicitly control how and when haproxy should generate logs during
requests handling.
---
 src/cli.c      | 11 ++++++++++-
 src/frontend.c |  6 +++++-
 src/http_ana.c | 28 +++++++++++++++++++++++++---
 src/stream.c   | 31 +++++++++++++++++++++++++------
 4 files changed, 65 insertions(+), 11 deletions(-)

diff --git a/src/cli.c b/src/cli.c
index e2ae2eb70..1d80b7602 100644
--- a/src/cli.c
+++ b/src/cli.c
@@ -3112,6 +3112,8 @@ int pcli_wait_for_response(struct stream *s, struct channel *rep, int an_bit)
 	}
 
 	if (s->scb->flags & (SC_FL_ABRT_DONE|SC_FL_EOS)) {
+		uint8_t do_log = 0;
+
 		/* stream cleanup */
 
 		pcli_write_prompt(s);
@@ -3145,7 +3147,14 @@ int pcli_wait_for_response(struct stream *s, struct channel *rep, int an_bit)
 		pendconn_free(s);
 
 		/* let's do a final log if we need it */
-		if (!lf_expr_isempty(&fe->logformat) && s->logs.logwait &&
+		if (fe->to_log == LW_LOGSTEPS) {
+			if (log_orig_proxy(LOG_ORIG_TXN_CLOSE, fe))
+				do_log = 1;
+		}
+		else if (!lf_expr_isempty(&fe->logformat) && s->logs.logwait)
+			do_log = 1;
+
+		if (do_log &&
 		    !(s->flags & SF_MONITOR) &&
 		    (!(fe->options & PR_O_NULLNOLOG) || s->req.total)) {
 			s->do_log(s, log_orig(LOG_ORIG_TXN_CLOSE, LOG_ORIG_FL_NONE));
diff --git a/src/frontend.c b/src/frontend.c
index 3d18db94a..22270f287 100644
--- a/src/frontend.c
+++ b/src/frontend.c
@@ -55,7 +55,11 @@ int frontend_accept(struct stream *s)
 
 	if ((fe->mode == PR_MODE_TCP || fe->mode == PR_MODE_HTTP)
 	    && (!LIST_ISEMPTY(&fe->loggers))) {
-		if (likely(!lf_expr_isempty(&fe->logformat))) {
+		if (fe->to_log == LW_LOGSTEPS) {
+			if (log_orig_proxy(LOG_ORIG_TXN_ACCEPT, fe))
+				s->do_log(s, log_orig(LOG_ORIG_TXN_ACCEPT, LOG_ORIG_FL_NONE));
+		}
+		else if (likely(!lf_expr_isempty(&fe->logformat))) {
 			/* we have the client ip */
 			if (s->logs.logwait & LW_CLIP)
 				if (!(s->logs.logwait &= ~(LW_CLIP|LW_INIT)))
diff --git a/src/http_ana.c b/src/http_ana.c
index 175c01cba..a8d1b18fc 100644
--- a/src/http_ana.c
+++ b/src/http_ana.c
@@ -82,6 +82,7 @@ static int http_reply_100_continue(struct stream *s);
  */
 int http_wait_for_request(struct stream *s, struct channel *req, int an_bit)
 {
+	uint8_t do_log = 0;
 
 	/*
 	 * We will analyze a complete HTTP request to check the its syntax.
@@ -264,7 +265,20 @@ int http_wait_for_request(struct stream *s, struct channel *req, int an_bit)
 	 * Note: we cannot log anymore if the request has been
 	 * classified as invalid.
 	 */
-	if (unlikely(s->logs.logwait & LW_REQ)) {
+	if (unlikely(sess->fe->to_log == LW_LOGSTEPS)) {
+		if (log_orig_proxy(LOG_ORIG_TXN_REQUEST, sess->fe))
+			do_log = 1; /* forced log (through "log-steps") */
+	}
+	else if (unlikely(s->logs.logwait & LW_REQ)) {
+		/* default behavior when log-steps isn't specified:
+		 * don't log systematically, but only if conditions are
+		 * met, ie: not log emitted yet for this txn, all data available
+		 * (unless "option logasap" is used)
+		 */
+		do_log = 2;
+	}
+
+	if (do_log) {
 		/* we have a complete HTTP request that we must log */
 		if ((txn->uri = pool_alloc(pool_head_requri)) != NULL) {
 			size_t len;
@@ -272,7 +286,7 @@ int http_wait_for_request(struct stream *s, struct channel *req, int an_bit)
 			len = http_fmt_req_line(sl, txn->uri, global.tune.requri_len - 1);
 			txn->uri[len] = 0;
 
-			if (!(s->logs.logwait &= ~(LW_REQ|LW_INIT)))
+			if (do_log == 1 || !(s->logs.logwait &= ~(LW_REQ|LW_INIT)))
 				s->do_log(s, log_orig(LOG_ORIG_TXN_REQUEST, LOG_ORIG_FL_NONE));
 		} else {
 			ha_alert("HTTP logging : out of memory.\n");
@@ -1682,6 +1696,7 @@ int http_process_res_common(struct stream *s, struct channel *rep, int an_bit, s
 	struct htx *htx;
 	struct proxy *cur_proxy;
 	enum rule_result ret = HTTP_RULE_RES_CONT;
+	uint8_t do_log = 0;
 
 	if (unlikely(msg->msg_state < HTTP_MSG_BODY))	/* we need more data */
 		return 0;
@@ -1904,11 +1919,18 @@ int http_process_res_common(struct stream *s, struct channel *rep, int an_bit, s
 	rep->analysers &= ~AN_RES_FLT_XFER_DATA;
 	rep->analysers |= AN_RES_HTTP_XFER_BODY;
 
+	if (sess->fe->to_log == LW_LOGSTEPS) {
+		if (log_orig_proxy(LOG_ORIG_TXN_RESPONSE, sess->fe))
+			do_log = 1;
+	}
+	else if ((!lf_expr_isempty(&sess->fe->logformat) && !(s->logs.logwait & LW_BYTES)))
+		do_log = 1;
+
 	/* if the user wants to log as soon as possible, without counting
 	 * bytes from the server, then this is the right moment. We have
 	 * to temporarily assign bytes_out to log what we currently have.
 	 */
-	if (!lf_expr_isempty(&sess->fe->logformat) && !(s->logs.logwait & LW_BYTES)) {
+	if (do_log) {
 		s->logs.t_close = s->logs.t_data; /* to get a valid end date */
 		s->logs.bytes_out = htx->data;
 		s->do_log(s, log_orig(LOG_ORIG_TXN_RESPONSE, LOG_ORIG_FL_NONE));
diff --git a/src/stream.c b/src/stream.c
index f955b8129..9b48e7d78 100644
--- a/src/stream.c
+++ b/src/stream.c
@@ -896,6 +896,7 @@ void back_establish(struct stream *s)
 	struct connection *conn = sc_conn(s->scb);
 	struct channel *req = &s->req;
 	struct channel *rep = &s->res;
+	uint8_t do_log = 0;
 
 	DBG_TRACE_ENTER(STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
 	/* First, centralize the timers information, and clear any irrelevant
@@ -916,19 +917,28 @@ void back_establish(struct stream *s)
 	if (objt_server(s->target))
 		health_adjust(__objt_server(s->target), HANA_STATUS_L4_OK);
 
+	if (strm_fe(s)->to_log == LW_LOGSTEPS) {
+		if (log_orig_proxy(LOG_ORIG_TXN_CONNECT, strm_fe(s)))
+			do_log = 1;
+	}
+
 	if (!IS_HTX_STRM(s)) { /* let's allow immediate data connection in this case */
 		/* if the user wants to log as soon as possible, without counting
 		 * bytes from the server, then this is the right moment. */
-		if (!lf_expr_isempty(&strm_fe(s)->logformat) && !(s->logs.logwait & LW_BYTES)) {
-			/* note: no pend_pos here, session is established */
-			s->logs.t_close = s->logs.t_connect; /* to get a valid end date */
-			s->do_log(s, log_orig(LOG_ORIG_TXN_CONNECT, LOG_ORIG_FL_NONE));
-		}
+		if (strm_fe(s)->to_log != LW_LOGSTEPS &&
+		    !lf_expr_isempty(&strm_fe(s)->logformat) && !(s->logs.logwait & LW_BYTES))
+			do_log = 1;
 	}
 	else {
 		s->scb->flags |= SC_FL_RCV_ONCE; /* a single read is enough to get response headers */
 	}
 
+	if (do_log) {
+		/* note: no pend_pos here, session is established */
+		s->logs.t_close = s->logs.t_connect; /* to get a valid end date */
+		s->do_log(s, log_orig(LOG_ORIG_TXN_CONNECT, LOG_ORIG_FL_NONE));
+	}
+
 	rep->analysers |= strm_fe(s)->fe_rsp_ana | s->be->be_rsp_ana;
 
 	se_have_more_data(s->scb->sedesc);
@@ -2564,6 +2574,8 @@ struct task *process_stream(struct task *t, void *context, unsigned int state)
 	}
 
 	if (!(s->flags & SF_IGNORE)) {
+		uint8_t do_log = 0;
+
 		s->logs.t_close = ns_to_ms(now_ns - s->logs.accept_ts);
 
 		stream_process_counters(s);
@@ -2586,7 +2598,14 @@ struct task *process_stream(struct task *t, void *context, unsigned int state)
 		}
 
 		/* let's do a final log if we need it */
-		if (!lf_expr_isempty(&sess->fe->logformat) && s->logs.logwait &&
+		if (sess->fe->to_log == LW_LOGSTEPS) {
+			if (log_orig_proxy(LOG_ORIG_TXN_CLOSE, sess->fe))
+				do_log = 1;
+		}
+		else if (!lf_expr_isempty(&sess->fe->logformat) && s->logs.logwait)
+			do_log = 1;
+
+		if (do_log &&
 		    !(s->flags & SF_MONITOR) &&
 		    (!(sess->fe->options & PR_O_NULLNOLOG) || req->total)) {
 			/* we may need to know the position in the queue */