diff --git a/include/proto/backend.h b/include/proto/backend.h index 1720105ca..03fa518ca 100644 --- a/include/proto/backend.h +++ b/include/proto/backend.h @@ -43,6 +43,7 @@ void recalc_server_map(struct proxy *px); int be_downtime(struct proxy *px); void init_server_map(struct proxy *p); void fwrr_init_server_groups(struct proxy *p); +void fwlc_init_server_tree(struct proxy *p); /* * This function tries to find a running server with free connection slots for diff --git a/include/types/backend.h b/include/types/backend.h index 2b35860f1..2d627229b 100644 --- a/include/types/backend.h +++ b/include/types/backend.h @@ -2,7 +2,7 @@ include/types/backend.h This file rassembles definitions for backends - Copyright (C) 2000-2007 Willy Tarreau - w@1wt.eu + Copyright (C) 2000-2008 Willy Tarreau - w@1wt.eu This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -42,6 +42,7 @@ #define BE_LB_ALGO_SH (BE_LB_PROP_L4 | 0x02) /* balance on source IP hash */ #define BE_LB_ALGO_UH (BE_LB_PROP_L7 | 0x03) /* balance on URI hash */ #define BE_LB_ALGO_PH (BE_LB_PROP_L7 | 0x04) /* balance on URL parameter hash */ +#define BE_LB_ALGO_LC (BE_LB_PROP_DYN | 0x05) /* fast weighted round-robin mode (dynamic) */ /* various constants */ diff --git a/include/types/proxy.h b/include/types/proxy.h index a2238cb31..98baf539d 100644 --- a/include/types/proxy.h +++ b/include/types/proxy.h @@ -150,9 +150,15 @@ struct proxy { struct fwrr_group act; /* weighted round robin on the active servers */ struct fwrr_group bck; /* weighted round robin on the backup servers */ } fwrr; + struct { + struct eb_root act; /* weighted least conns on the active servers */ + struct eb_root bck; /* weighted least conns on the backup servers */ + } fwlc; void (*update_server_eweight)(struct server *);/* if non-NULL, to be called after eweight change */ void (*set_server_status_up)(struct server *);/* to be called after status changes to UP */ void (*set_server_status_down)(struct server *);/* to be called after status changes to DOWN */ + void (*server_take_conn)(struct server *);/* to be called when connection is assigned */ + void (*server_drop_conn)(struct server *);/* to be called when connection is dropped */ } lbprm; /* LB parameters for all algorithms */ char *cookie_name; /* name of the cookie to look for */ diff --git a/src/backend.c b/src/backend.c index 666c218bf..436a4d9b3 100644 --- a/src/backend.c +++ b/src/backend.c @@ -829,6 +829,291 @@ static struct server *fwrr_get_next_server(struct proxy *p, struct server *srvto return srv; } +/* Remove a server from a tree. It must have previously been dequeued. This + * function is meant to be called when a server is going down or has its + * weight disabled. + */ +static inline void fwlc_remove_from_tree(struct server *s) +{ + s->lb_tree = NULL; +} + +/* simply removes a server from a tree */ +static inline void fwlc_dequeue_srv(struct server *s) +{ + eb32_delete(&s->lb_node); +} + +/* Queue a server in its associated tree, assuming the weight is >0. + * Servers are sorted by #conns/weight. To ensure maximum accuracy, + * we use #conns*SRV_EWGHT_MAX/eweight as the sorting key. + */ +static inline void fwlc_queue_srv(struct server *s) +{ + s->lb_node.key = s->cur_sess * SRV_EWGHT_MAX / s->eweight; + eb32_insert(s->lb_tree, &s->lb_node); +} + +/* Re-position the server in the FWLC tree after it has been assigned one + * connection or after it has released one. Note that it is possible that + * the server has been moved out of the tree due to failed health-checks. + */ +static void fwlc_srv_reposition(struct server *s) +{ + if (!s->lb_tree) + return; + fwlc_dequeue_srv(s); + fwlc_queue_srv(s); +} + +/* This function updates the server trees according to server 's new + * state. It should be called when server 's status changes to down. + * It is not important whether the server was already down or not. It is not + * important either that the new state is completely down (the caller may not + * know all the variables of a server's state). + */ +static void fwlc_set_server_status_down(struct server *srv) +{ + struct proxy *p = srv->proxy; + + if (srv->state == srv->prev_state && + srv->eweight == srv->prev_eweight) + return; + + if (srv_is_usable(srv->state, srv->eweight)) + goto out_update_state; + + if (!srv_is_usable(srv->prev_state, srv->prev_eweight)) + /* server was already down */ + goto out_update_backend; + + if (srv->state & SRV_BACKUP) { + p->lbprm.tot_wbck -= srv->prev_eweight; + p->srv_bck--; + + if (srv == p->lbprm.fbck) { + /* we lost the first backup server in a single-backup + * configuration, we must search another one. + */ + struct server *srv2 = p->lbprm.fbck; + do { + srv2 = srv2->next; + } while (srv2 && + !((srv2->state & SRV_BACKUP) && + srv_is_usable(srv2->state, srv2->eweight))); + p->lbprm.fbck = srv2; + } + } else { + p->lbprm.tot_wact -= srv->prev_eweight; + p->srv_act--; + } + + fwlc_dequeue_srv(srv); + fwlc_remove_from_tree(srv); + +out_update_backend: + /* check/update tot_used, tot_weight */ + update_backend_weight(p); + out_update_state: + srv->prev_state = srv->state; + srv->prev_eweight = srv->eweight; +} + +/* This function updates the server trees according to server 's new + * state. It should be called when server 's status changes to up. + * It is not important whether the server was already down or not. It is not + * important either that the new state is completely UP (the caller may not + * know all the variables of a server's state). This function will not change + * the weight of a server which was already up. + */ +static void fwlc_set_server_status_up(struct server *srv) +{ + struct proxy *p = srv->proxy; + + if (srv->state == srv->prev_state && + srv->eweight == srv->prev_eweight) + return; + + if (!srv_is_usable(srv->state, srv->eweight)) + goto out_update_state; + + if (srv_is_usable(srv->prev_state, srv->prev_eweight)) + /* server was already up */ + goto out_update_backend; + + if (srv->state & SRV_BACKUP) { + srv->lb_tree = &p->lbprm.fwlc.bck; + p->lbprm.tot_wbck += srv->eweight; + p->srv_bck++; + + if (!(p->options & PR_O_USE_ALL_BK)) { + if (!p->lbprm.fbck) { + /* there was no backup server anymore */ + p->lbprm.fbck = srv; + } else { + /* we may have restored a backup server prior to fbck, + * in which case it should replace it. + */ + struct server *srv2 = srv; + do { + srv2 = srv2->next; + } while (srv2 && (srv2 != p->lbprm.fbck)); + if (srv2) + p->lbprm.fbck = srv; + } + } + } else { + srv->lb_tree = &p->lbprm.fwlc.act; + p->lbprm.tot_wact += srv->eweight; + p->srv_act++; + } + + /* note that eweight cannot be 0 here */ + fwlc_queue_srv(srv); + + out_update_backend: + /* check/update tot_used, tot_weight */ + update_backend_weight(p); + out_update_state: + srv->prev_state = srv->state; + srv->prev_eweight = srv->eweight; +} + +/* This function must be called after an update to server 's effective + * weight. It may be called after a state change too. + */ +static void fwlc_update_server_weight(struct server *srv) +{ + int old_state, new_state; + struct proxy *p = srv->proxy; + + if (srv->state == srv->prev_state && + srv->eweight == srv->prev_eweight) + return; + + /* If changing the server's weight changes its state, we simply apply + * the procedures we already have for status change. If the state + * remains down, the server is not in any tree, so it's as easy as + * updating its values. If the state remains up with different weights, + * there are some computations to perform to find a new place and + * possibly a new tree for this server. + */ + + old_state = srv_is_usable(srv->prev_state, srv->prev_eweight); + new_state = srv_is_usable(srv->state, srv->eweight); + + if (!old_state && !new_state) { + srv->prev_state = srv->state; + srv->prev_eweight = srv->eweight; + return; + } + else if (!old_state && new_state) { + fwlc_set_server_status_up(srv); + return; + } + else if (old_state && !new_state) { + fwlc_set_server_status_down(srv); + return; + } + + if (srv->lb_tree) + fwlc_dequeue_srv(srv); + + if (srv->state & SRV_BACKUP) { + p->lbprm.tot_wbck += srv->eweight - srv->prev_eweight; + srv->lb_tree = &p->lbprm.fwlc.bck; + } else { + p->lbprm.tot_wact += srv->eweight - srv->prev_eweight; + srv->lb_tree = &p->lbprm.fwlc.act; + } + + fwlc_queue_srv(srv); + + update_backend_weight(p); + srv->prev_state = srv->state; + srv->prev_eweight = srv->eweight; +} + +/* This function is responsible for building the trees in case of fast + * weighted least-conns. It also sets p->lbprm.wdiv to the eweight to + * uweight ratio. Both active and backup groups are initialized. + */ +void fwlc_init_server_tree(struct proxy *p) +{ + struct server *srv; + struct eb_root init_head = EB_ROOT; + + p->lbprm.set_server_status_up = fwlc_set_server_status_up; + p->lbprm.set_server_status_down = fwlc_set_server_status_down; + p->lbprm.update_server_eweight = fwlc_update_server_weight; + p->lbprm.server_take_conn = fwlc_srv_reposition; + p->lbprm.server_drop_conn = fwlc_srv_reposition; + + p->lbprm.wdiv = BE_WEIGHT_SCALE; + for (srv = p->srv; srv; srv = srv->next) { + srv->prev_eweight = srv->eweight = srv->uweight * BE_WEIGHT_SCALE; + srv->prev_state = srv->state; + } + + recount_servers(p); + update_backend_weight(p); + + p->lbprm.fwlc.act = init_head; + p->lbprm.fwlc.bck = init_head; + + /* queue active and backup servers in two distinct groups */ + for (srv = p->srv; srv; srv = srv->next) { + if (!srv_is_usable(srv->state, srv->eweight)) + continue; + srv->lb_tree = (srv->state & SRV_BACKUP) ? &p->lbprm.fwlc.bck : &p->lbprm.fwlc.act; + fwlc_queue_srv(srv); + } +} + +/* Return next server from the FWLC tree in backend

. If the tree is empty, + * return NULL. Saturated servers are skipped. + */ +static struct server *fwlc_get_next_server(struct proxy *p, struct server *srvtoavoid) +{ + struct server *srv, *avoided; + struct eb32_node *node; + + srv = avoided = NULL; + + if (p->srv_act) + node = eb32_first(&p->lbprm.fwlc.act); + else if (p->lbprm.fbck) + return p->lbprm.fbck; + else if (p->srv_bck) + node = eb32_first(&p->lbprm.fwlc.bck); + else + return NULL; + + while (node) { + /* OK, we have a server. However, it may be saturated, in which + * case we don't want to reconsider it for now, so we'll simply + * skip it. Same if it's the server we try to avoid, in which + * case we simply remember it for later use if needed. + */ + struct server *s; + + s = eb32_entry(node, struct server, lb_node); + if (!s->maxconn || s->cur_sess < srv_dynamic_maxconn(s)) { + if (s != srvtoavoid) { + srv = s; + break; + } + avoided = s; + } + node = eb32_next(node); + } + + if (!srv) + srv = avoided; + + return srv; +} + /* * This function tries to find a running server for the proxy following * the URL parameter hash method. It looks for a specific parameter in the @@ -944,6 +1229,11 @@ int assign_server(struct session *s) if (!s->srv) return SRV_STATUS_FULL; break; + case BE_LB_ALGO_LC: + s->srv = fwlc_get_next_server(s->be, srvtoavoid); + if (!s->srv) + return SRV_STATUS_FULL; + break; case BE_LB_ALGO_SH: if (s->cli_addr.ss_family == AF_INET) len = 4; @@ -1376,6 +1666,8 @@ int connect_server(struct session *s) s->srv->cur_sess++; if (s->srv->cur_sess > s->srv->cur_sess_max) s->srv->cur_sess_max = s->srv->cur_sess; + if (s->be->lbprm.server_take_conn) + s->be->lbprm.server_take_conn(s->srv); } if (!tv_add_ifset(&s->req->cex, &now, &s->be->timeout.connect)) @@ -1564,6 +1856,10 @@ int backend_parse_balance(const char **args, char *err, int errlen, struct proxy curproxy->lbprm.algo &= ~BE_LB_ALGO; curproxy->lbprm.algo |= BE_LB_ALGO_RR; } + else if (!strcmp(args[0], "leastconn")) { + curproxy->lbprm.algo &= ~BE_LB_ALGO; + curproxy->lbprm.algo |= BE_LB_ALGO_LC; + } else if (!strcmp(args[0], "source")) { curproxy->lbprm.algo &= ~BE_LB_ALGO; curproxy->lbprm.algo |= BE_LB_ALGO_SH; @@ -1585,7 +1881,7 @@ int backend_parse_balance(const char **args, char *err, int errlen, struct proxy curproxy->url_param_len = strlen(args[1]); } else { - snprintf(err, errlen, "'balance' only supports 'roundrobin', 'source', 'uri' and 'url_param' options."); + snprintf(err, errlen, "'balance' only supports 'roundrobin', 'leastconn', 'source', 'uri' and 'url_param' options."); return -1; } return 0; diff --git a/src/cfgparse.c b/src/cfgparse.c index 0b1a0bafc..7dbadd1e7 100644 --- a/src/cfgparse.c +++ b/src/cfgparse.c @@ -2964,6 +2964,8 @@ int readcfgfile(const char *file) /* round robin relies on a weight tree */ if ((curproxy->lbprm.algo & BE_LB_ALGO) == BE_LB_ALGO_RR) fwrr_init_server_groups(curproxy); + else if ((curproxy->lbprm.algo & BE_LB_ALGO) == BE_LB_ALGO_LC) + fwlc_init_server_tree(curproxy); else init_server_map(curproxy); diff --git a/src/proto_http.c b/src/proto_http.c index f68520178..d83e601c8 100644 --- a/src/proto_http.c +++ b/src/proto_http.c @@ -2569,8 +2569,11 @@ int process_srv(struct session *t) t->be->options & PR_O_ABRT_CLOSE))) { /* give up */ tv_eternity(&req->cex); fd_delete(t->srv_fd); - if (t->srv) + if (t->srv) { t->srv->cur_sess--; + if (t->srv->proxy->lbprm.server_drop_conn) + t->srv->proxy->lbprm.server_drop_conn(t->srv); + } /* note that this must not return any error because it would be able to * overwrite the client_retnclose() output. @@ -2594,8 +2597,11 @@ int process_srv(struct session *t) } else { fd_delete(t->srv_fd); - if (t->srv) + if (t->srv) { t->srv->cur_sess--; + if (t->srv->proxy->lbprm.server_drop_conn) + t->srv->proxy->lbprm.server_drop_conn(t->srv); + } if (!(req->flags & BF_WRITE_STATUS)) conn_err = SN_ERR_SRVTO; // it was a connect timeout. @@ -2788,6 +2794,8 @@ int process_srv(struct session *t) if (t->srv) { t->srv->cur_sess--; t->srv->failed_resp++; + if (t->srv->proxy->lbprm.server_drop_conn) + t->srv->proxy->lbprm.server_drop_conn(t->srv); } t->be->failed_resp++; t->srv_state = SV_STCLOSE; @@ -2830,6 +2838,8 @@ int process_srv(struct session *t) if (t->srv) { t->srv->cur_sess--; t->srv->failed_resp++; + if (t->srv->proxy->lbprm.server_drop_conn) + t->srv->proxy->lbprm.server_drop_conn(t->srv); } t->be->failed_resp++; t->srv_state = SV_STCLOSE; @@ -2998,6 +3008,8 @@ int process_srv(struct session *t) if (t->srv) { t->srv->cur_sess--; t->srv->failed_resp++; + if (t->srv->proxy->lbprm.server_drop_conn) + t->srv->proxy->lbprm.server_drop_conn(t->srv); } cur_proxy->failed_resp++; return_srv_prx_502: @@ -3025,6 +3037,8 @@ int process_srv(struct session *t) if (t->srv) { t->srv->cur_sess--; t->srv->failed_secu++; + if (t->srv->proxy->lbprm.server_drop_conn) + t->srv->proxy->lbprm.server_drop_conn(t->srv); } cur_proxy->denied_resp++; goto return_srv_prx_502; @@ -3160,6 +3174,8 @@ int process_srv(struct session *t) if (t->srv) { t->srv->cur_sess--; t->srv->failed_secu++; + if (t->srv->proxy->lbprm.server_drop_conn) + t->srv->proxy->lbprm.server_drop_conn(t->srv); } t->be->denied_resp++; @@ -3247,6 +3263,8 @@ int process_srv(struct session *t) if (t->srv) { t->srv->cur_sess--; t->srv->failed_resp++; + if (t->srv->proxy->lbprm.server_drop_conn) + t->srv->proxy->lbprm.server_drop_conn(t->srv); } t->be->failed_resp++; t->srv_state = SV_STCLOSE; @@ -3354,6 +3372,8 @@ int process_srv(struct session *t) if (t->srv) { t->srv->cur_sess--; t->srv->failed_resp++; + if (t->srv->proxy->lbprm.server_drop_conn) + t->srv->proxy->lbprm.server_drop_conn(t->srv); } t->be->failed_resp++; //close(t->srv_fd); @@ -3374,8 +3394,11 @@ int process_srv(struct session *t) //EV_FD_CLR(t->srv_fd, DIR_WR); buffer_shutw(req); fd_delete(t->srv_fd); - if (t->srv) + if (t->srv) { t->srv->cur_sess--; + if (t->srv->proxy->lbprm.server_drop_conn) + t->srv->proxy->lbprm.server_drop_conn(t->srv); + } //close(t->srv_fd); t->srv_state = SV_STCLOSE; /* We used to have a free connection slot. Since we'll never use it, @@ -3390,8 +3413,11 @@ int process_srv(struct session *t) //EV_FD_CLR(t->srv_fd, DIR_WR); buffer_shutw(req); fd_delete(t->srv_fd); - if (t->srv) + if (t->srv) { t->srv->cur_sess--; + if (t->srv->proxy->lbprm.server_drop_conn) + t->srv->proxy->lbprm.server_drop_conn(t->srv); + } //close(t->srv_fd); t->srv_state = SV_STCLOSE; if (!(t->flags & SN_ERR_MASK)) @@ -3429,6 +3455,8 @@ int process_srv(struct session *t) if (t->srv) { t->srv->cur_sess--; t->srv->failed_resp++; + if (t->srv->proxy->lbprm.server_drop_conn) + t->srv->proxy->lbprm.server_drop_conn(t->srv); } t->be->failed_resp++; //close(t->srv_fd); @@ -3449,8 +3477,11 @@ int process_srv(struct session *t) //EV_FD_CLR(t->srv_fd, DIR_RD); buffer_shutr(rep); fd_delete(t->srv_fd); - if (t->srv) + if (t->srv) { t->srv->cur_sess--; + if (t->srv->proxy->lbprm.server_drop_conn) + t->srv->proxy->lbprm.server_drop_conn(t->srv); + } //close(t->srv_fd); t->srv_state = SV_STCLOSE; /* We used to have a free connection slot. Since we'll never use it, @@ -3465,8 +3496,11 @@ int process_srv(struct session *t) //EV_FD_CLR(t->srv_fd, DIR_RD); buffer_shutr(rep); fd_delete(t->srv_fd); - if (t->srv) + if (t->srv) { t->srv->cur_sess--; + if (t->srv->proxy->lbprm.server_drop_conn) + t->srv->proxy->lbprm.server_drop_conn(t->srv); + } //close(t->srv_fd); t->srv_state = SV_STCLOSE; if (!(t->flags & SN_ERR_MASK)) diff --git a/tests/test-fwlc.cfg b/tests/test-fwlc.cfg new file mode 100644 index 000000000..53a82dfcd --- /dev/null +++ b/tests/test-fwlc.cfg @@ -0,0 +1,61 @@ +# This is a test configuration. +# It makes use of a farm built from 4 active servers and 4 backup servers, +# all listenening to different IP addresses on port 80. Health-checks are +# TCP only on port 81 so that iptables rules permit easy selection of which +# servers are enabled or disabled. +# +# Create statistics counters this way : +# +# iptables -N http +# iptables -A OUTPUT -p tcp --syn --dport 80 -j http +# for i in $(seq 1 8); do iptables -A http -d 127.0.0.$i; done +# iptables -A http -d 127.0.0.0/24 +# +# Consult the statistics using iptables this way: +# +# iptables --line-numbers -nxvL http +# iptables -Z http +# +# +# Block individual servers like this : +# iptables -I INPUT -p tcp --dport 81 -d 127.0.0.1 -j DROP +# + +global + maxconn 1000 + stats socket /tmp/sock1 mode 600 + stats timeout 3000 + stats maxconn 2000 + +listen sample1 + mode tcp + retries 1 + redispatch + contimeout 1000 + clitimeout 120000 + srvtimeout 120000 + maxconn 40000 + bind :8080 + balance leastconn + option allbackups + server act1 127.0.0.1:80 weight 10 maxconn 200 check inter 1000 fall 1 + server act2 127.0.0.2:80 weight 20 maxconn 200 check inter 1000 fall 1 + server act3 127.0.0.3:80 weight 30 maxconn 200 check inter 1000 fall 1 + server act4 127.0.0.4:80 weight 40 maxconn 200 check inter 1000 fall 1 + server bck1 127.0.0.5:80 weight 10 check inter 1000 fall 1 backup + server bck2 127.0.0.6:80 weight 20 check inter 1000 fall 1 backup + server bck3 127.0.0.7:80 weight 30 check inter 1000 fall 1 backup + server bck4 127.0.0.8:80 weight 40 check inter 1000 fall 1 backup + option httpclose + +listen sample1 + mode http + contimeout 1000 + clitimeout 50000 + srvtimeout 50000 + maxconn 40000 + bind :8081 + balance leastconn + option httpclose + stats uri /stats + stats refresh 5