diff --git a/include/proto/backend.h b/include/proto/backend.h index 8fa8c834f..444e53a6a 100644 --- a/include/proto/backend.h +++ b/include/proto/backend.h @@ -39,10 +39,10 @@ int srv_redispatch_connect(struct session *t); int backend_parse_balance(const char **args, char *err, int errlen, struct proxy *curproxy); -void recount_servers(struct proxy *px); void recalc_server_map(struct proxy *px); int be_downtime(struct proxy *px); void init_server_map(struct proxy *p); +void fwrr_init_server_groups(struct proxy *p); /* * This function tries to find a running server with free connection slots for @@ -118,7 +118,9 @@ static inline struct server *get_server_sh(struct proxy *px, recalc_server_map(px); l = h = 0; - if (px->srv_act > 1 || (px->srv_act == 0 && px->srv_bck > 1)) { + + /* note: we won't hash if there's only one server left */ + if (px->lbprm.tot_used > 1) { while ((l + sizeof (int)) <= len) { h ^= ntohl(*(unsigned int *)(&addr[l])); l += sizeof (int); diff --git a/include/types/backend.h b/include/types/backend.h index c0c0809a5..a71db6912 100644 --- a/include/types/backend.h +++ b/include/types/backend.h @@ -71,6 +71,10 @@ #define PR_O_CONTSTATS 0x80000000 /* continous counters */ + +/* various constants */ +#define BE_WEIGHT_SCALE 256 /* scale between user weight and effective weight */ + #endif /* _TYPES_BACKEND_H */ /* diff --git a/include/types/proxy.h b/include/types/proxy.h index 53de5132f..6d27a9bff 100644 --- a/include/types/proxy.h +++ b/include/types/proxy.h @@ -29,6 +29,7 @@ #include #include +#include #include #include #include @@ -64,6 +65,17 @@ #define PR_CAP_RS 0x0004 #define PR_CAP_LISTEN (PR_CAP_FE|PR_CAP_BE|PR_CAP_RS) +/* This structure is used to apply fast weighted round robin on a server group */ +struct fwrr_group { + struct eb_root curr; /* tree for servers in "current" time range */ + struct eb_root t0, t1; /* "init" and "next" servers */ + struct eb_root *init; /* servers waiting to be placed */ + struct eb_root *next; /* servers 
to be placed at next run */ + int curr_pos; /* current position in the tree */ + int curr_weight; /* total weight of the current time range */ + int next_weight; /* total weight of the next time range */ +}; + struct proxy { struct listener *listen; /* the listen addresses and sockets */ struct in_addr mon_net, mon_mask; /* don't forward connections from this net (network order) FIXME: should support IPv6 */ @@ -79,7 +91,7 @@ struct proxy { struct list block_cond; /* early blocking conditions (chained) */ struct list switching_rules; /* content switching rules (chained) */ struct server *srv; /* known servers */ - int srv_act, srv_bck; /* # of running servers */ + int srv_act, srv_bck; /* # of servers eligible for LB (UP|!checked) AND (enabled+weight!=0) */ struct { int tot_wact, tot_wbck; /* total effective weights of active and backup servers */ @@ -87,11 +99,19 @@ struct proxy { int tot_used; /* total number of servers used for LB */ int wmult; /* ratio between user weight and effective weight */ int wdiv; /* ratio between effective weight and user weight */ + struct server *fbck; /* first backup server when !PR_O_USE_ALL_BK, or NULL */ struct { struct server **srv; /* the server map used to apply weights */ int rr_idx; /* next server to be elected in round robin mode */ int state; /* PR_MAP_RECALC */ } map; /* LB parameters for map-based algorithms */ + struct { + struct fwrr_group act; /* weighted round robin on the active servers */ + struct fwrr_group bck; /* weighted round robin on the backup servers */ + } fwrr; + void (*update_server_eweight)(struct server *);/* if non-NULL, to be called after eweight change */ + void (*set_server_status_up)(struct server *);/* to be called after status changes to UP */ + void (*set_server_status_down)(struct server *);/* to be called after status changes to DOWN */ } lbprm; /* LB parameters for all algorithms */ char *cookie_name; /* name of the cookie to look for */ diff --git a/include/types/server.h 
b/include/types/server.h index 3553fc099..425850298 100644 --- a/include/types/server.h +++ b/include/types/server.h @@ -2,7 +2,7 @@ include/types/server.h This file defines everything related to servers. - Copyright (C) 2000-2006 Willy Tarreau - w@1wt.eu + Copyright (C) 2000-2007 Willy Tarreau - w@1wt.eu This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -26,6 +26,7 @@ #include #include +#include #include #include @@ -57,6 +58,7 @@ struct server { struct server *next; int state; /* server state (SRV_*) */ + int prev_state; /* server state before last change (SRV_*) */ int cklen; /* the len of the cookie, to speed up checks */ char *cookie; /* the id set in the cookie */ @@ -85,6 +87,12 @@ struct server { char *id; /* just for identification */ unsigned uweight, eweight; /* user-specified weight, and effective weight */ unsigned wscore; /* weight score, used during srv map computation */ + unsigned prev_eweight; /* eweight before last change */ + unsigned rweight; /* remainder of weight in the current LB tree */ + unsigned npos, lpos; /* next and last positions in the LB tree */ + struct eb32_node lb_node; /* node used for tree-based load balancing */ + struct eb_root *lb_tree; /* we want to know in what tree the server is */ + struct server *next_full; /* next server in the temporary full list */ unsigned failed_checks, down_trans; /* failed checks and up-down transitions */ unsigned down_time; /* total time the server was down */ diff --git a/src/backend.c b/src/backend.c index 46078ceb1..9b0f0aa00 100644 --- a/src/backend.c +++ b/src/backend.c @@ -19,6 +19,7 @@ #include #include +#include #include #include @@ -46,51 +47,109 @@ #include #endif +static inline void fwrr_remove_from_tree(struct server *s); +static inline void fwrr_queue_by_weight(struct eb_root *root, struct server *s); +static inline void fwrr_dequeue_srv(struct server *s); +static void fwrr_get_srv(struct server *s); +static
void fwrr_queue_srv(struct server *s); + +/* This function returns non-zero if a server with the given weight and state + * is usable for LB, otherwise zero. + */ +static inline int srv_is_usable(int state, int weight) +{ + if (!weight) + return 0; + if (!(state & SRV_RUNNING)) + return 0; + return 1; +} + /* * This function recounts the number of usable active and backup servers for * proxy <p>. These numbers are returned into the p->srv_act and p->srv_bck. - * This function also recomputes the total active and backup weights. + * This function also recomputes the total active and backup weights. However, + * it does not update tot_weight nor tot_used. Use update_backend_weight() for + * this. */ -void recount_servers(struct proxy *px) +static void recount_servers(struct proxy *px) { struct server *srv; - int first_bkw = 0; px->srv_act = px->srv_bck = 0; px->lbprm.tot_wact = px->lbprm.tot_wbck = 0; + px->lbprm.fbck = NULL; for (srv = px->srv; srv != NULL; srv = srv->next) { - if (srv->state & SRV_RUNNING) { - if (srv->state & SRV_BACKUP) { - px->srv_bck++; - px->lbprm.tot_wbck += srv->eweight; - if (px->srv_bck == 1) - first_bkw = srv->eweight; - } else { - px->srv_act++; - px->lbprm.tot_wact += srv->eweight; - } + if (!srv_is_usable(srv->state, srv->eweight)) + continue; + + if (srv->state & SRV_BACKUP) { + if (!px->srv_bck && + !(px->options & PR_O_USE_ALL_BK)) + px->lbprm.fbck = srv; + px->srv_bck++; + px->lbprm.tot_wbck += srv->eweight; + } else { + px->srv_act++; + px->lbprm.tot_wact += srv->eweight; } } +} +/* This function simply updates the backend's tot_weight and tot_used values + * after servers weights have been updated. It is designed to be used after + * recount_servers() or equivalent.
+ */ +static void update_backend_weight(struct proxy *px) +{ if (px->srv_act) { px->lbprm.tot_weight = px->lbprm.tot_wact; px->lbprm.tot_used = px->srv_act; } - else if (px->srv_bck) { - if (px->options & PR_O_USE_ALL_BK) { - px->lbprm.tot_weight = px->lbprm.tot_wbck; - px->lbprm.tot_used = px->srv_bck; - } - else { /* the first backup server is enough */ - px->lbprm.tot_weight = first_bkw; - px->lbprm.tot_used = 1; - } + else if (px->lbprm.fbck) { + /* use only the first backup server */ + px->lbprm.tot_weight = px->lbprm.fbck->eweight; + px->lbprm.tot_used = 1; } else { - px->lbprm.tot_weight = 0; - px->lbprm.tot_used = 0; + px->lbprm.tot_weight = px->lbprm.tot_wbck; + px->lbprm.tot_used = px->srv_bck; } +} +/* this function updates the map according to server 's new state */ +static void map_set_server_status_down(struct server *srv) +{ + struct proxy *p = srv->proxy; + + if (srv->state == srv->prev_state && + srv->eweight == srv->prev_eweight) + return; + + /* FIXME: could be optimized since we know what changed */ + recount_servers(p); + update_backend_weight(p); + srv->prev_state = srv->state; + srv->prev_eweight = srv->eweight; + p->lbprm.map.state |= PR_MAP_RECALC; + +} + +/* this function updates the map according to server 's new state */ +static void map_set_server_status_up(struct server *srv) +{ + struct proxy *p = srv->proxy; + + if (srv->state == srv->prev_state && + srv->eweight == srv->prev_eweight) + return; + + /* FIXME: could be optimized since we know what changed */ + recount_servers(p); + update_backend_weight(p); + srv->prev_state = srv->state; + srv->prev_eweight = srv->eweight; + p->lbprm.map.state |= PR_MAP_RECALC; } /* This function recomputes the server map for proxy px. 
It relies on @@ -172,6 +231,10 @@ void init_server_map(struct proxy *p) int pgcd; int act, bck; + p->lbprm.set_server_status_up = map_set_server_status_up; + p->lbprm.set_server_status_down = map_set_server_status_down; + p->lbprm.update_server_eweight = NULL; + if (!p->srv) return; @@ -197,6 +260,8 @@ void init_server_map(struct proxy *p) act = bck = 0; for (srv = p->srv; srv; srv = srv->next) { srv->eweight = srv->uweight / pgcd; + srv->prev_eweight = srv->eweight; + srv->prev_state = srv->state; if (srv->state & SRV_BACKUP) bck += srv->eweight; else @@ -211,9 +276,522 @@ void init_server_map(struct proxy *p) /* recounts servers and their weights */ p->lbprm.map.state = PR_MAP_RECALC; recount_servers(p); + update_backend_weight(p); recalc_server_map(p); } +/* This function updates the server trees according to server 's new + * state. It should be called when server 's status changes to down. + * It is not important whether the server was already down or not. However, + * it is mandatory that the new state be down. + */ +static void fwrr_set_server_status_down(struct server *srv) +{ + struct proxy *p = srv->proxy; + struct fwrr_group *grp; + + if (srv->state == srv->prev_state && + srv->eweight == srv->prev_eweight) + return; + + if (!srv_is_usable(srv->prev_state, srv->prev_eweight)) + /* server was already down */ + goto out_update_backend; + + grp = (srv->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act; + grp->next_weight -= srv->prev_eweight; + + if (srv->state & SRV_BACKUP) { + p->lbprm.tot_wbck = p->lbprm.fwrr.bck.next_weight; + p->srv_bck--; + + if (srv == p->lbprm.fbck) { + /* we lost the first backup server in a single-backup + * configuration, we must search another one. 
+ */ + struct server *srv2 = p->lbprm.fbck; + do { + srv2 = srv2->next; + } while (srv2 && + !((srv2->state & SRV_BACKUP) && + srv_is_usable(srv2->state, srv2->eweight))); + p->lbprm.fbck = srv2; + } + } else { + p->lbprm.tot_wact = p->lbprm.fwrr.act.next_weight; + p->srv_act--; + } + + fwrr_dequeue_srv(srv); + fwrr_remove_from_tree(srv); + +out_update_backend: + /* check/update tot_used, tot_weight */ + update_backend_weight(p); + srv->prev_state = srv->state; + srv->prev_eweight = srv->eweight; + +} + +/* This function updates the server trees according to server 's new + * state. It should be called when server 's status changes to up. + * It is not important whether the server was already down or not. However, + * it is mandatory that the new state be up. This function will not change + * the weight of a server which was already up. + */ +static void fwrr_set_server_status_up(struct server *srv) +{ + struct proxy *p = srv->proxy; + struct fwrr_group *grp; + + if (srv->state == srv->prev_state && + srv->eweight == srv->prev_eweight) + return; + + if (srv_is_usable(srv->prev_state, srv->prev_eweight)) + /* server was already up */ + goto out_update_backend; + + grp = (srv->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act; + grp->next_weight += srv->eweight; + + if (srv->state & SRV_BACKUP) { + p->lbprm.tot_wbck = p->lbprm.fwrr.bck.next_weight; + p->srv_bck++; + + if (p->lbprm.fbck) { + /* we may have restored a backup server prior to fbck, + * in which case it should replace it. 
+ */ + struct server *srv2 = srv; + do { + srv2 = srv2->next; + } while (srv2 && (srv2 != p->lbprm.fbck)); + if (srv2) + p->lbprm.fbck = srv; + } + } else { + p->lbprm.tot_wact = p->lbprm.fwrr.act.next_weight; + p->srv_act++; + } + + /* note that eweight cannot be 0 here */ + fwrr_get_srv(srv); + srv->npos = grp->curr_pos + (grp->next_weight + grp->curr_weight - grp->curr_pos) / srv->eweight; + fwrr_queue_srv(srv); + +out_update_backend: + /* check/update tot_used, tot_weight */ + update_backend_weight(p); + srv->prev_state = srv->state; + srv->prev_eweight = srv->eweight; +} + +/* This function must be called after an update to server 's effective + * weight. It may be called after a state change too. + */ +static void fwrr_update_server_weight(struct server *srv) +{ + int old_state, new_state; + struct proxy *p = srv->proxy; + struct fwrr_group *grp; + + if (srv->state == srv->prev_state && + srv->eweight == srv->prev_eweight) + return; + + /* If changing the server's weight changes its state, we simply apply + * the procedures we already have for status change. If the state + * remains down, the server is not in any tree, so it's as easy as + * updating its values. If the state remains up with different weights, + * there are some computations to perform to find a new place and + * possibly a new tree for this server. + */ + + old_state = srv_is_usable(srv->prev_state, srv->prev_eweight); + new_state = srv_is_usable(srv->state, srv->eweight); + + if (!old_state && !new_state) { + srv->prev_state = srv->state; + srv->prev_eweight = srv->eweight; + return; + } + else if (!old_state && new_state) { + fwrr_set_server_status_up(srv); + return; + } + else if (old_state && !new_state) { + fwrr_set_server_status_down(srv); + return; + } + + grp = (srv->state & SRV_BACKUP) ? 
&p->lbprm.fwrr.bck : &p->lbprm.fwrr.act; + grp->next_weight = grp->next_weight - srv->prev_eweight + srv->eweight; + + p->lbprm.tot_wact = p->lbprm.fwrr.act.next_weight; + p->lbprm.tot_wbck = p->lbprm.fwrr.bck.next_weight; + + if (srv->lb_tree == grp->init) { + fwrr_dequeue_srv(srv); + fwrr_queue_by_weight(grp->init, srv); + } + else if (!srv->lb_tree) { + /* FIXME: server was down. This is not possible right now but + * may be needed soon for slowstart or graceful shutdown. + */ + fwrr_dequeue_srv(srv); + fwrr_get_srv(srv); + srv->npos = grp->curr_pos + (grp->next_weight + grp->curr_weight - grp->curr_pos) / srv->eweight; + fwrr_queue_srv(srv); + } else { + /* The server is either active or in the next queue. If it's + * still in the active queue and it has not consumed all of its + * places, let's adjust its next position. + */ + fwrr_get_srv(srv); + + if (srv->eweight > 0) { + int prev_next = srv->npos; + int step = grp->next_weight / srv->eweight; + + srv->npos = srv->lpos + step; + srv->rweight = 0; + + if (srv->npos > prev_next) + srv->npos = prev_next; + if (srv->npos < grp->curr_pos + 2) + srv->npos = grp->curr_pos + step; + } else { + /* push it into the next tree */ + srv->npos = grp->curr_pos + grp->curr_weight; + } + + fwrr_dequeue_srv(srv); + fwrr_queue_srv(srv); + } + + update_backend_weight(p); + srv->prev_state = srv->state; + srv->prev_eweight = srv->eweight; +} + +/* Remove a server from a tree. It must have previously been dequeued. This + * function is meant to be called when a server is going down or has its + * weight disabled. + */ +static inline void fwrr_remove_from_tree(struct server *s) +{ + s->lb_tree = NULL; +} + +/* Queue a server in the weight tree , assuming the weight is >0. + * We want to sort them by inverted weights, because we need to place + * heavy servers first in order to get a smooth distribution. 
+ */ +static inline void fwrr_queue_by_weight(struct eb_root *root, struct server *s) +{ + /* eweight can be as high as 256*255 */ + s->lb_node.key = BE_WEIGHT_SCALE*255 - s->eweight; + eb32_insert(root, &s->lb_node); + s->lb_tree = root; +} + +/* This function is responsible for building the weight trees in case of fast + * weighted round-robin. It also sets p->lbprm.wdiv to the eweight to uweight + * ratio. Both active and backup groups are initialized. + */ +void fwrr_init_server_groups(struct proxy *p) +{ + struct server *srv; + struct eb_root init_head = EB_ROOT; + + p->lbprm.set_server_status_up = fwrr_set_server_status_up; + p->lbprm.set_server_status_down = fwrr_set_server_status_down; + p->lbprm.update_server_eweight = fwrr_update_server_weight; + + p->lbprm.wdiv = BE_WEIGHT_SCALE; + for (srv = p->srv; srv; srv = srv->next) { + srv->prev_eweight = srv->eweight = srv->uweight * BE_WEIGHT_SCALE; + srv->prev_state = srv->state; + } + + recount_servers(p); + update_backend_weight(p); + + /* prepare the active servers group */ + p->lbprm.fwrr.act.curr_pos = p->lbprm.fwrr.act.curr_weight = + p->lbprm.fwrr.act.next_weight = p->lbprm.tot_wact; + p->lbprm.fwrr.act.curr = p->lbprm.fwrr.act.t0 = + p->lbprm.fwrr.act.t1 = init_head; + p->lbprm.fwrr.act.init = &p->lbprm.fwrr.act.t0; + p->lbprm.fwrr.act.next = &p->lbprm.fwrr.act.t1; + + /* prepare the backup servers group */ + p->lbprm.fwrr.bck.curr_pos = p->lbprm.fwrr.bck.curr_weight = + p->lbprm.fwrr.bck.next_weight = p->lbprm.tot_wbck; + p->lbprm.fwrr.bck.curr = p->lbprm.fwrr.bck.t0 = + p->lbprm.fwrr.bck.t1 = init_head; + p->lbprm.fwrr.bck.init = &p->lbprm.fwrr.bck.t0; + p->lbprm.fwrr.bck.next = &p->lbprm.fwrr.bck.t1; + + /* queue active and backup servers in two distinct groups */ + for (srv = p->srv; srv; srv = srv->next) { + if (!srv_is_usable(srv->state, srv->eweight)) + continue; + fwrr_queue_by_weight((srv->state & SRV_BACKUP) ? 
+ p->lbprm.fwrr.bck.init : + p->lbprm.fwrr.act.init, + srv); + } +} + +/* simply removes a server from a weight tree */ +static inline void fwrr_dequeue_srv(struct server *s) +{ + eb32_delete(&s->lb_node); +} + +/* queues a server into the appropriate group and tree depending on its + * backup status, and ->npos. If the server is disabled, simply assign + * it to the NULL tree. + */ +static void fwrr_queue_srv(struct server *s) +{ + struct proxy *p = s->proxy; + struct fwrr_group *grp; + + grp = (s->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act; + + /* Delay everything which does not fit into the window and everything + * which does not fit into the theoretical new window. + */ + if (!srv_is_usable(s->state, s->eweight)) { + fwrr_remove_from_tree(s); + } + else if (s->eweight <= 0 || + s->npos >= 2 * grp->curr_weight || + s->npos >= grp->curr_weight + grp->next_weight) { + /* put into next tree, and readjust npos in case we could + * finally take this back to current. */ + s->npos -= grp->curr_weight; + fwrr_queue_by_weight(grp->next, s); + } + else { + /* FIXME: we want to multiply by a constant to avoid overrides + * after weight changes, but this can easily overflow on 32-bit + * values. We need to change this for a 64-bit tree, and keep + * the 65536 factor for optimal smoothness (both rweight and + * eweight are 16 bit entities). s->npos is bound by the number + * of servers times the maximum eweight (~= nsrv << 16).
+ */ + //s->lb_node.key = grp->curr_weight * s->npos + s->rweight - s->eweight; + //s->lb_node.key = 65536 * s->npos + s->rweight - s->eweight; + s->lb_node.key = 16 * s->npos + (s->rweight - s->eweight) / 4096; + eb32i_insert(&grp->curr, &s->lb_node); + s->lb_tree = &grp->curr; + } +} + +/* prepares a server when extracting it from the "init" tree */ +static inline void fwrr_get_srv_init(struct server *s) +{ + s->npos = s->rweight = 0; +} + +/* prepares a server when extracting it from the "next" tree */ +static inline void fwrr_get_srv_next(struct server *s) +{ + struct fwrr_group *grp = (s->state & SRV_BACKUP) ? + &s->proxy->lbprm.fwrr.bck : + &s->proxy->lbprm.fwrr.act; + + s->npos += grp->curr_weight; +} + +/* prepares a server when it was marked down */ +static inline void fwrr_get_srv_down(struct server *s) +{ + struct fwrr_group *grp = (s->state & SRV_BACKUP) ? + &s->proxy->lbprm.fwrr.bck : + &s->proxy->lbprm.fwrr.act; + + s->npos = grp->curr_pos; +} + +/* prepares a server when extracting it from its tree */ +static void fwrr_get_srv(struct server *s) +{ + struct proxy *p = s->proxy; + struct fwrr_group *grp = (s->state & SRV_BACKUP) ? + &p->lbprm.fwrr.bck : + &p->lbprm.fwrr.act; + + if (s->lb_tree == grp->init) { + fwrr_get_srv_init(s); + } + else if (s->lb_tree == grp->next) { + fwrr_get_srv_next(s); + } + else if (s->lb_tree == NULL) { + fwrr_get_srv_down(s); + } +} + +/* switches trees "init" and "next" for FWRR group . "init" should be empty + * when this happens, and "next" filled with servers sorted by weights. + */ +static inline void fwrr_switch_trees(struct fwrr_group *grp) +{ + struct eb_root *swap; + swap = grp->init; + grp->init = grp->next; + grp->next = swap; + grp->curr_weight = grp->next_weight; + grp->curr_pos = grp->curr_weight; +} + +/* return next server from the current tree in FWRR group , or a server + * from the "init" tree if appropriate. If both trees are empty, return NULL. 
+ */ +static struct server *fwrr_get_server_from_group(struct fwrr_group *grp) +{ + struct eb32_node *node; + struct server *s; + + node = eb32_first(&grp->curr); + s = eb32_entry(node, struct server, lb_node); + + if (!node || s->npos > grp->curr_pos) { + /* either we have no server left, or we have a hole */ + struct eb32_node *node2; + node2 = eb32_first(grp->init); + if (node2) { + node = node2; + s = eb32_entry(node, struct server, lb_node); + fwrr_get_srv_init(s); + if (s->eweight == 0) /* FIXME: is it possible at all ? */ + node = NULL; + } + } + if (node) + return s; + else + return NULL; +} + +/* Computes next position of server <s> in the group. It is mandatory for <s> + * to have a non-zero, positive eweight. +*/ +static inline void fwrr_update_position(struct fwrr_group *grp, struct server *s) +{ + if (!s->npos) { + /* first time ever for this server */ + s->lpos = grp->curr_pos; + s->npos = grp->curr_pos + grp->next_weight / s->eweight; + s->rweight += grp->next_weight % s->eweight; + + if (s->rweight >= s->eweight) { + s->rweight -= s->eweight; + s->npos++; + } + } else { + s->lpos = s->npos; + s->npos += grp->next_weight / s->eweight; + s->rweight += grp->next_weight % s->eweight; + + if (s->rweight >= s->eweight) { + s->rweight -= s->eweight; + s->npos++; + } + } +} + +/* Return next server from the current tree in backend <p>, or a server from + * the init tree if appropriate. If both trees are empty, return NULL. + * Saturated servers are skipped and requeued. + */ +static struct server *fwrr_get_next_server(struct proxy *p) +{ + struct server *srv; + struct fwrr_group *grp; + struct server *full; + int switched; + + if (p->srv_act) + grp = &p->lbprm.fwrr.act; + else if (p->lbprm.fbck) + return p->lbprm.fbck; + else if (p->srv_bck) + grp = &p->lbprm.fwrr.bck; + else + return NULL; + + switched = 0; + full = NULL; /* NULL-terminated list of saturated servers */ + while (1) { + /* if we see an empty group, let's first try to collect weights + * which might have recently changed. + */ + if (!grp->curr_weight) + grp->curr_pos = grp->curr_weight = grp->next_weight; + + /* get first server from the "current" tree. When the end of + * the tree is reached, we may have to switch, but only once. + */ + while (1) { + srv = fwrr_get_server_from_group(grp); + if (srv) + break; + if (switched) + goto requeue_servers; + switched = 1; + fwrr_switch_trees(grp); + + } + + /* OK, we have a server. However, it may be saturated, in which + * case we don't want to reconsider it for now. We'll update + * its position and dequeue it anyway, so that we can move it + * to a better place afterwards. + */ + fwrr_update_position(grp, srv); + fwrr_dequeue_srv(srv); + grp->curr_pos++; + if (!srv->maxconn || srv->cur_sess < srv_dynamic_maxconn(srv)) + break; + + /* the server is saturated, let's chain it for later reinsertion */ + srv->next_full = full; + full = srv; + } + + /* OK, we got the best server, let's update it */ + fwrr_queue_srv(srv); + + requeue_servers: + if (unlikely(full)) { + if (switched) { + /* the tree has switched, requeue all extracted servers + * into "init", because their place was lost, and only + * their weight matters.
+ */ + do { + fwrr_queue_by_weight(grp->init, full); + full = full->next_full; + } while (full); + } else { + /* requeue all extracted servers just as if they were consumed + * so that they regain their expected place. + */ + do { + fwrr_queue_srv(full); + full = full->next_full; + } while (full); + } + } + return srv; +} + /* * This function tries to find a running server for the proxy following * the URL parameter hash method. It looks for a specific parameter in the @@ -314,12 +892,12 @@ int assign_server(struct session *s) return SRV_STATUS_OK; } - if (!s->be->srv_act && !s->be->srv_bck) + if (!s->be->lbprm.tot_weight) return SRV_STATUS_NOSRV; switch (s->be->options & PR_O_BALANCE) { case PR_O_BALANCE_RR: - s->srv = get_server_rr_with_conns(s->be); + s->srv = fwrr_get_next_server(s->be); if (!s->srv) return SRV_STATUS_FULL; break; @@ -347,7 +925,7 @@ int assign_server(struct session *s) s->txn.req.sol + s->txn.req.sl.rq.u, s->txn.req.sl.rq.u_l); if (!s->srv) { - /* parameter not found, fall back to round robin */ + /* parameter not found, fall back to round robin on the map */ s->srv = get_server_rr_with_conns(s->be); if (!s->srv) return SRV_STATUS_FULL; @@ -883,7 +1461,7 @@ int srv_redispatch_connect(struct session *t) } int be_downtime(struct proxy *px) { - if ((px->srv_act || px->srv_bck) && px->last_change < now.tv_sec) // ignore negative time + if (px->lbprm.tot_weight && px->last_change < now.tv_sec) // ignore negative time return px->down_time; return now.tv_sec - px->last_change + px->down_time; diff --git a/src/cfgparse.c b/src/cfgparse.c index 9ca0509a8..d9b2ce6a3 100644 --- a/src/cfgparse.c +++ b/src/cfgparse.c @@ -1564,6 +1564,8 @@ int cfg_parse_listen(const char *file, int linenum, char **args) curproxy->srv_bck++; else curproxy->srv_act++; + + newsrv->prev_state = newsrv->state; } else if (!strcmp(args[0], "log")) { /* syslog server address */ struct sockaddr_in *sa; @@ -2655,7 +2657,11 @@ int readcfgfile(const char *file) curproxy->lbprm.wmult = 
1; /* default weight multiplier */ curproxy->lbprm.wdiv = 1; /* default weight divider */ - init_server_map(curproxy); + /* round robin relies on a weight tree */ + if ((curproxy->options & PR_O_BALANCE) == PR_O_BALANCE_RR) + fwrr_init_server_groups(curproxy); + else + init_server_map(curproxy); if (curproxy->options & PR_O_LOGASAP) curproxy->to_log &= ~LW_BYTES; diff --git a/src/checks.c b/src/checks.c index 37132101b..5fe98ab28 100644 --- a/src/checks.c +++ b/src/checks.c @@ -61,9 +61,7 @@ static void set_server_down(struct server *s) s->last_change = now.tv_sec; s->state &= ~SRV_RUNNING; - - recount_servers(s->proxy); - s->proxy->lbprm.map.state |= PR_MAP_RECALC; + s->proxy->lbprm.set_server_status_down(s); /* we might have sessions queued on this server and waiting for * a connection. Those which are redispatchable will be queued @@ -467,19 +465,18 @@ void process_chk(struct task *t, struct timeval *next) if (s->health == s->rise) { int xferred; - if (s->last_change < now.tv_sec) // ignore negative times - s->down_time += now.tv_sec - s->last_change; - s->last_change = now.tv_sec; - s->state |= SRV_RUNNING; - if (s->proxy->srv_bck == 0 && s->proxy->srv_act == 0) { if (s->proxy->last_change < now.tv_sec) // ignore negative times s->proxy->down_time += now.tv_sec - s->proxy->last_change; s->proxy->last_change = now.tv_sec; } - recount_servers(s->proxy); - s->proxy->lbprm.map.state |= PR_MAP_RECALC; + if (s->last_change < now.tv_sec) // ignore negative times + s->down_time += now.tv_sec - s->last_change; + + s->last_change = now.tv_sec; + s->state |= SRV_RUNNING; + s->proxy->lbprm.set_server_status_up(s); /* check if we can handle some connections queued at the proxy. We * will take as many as we can handle.