diff --git a/Makefile b/Makefile index d68d26368..4b5cc4072 100644 --- a/Makefile +++ b/Makefile @@ -876,7 +876,8 @@ OBJS = src/proto_http.o src/cfgparse.o src/server.o src/stream.o \ src/sha1.o src/hpack-tbl.o src/hpack-enc.o src/uri_auth.o \ src/time.o src/proto_udp.o src/arg.o src/signal.o \ src/protocol.o src/lru.o src/hdr_idx.o src/hpack-huff.o \ - src/mailers.o src/h2.o src/base64.o src/hash.o src/http.o + src/mailers.o src/h2.o src/base64.o src/hash.o src/http.o \ + src/proto_sockpair.o EBTREE_OBJS = $(EBTREE_DIR)/ebtree.o $(EBTREE_DIR)/eb32sctree.o \ $(EBTREE_DIR)/eb32tree.o $(EBTREE_DIR)/eb64tree.o \ diff --git a/doc/configuration.txt b/doc/configuration.txt index 2332bb7d8..31974cc2a 100644 --- a/doc/configuration.txt +++ b/doc/configuration.txt @@ -2561,6 +2561,10 @@ bind / [, ...] [param*] - 'fd@' -> use file descriptor inherited from the parent. The fd must be bound and may or may not already be listening. + - 'sockpair@'-> like fd@ but you must use the fd of a + connected unix socket or of a socketpair. The bind waits + to receive a FD over the unix socket and uses it as if it + was the FD of an accept(). Should be used carefully. You may want to reference some environment variables in the address parameter, see section 2.3 about environment variables. @@ -7746,6 +7750,12 @@ server
[:[port]] [param*] - 'ipv6@' -> address is always IPv6 - 'unix@' -> address is a path to a local unix socket - 'abns@' -> address is in abstract namespace (Linux only) + - 'sockpair@' -> address is the FD of a connected unix + socket or of a socketpair. During a connection, the + backend creates a pair of connected sockets, and passes + one of them over the FD. The bind part will use the + received socket as the client FD. Should be used + carefully. You may want to reference some environment variables in the address parameter, see section 2.3 about environment variables. The "init-addr" setting can be used to modify the way diff --git a/include/common/standard.h b/include/common/standard.h index 4a7b2a333..ad1a76748 100644 --- a/include/common/standard.h +++ b/include/common/standard.h @@ -38,6 +38,7 @@ #include #include #include +#include #ifndef LLONG_MAX # define LLONG_MAX 9223372036854775807LL @@ -956,7 +957,7 @@ static inline int is_inet_addr(const struct sockaddr_storage *addr) */ static inline int is_addr(const struct sockaddr_storage *addr) { - if (addr->ss_family == AF_UNIX) + if (addr->ss_family == AF_UNIX || addr->ss_family == AF_CUST_SOCKPAIR) return 1; else return is_inet_addr(addr); diff --git a/include/proto/proto_sockpair.h b/include/proto/proto_sockpair.h new file mode 100644 index 000000000..4692461ba --- /dev/null +++ b/include/proto/proto_sockpair.h @@ -0,0 +1,28 @@ +/* + * Socket Pair protocol layer (sockpair) + * + * Copyright HAProxy Technologies - William Lallemand + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _PROTO_PROTO_SOCKPAIR_H +# define _PROTO_PROTO_SOCKPAIR_H + +int recv_fd_uxst(int sock); +int send_fd_uxst(int fd, int send_fd); + +#endif /* _PROTO_PROTO_SOCKPAIR_H */ + diff --git a/src/cfgparse.c b/src/cfgparse.c index 2ac7b9659..e20ab72da 100644 --- a/src/cfgparse.c +++ b/src/cfgparse.c @@ -295,6 +295,20 @@ int str2listener(char *str, struct proxy *curproxy, struct bind_conf *bind_conf, } port = end = get_host_port(ss2); + + } else if (ss2->ss_family == AF_CUST_SOCKPAIR) { + socklen_t addr_len; + inherited = 1; + + fd = ((struct sockaddr_in *)ss2)->sin_addr.s_addr; + addr_len = sizeof(*ss2); + if (getsockname(fd, (struct sockaddr *)ss2, &addr_len) == -1) { + memprintf(err, "cannot use file descriptor '%d' : %s.\n", fd, strerror(errno)); + goto fail; + } + + ss2->ss_family = AF_CUST_SOCKPAIR; /* reassign AF_CUST_SOCKPAIR because of getsockname */ + port = end = 0; } /* OK the address looks correct */ diff --git a/src/listener.c b/src/listener.c index 09ac50bd2..fb2306927 100644 --- a/src/listener.c +++ b/src/listener.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -541,6 +542,12 @@ void listener_accept(int fd) goto end; } + /* with sockpair@ we don't want to do an accept */ + if (unlikely(l->addr.ss_family == AF_CUST_SOCKPAIR)) { + if ((cfd = recv_fd_uxst(fd)) != -1) + fcntl(cfd, F_SETFL, O_NONBLOCK); + } else + #ifdef USE_ACCEPT4 /* only call accept4() if it's known to be safe, otherwise * fallback to the legacy accept() + fcntl(). diff --git a/src/proto_sockpair.c b/src/proto_sockpair.c new file mode 100644 index 000000000..8f4a87462 --- /dev/null +++ b/src/proto_sockpair.c @@ -0,0 +1,405 @@ +/* + * Socket Pair protocol layer (sockpair) + * + * Copyright HAProxy Technologies - William Lallemand + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include + +static void sockpair_add_listener(struct listener *listener, int port); +static int sockpair_bind_listener(struct listener *listener, char *errmsg, int errlen); +static int sockpair_bind_listeners(struct protocol *proto, char *errmsg, int errlen); +static int sockpair_connect_server(struct connection *conn, int data, int delack); + +/* Note: must not be declared as its list will be overwritten */ +static struct protocol proto_sockpair = { + .name = "sockpair", + .sock_domain = AF_CUST_SOCKPAIR, + .sock_type = SOCK_STREAM, + .sock_prot = 0, + .sock_family = AF_UNIX, + .sock_addrlen = sizeof(struct sockaddr_un), + .l3_addrlen = sizeof(((struct sockaddr_un*)0)->sun_path),/* path len */ + .accept = &listener_accept, + .connect = &sockpair_connect_server, + .bind = sockpair_bind_listener, + .bind_all = sockpair_bind_listeners, + .unbind_all = NULL, + .enable_all = enable_all_listeners, + .disable_all = disable_all_listeners, + .get_src = NULL, + .get_dst = NULL, + .pause = NULL, + .add = sockpair_add_listener, + .listeners = LIST_HEAD_INIT(proto_sockpair.listeners), + .nb_listeners = 0, +}; + +/* Add to the list of sockpair listeners (port is ignored). The + * listener's state is automatically updated from LI_INIT to LI_ASSIGNED. + * The number of listeners for the protocol is updated. + */ +static void sockpair_add_listener(struct listener *listener, int port) +{ + if (listener->state != LI_INIT) + return; + listener->state = LI_ASSIGNED; + listener->proto = &proto_sockpair; + LIST_ADDQ(&proto_sockpair.listeners, &listener->proto_list); + proto_sockpair.nb_listeners++; +} + +/* This function creates all UNIX sockets bound to the protocol entry . + * It is intended to be used as the protocol's bind_all() function. + * The sockets will be registered but not added to any fd_set, in order not to + * loose them across the fork(). A call to uxst_enable_listeners() is needed + * to complete initialization. + * + * The return value is composed from ERR_NONE, ERR_RETRYABLE and ERR_FATAL. + */ +static int sockpair_bind_listeners(struct protocol *proto, char *errmsg, int errlen) +{ + struct listener *listener; + int err = ERR_NONE; + + list_for_each_entry(listener, &proto->listeners, proto_list) { + err |= sockpair_bind_listener(listener, errmsg, errlen); + if (err & ERR_ABORT) + break; + } + return err; +} + +/* This function changes the state from ASSIGNED to LISTEN. The socket is NOT + * enabled for polling. The return value is composed from ERR_NONE, + * ERR_RETRYABLE and ERR_FATAL. It may return a warning or an error message in + * if the message is at most bytes long (including '\0'). + * Note that may be NULL if is also zero. + */ +static int sockpair_bind_listener(struct listener *listener, char *errmsg, int errlen) +{ + int fd = listener->fd; + int err; + const char *msg = NULL; + + err = ERR_NONE; + + /* ensure we never return garbage */ + if (errlen) + *errmsg = 0; + + if (listener->state != LI_ASSIGNED) + return ERR_NONE; /* already bound */ + + if (listener->fd == -1) { + err |= ERR_FATAL | ERR_ALERT; + msg = "sockpair can be only used with inherited FDs"; + goto err_return; + } + + if (fd >= global.maxsock) { + err |= ERR_FATAL | ERR_ALERT; + msg = "socket(): not enough free sockets, raise -n argument"; + goto err_return; + } + if (fcntl(fd, F_SETFL, O_NONBLOCK) == -1) { + err |= ERR_FATAL | ERR_ALERT; + msg = "cannot make sockpair non-blocking"; + goto err_return; + } + + listener->state = LI_LISTEN; + + fd_insert(fd, listener, listener->proto->accept, + listener->bind_conf->bind_thread[relative_pid-1] ? + listener->bind_conf->bind_thread[relative_pid-1] : MAX_THREADS_MASK); + + return err; + + err_return: + if (msg && errlen) + snprintf(errmsg, errlen, "%s [fd %d]", msg, fd); + return err; +} + +/* + * Send FD over a unix socket + * + * is the FD to send + * is the fd of the unix socket to use for the transfer + * + * The iobuf variable could be use in the future to enhance the protocol. + */ +int send_fd_uxst(int fd, int send_fd) +{ + char iobuf[2]; + struct iovec iov; + struct msghdr msghdr; + + char cmsgbuf[CMSG_SPACE(sizeof(int))]; + char buf[CMSG_SPACE(sizeof(int))]; + struct cmsghdr *cmsg = (void *)buf; + + int *fdptr; + + iov.iov_base = iobuf; + iov.iov_len = sizeof(iobuf); + + memset(&msghdr, 0, sizeof(msghdr)); + msghdr.msg_iov = &iov; + msghdr.msg_iovlen = 1; + + /* Now send the fds */ + msghdr.msg_control = cmsgbuf; + msghdr.msg_controllen = CMSG_SPACE(sizeof(int)); + + cmsg = CMSG_FIRSTHDR(&msghdr); + cmsg->cmsg_len = CMSG_LEN(sizeof(int)); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + + fdptr = (int *)CMSG_DATA(cmsg); + memcpy(fdptr, &send_fd, sizeof(send_fd)); + + if (sendmsg(fd, &msghdr, 0) != sizeof(iobuf)) { + ha_warning("Failed to transfer socket\n"); + return 1; + } + + return 0; +} + +/* + * + * This function works like uxst_connect_server but insteads of creating a + * socket and establishing a connection, it creates a pair of connected + * sockets, and send one of them through the destination FD. The destination FD + * is stored in addr.to->sin_addr.s_addr during configuration parsing. + * + * conn->target may point either to a valid server or to a backend, depending + * on conn->target. Only OBJ_TYPE_PROXY and OBJ_TYPE_SERVER are supported. The + * parameter is a boolean indicating whether there are data waiting for + * being sent or not, in order to adjust data write polling and on some + * platforms. The argument is ignored. + * + * Note that a pending send_proxy message accounts for data. + * + * It can return one of : + * - SF_ERR_NONE if everything's OK + * - SF_ERR_SRVTO if there are no more servers + * - SF_ERR_SRVCL if the connection was refused by the server + * - SF_ERR_PRXCOND if the connection has been limited by the proxy (maxconn) + * - SF_ERR_RESOURCE if a system resource is lacking (eg: fd limits, ports, ...) + * - SF_ERR_INTERNAL for any other purely internal errors + * Additionally, in the case of SF_ERR_RESOURCE, an emergency log will be emitted. + * + * The connection's fd is inserted only when SF_ERR_NONE is returned, otherwise + * it's invalid and the caller has nothing to do. + */ +static int sockpair_connect_server(struct connection *conn, int data, int delack) +{ + int sv[2], fd, dst_fd = -1; + + /* the FD is stored in the sockaddr struct */ + dst_fd = ((struct sockaddr_in *)&conn->addr.to)->sin_addr.s_addr; + + conn->flags = 0; + + if (obj_type(conn->target) != OBJ_TYPE_PROXY && + obj_type(conn->target) != OBJ_TYPE_SERVER) { + conn->flags |= CO_FL_ERROR; + return SF_ERR_INTERNAL; + } + + if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) { + ha_alert("socketpair(): Cannot create socketpair. Giving up.\n"); + conn->flags |= CO_FL_ERROR; + return SF_ERR_RESOURCE; + } + + fd = conn->handle.fd = sv[1]; + + if (fd >= global.maxsock) { + /* do not log anything there, it's a normal condition when this option + * is used to serialize connections to a server ! + */ + ha_alert("socket(): not enough free sockets. Raise -n argument. Giving up.\n"); + close(sv[0]); + close(sv[1]); + conn->err_code = CO_ER_CONF_FDLIM; + conn->flags |= CO_FL_ERROR; + return SF_ERR_PRXCOND; /* it is a configuration limit */ + } + + if (fcntl(fd, F_SETFL, O_NONBLOCK) == -1) { + qfprintf(stderr,"Cannot set client socket to non blocking mode.\n"); + close(sv[0]); + close(sv[1]); + conn->err_code = CO_ER_SOCK_ERR; + conn->flags |= CO_FL_ERROR; + return SF_ERR_INTERNAL; + } + + /* if a send_proxy is there, there are data */ + data |= conn->send_proxy_ofs; + + if (global.tune.server_sndbuf) + setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &global.tune.server_sndbuf, sizeof(global.tune.server_sndbuf)); + + if (global.tune.server_rcvbuf) + setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &global.tune.server_rcvbuf, sizeof(global.tune.server_rcvbuf)); + + /* The new socket is sent on the other side, it should be retrieved and + * considered as an 'accept' socket on the server side */ + if (send_fd_uxst(dst_fd, sv[0]) == -1) { + close(sv[0]); + close(sv[1]); + conn->err_code = CO_ER_SOCK_ERR; + conn->flags |= CO_FL_ERROR; + return SF_ERR_INTERNAL; + } + + close(sv[0]); /* we don't need this side anymore */ + + conn->flags &= ~CO_FL_WAIT_L4_CONN; + + conn->flags |= CO_FL_ADDR_TO_SET; + + /* Prepare to send a few handshakes related to the on-wire protocol. */ + if (conn->send_proxy_ofs) + conn->flags |= CO_FL_SEND_PROXY; + + conn_ctrl_init(conn); /* registers the FD */ + fdtab[fd].linger_risk = 0; /* no need to disable lingering */ + + if (conn_xprt_init(conn) < 0) { + conn_full_close(conn); + conn->flags |= CO_FL_ERROR; + return SF_ERR_RESOURCE; + } + + if (conn->flags & (CO_FL_HANDSHAKE | CO_FL_WAIT_L4_CONN)) { + conn_sock_want_send(conn); /* for connect status, proxy protocol or SSL */ + } + else { + /* If there's no more handshake, we need to notify the data + * layer when the connection is already OK otherwise we'll have + * no other opportunity to do it later (eg: health checks). + */ + data = 1; + } + + if (data) + conn_xprt_want_send(conn); /* prepare to send data if any */ + + return SF_ERR_NONE; /* connection is OK */ +} + + +/* + * Receive a file descriptor transfered from a unix socket. + * + * Return -1 or a socket fd; + * + * The iobuf variable could be use in the future to enhance the protocol. + */ +int recv_fd_uxst(int sock) +{ + struct msghdr msghdr; + struct iovec iov; + char iobuf[2]; + + char cmsgbuf[CMSG_SPACE(sizeof(int))]; + char buf[CMSG_SPACE(sizeof(int))]; + struct cmsghdr *cmsg = (void *)buf; + + + int recv_fd = -1; + int ret = -1; + + memset(&msghdr, 0, sizeof(msghdr)); + + iov.iov_base = iobuf; + iov.iov_len = sizeof(iobuf); + + msghdr.msg_iov = &iov; + msghdr.msg_iovlen = 1; + + msghdr.msg_control = cmsgbuf; + msghdr.msg_controllen = CMSG_SPACE(sizeof(int)); + + iov.iov_len = sizeof(iobuf); + iov.iov_base = iobuf; + + while (1) { + ret = recvmsg(sock, &msghdr, 0); + if (ret == -1 && errno == EINTR) + continue; + else + break; + } + + if (ret == -1) + return ret; + + cmsg = CMSG_FIRSTHDR(&msghdr); + if (cmsg->cmsg_level == SOL_SOCKET && + cmsg->cmsg_type == SCM_RIGHTS) { + size_t totlen = cmsg->cmsg_len - + CMSG_LEN(0); + memcpy(&recv_fd, CMSG_DATA(cmsg), totlen); + } + return recv_fd; +} + +__attribute__((constructor)) +static void __uxst_protocol_init(void) +{ + protocol_register(&proto_sockpair); +} + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/src/standard.c b/src/standard.c index 04df94ead..ae1f54eac 100644 --- a/src/standard.c +++ b/src/standard.c @@ -894,7 +894,22 @@ struct sockaddr_storage *str2sa_range(const char *str, int *port, int *low, int else ss.ss_family = AF_UNSPEC; - if (ss.ss_family == AF_UNSPEC && strncmp(str2, "fd@", 3) == 0) { + if (ss.ss_family == AF_UNSPEC && strncmp(str2, "sockpair@", 9) == 0) { + char *endptr; + + str2 += 9; + + ((struct sockaddr_in *)&ss)->sin_addr.s_addr = strtol(str2, &endptr, 10); + + if (!*str2 || *endptr) { + memprintf(err, "file descriptor '%s' is not a valid integer in '%s'\n", str2, str); + goto out; + } + + ss.ss_family = AF_CUST_SOCKPAIR; + + } + else if (ss.ss_family == AF_UNSPEC && strncmp(str2, "fd@", 3) == 0) { char *endptr; str2 += 3;