mirror of
https://git.haproxy.org/git/haproxy.git/
synced 2025-08-06 23:27:04 +02:00
As UNIX domain sockets can be attached to Linux network namespaces (see the Linux kernel patch set for more details: https://lore.kernel.org/netdev/m1hbl7hxo3.fsf@fess.ebiederm.org), it is better to use my_socketat() to create the UNIX listener's socket. my_socketat() takes into account a network namespace, which may be configured for a frontend on the bind line: frontend fe_foo ... bind uxst@frontend.sock user haproxy group haproxy mode 660 namespace frontend This way, namespace-aware applications such as netstat will see this listening socket in its 'frontend' namespace and not in the root namespace as before. It is important to mention that the Linux kernel fixes referenced above make it possible to connect to this listener's socket from the root namespace and from any other namespace. A UNIX domain socket is protected by its permission set, which must be set with caution on its inode.
388 lines
12 KiB
C
388 lines
12 KiB
C
/*
|
|
* SOCK_UNIX socket management
|
|
*
|
|
* Copyright 2000-2020 Willy Tarreau <w@1wt.eu>
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public License
|
|
* as published by the Free Software Foundation; either version
|
|
* 2 of the License, or (at your option) any later version.
|
|
*
|
|
*/
|
|
|
|
#include <ctype.h>
|
|
#include <errno.h>
|
|
#include <string.h>
|
|
#include <unistd.h>
|
|
|
|
#include <sys/param.h>
|
|
#include <sys/socket.h>
|
|
#include <sys/types.h>
|
|
|
|
#include <sys/socket.h>
|
|
#include <sys/stat.h>
|
|
#include <sys/types.h>
|
|
#include <sys/un.h>
|
|
|
|
#include <haproxy/api.h>
|
|
#include <haproxy/errors.h>
|
|
#include <haproxy/fd.h>
|
|
#include <haproxy/global.h>
|
|
#include <haproxy/listener.h>
|
|
#include <haproxy/receiver-t.h>
|
|
#include <haproxy/namespace.h>
|
|
#include <haproxy/sock.h>
|
|
#include <haproxy/sock_unix.h>
|
|
#include <haproxy/tools.h>
|
|
|
|
|
|
struct proto_fam proto_fam_unix = {
|
|
.name = "unix",
|
|
.sock_domain = PF_UNIX,
|
|
.sock_family = AF_UNIX,
|
|
.sock_addrlen = sizeof(struct sockaddr_un),
|
|
.l3_addrlen = sizeof(((struct sockaddr_un*)0)->sun_path),
|
|
.addrcmp = sock_unix_addrcmp,
|
|
.bind = sock_unix_bind_receiver,
|
|
.get_src = sock_get_src,
|
|
.get_dst = sock_get_dst,
|
|
};
|
|
|
|
/* PLEASE NOTE for functions below:
|
|
*
|
|
* The address family SHOULD always be checked. In some cases a function will
|
|
* be used in a situation where the address family is guaranteed (e.g. protocol
|
|
* definitions), so the test may be avoided. This special case must then be
|
|
* mentioned in the comment before the function definition.
|
|
*/
|
|
|
|
|
|
/* Tells whether two AF_UNIX addresses <a> and <b> designate the same socket.
 * Returns 0 when they match and non-zero otherwise (including when either
 * address is not AF_UNIX).
 *
 * Abstract (ABNS) addresses, recognized by a leading \0 in sun_path, are
 * compared over the whole sun_path array. Regular paths are compared as
 * zero-terminated strings, except that a trailing ".<digits>.tmp" suffix (the
 * name given to a freshly bound socket before it replaces the final one) is
 * ignored on either side. Regular paths are expected to be zero-terminated.
 */
int sock_unix_addrcmp(const struct sockaddr_storage *a, const struct sockaddr_storage *b)
{
	const char *path_a = ((const struct sockaddr_un *)a)->sun_path;
	const char *path_b = ((const struct sockaddr_un *)b)->sun_path;
	const char *sides[2];
	int last_dot = 0;
	int pos, scan, side;

	/* both must be AF_UNIX */
	if (a->ss_family != AF_UNIX || b->ss_family != a->ss_family)
		return -1;

	/* an abstract address never matches a regular path */
	if (path_a[0] != path_b[0])
		return -1;

	if (path_a[0] == '\0')
		return memcmp(path_a, path_b, sizeof(((const struct sockaddr_un *)a)->sun_path));

	/* walk the common prefix, remembering where its last dot was */
	for (pos = 1; path_a[pos] == path_b[pos]; pos++) {
		if (path_a[pos] == '\0')
			return 0;
		if (path_a[pos] == '.')
			last_dot = pos;
	}

	/* The paths differ at <pos>. This is acceptable only if the difference
	 * lies within or after a sequence of digits following a dot, and is
	 * followed by ".tmp". When one path stops here (it is in "final"
	 * format), the other one must continue with the dot of its temporary
	 * suffix.
	 *
	 * Examples:
	 *     /tmp/test matches with /tmp/test.1822.tmp
	 *     /tmp/test.1822.tmp matches with /tmp/test.XXXX.tmp
	 */
	if (path_a[pos] == '\0' || path_b[pos] == '\0') {
		if (path_a[pos] != '.' && path_b[pos] != '.')
			return -1; /* invalid temp path */
		last_dot = pos; /* try to match against temp path */
	}

	if (last_dot == 0)
		return -1;

	/* each path that continues past <pos> must end in "<digits>.tmp"
	 * right after the dot; path "a" is checked first, then path "b".
	 */
	sides[0] = path_a;
	sides[1] = path_b;
	for (side = 0; side < 2; side++) {
		const char *p = sides[side];

		if (p[pos] == '\0')
			continue;

		scan = last_dot + 1;
		while (isdigit((unsigned char)p[scan]))
			scan++;

		if (strcmp(p + scan, ".tmp") != 0)
			return -1;
	}

	/* OK that's a match */
	return 0;
}
|
|
|
|
/* Binds receiver <rx>, and assigns rx->iocb and rx->owner as the callback and
|
|
* context, respectively, with ->bind_thread as the thread mask. Returns an
|
|
* error code made of ERR_* bits on failure or ERR_NONE on success. On failure,
|
|
* an error message may be passed into <errmsg>.
|
|
*/
|
|
int sock_unix_bind_receiver(struct receiver *rx, char **errmsg)
|
|
{
|
|
char tempname[MAXPATHLEN];
|
|
char backname[MAXPATHLEN];
|
|
struct sockaddr_un addr;
|
|
const char *path;
|
|
int maxpathlen;
|
|
int fd, err, ext, ret;
|
|
|
|
/* ensure we never return garbage */
|
|
if (errmsg)
|
|
*errmsg = 0;
|
|
|
|
err = ERR_NONE;
|
|
|
|
if (rx->flags & RX_F_BOUND)
|
|
return ERR_NONE;
|
|
|
|
if (rx->flags & RX_F_MUST_DUP) {
|
|
/* this is a secondary receiver that is an exact copy of a
|
|
* reference which must already be bound (or has failed).
|
|
* We'll try to dup() the other one's FD and take it. We
|
|
* try hard not to reconfigure the socket since it's shared.
|
|
*/
|
|
BUG_ON(!rx->shard_info);
|
|
if (!(rx->shard_info->ref->flags & RX_F_BOUND)) {
|
|
/* it's assumed that the first one has already reported
|
|
* the error, let's not spam with another one, and do
|
|
* not set ERR_ALERT.
|
|
*/
|
|
err |= ERR_RETRYABLE;
|
|
goto bind_ret_err;
|
|
}
|
|
/* taking the other one's FD will result in it being marked
|
|
* extern and being dup()ed. Let's mark the receiver as
|
|
* inherited so that it properly bypasses all second-stage
|
|
* setup and avoids being passed to new processes.
|
|
*/
|
|
rx->flags |= RX_F_INHERITED;
|
|
rx->fd = rx->shard_info->ref->fd;
|
|
}
|
|
|
|
/* if no FD was assigned yet, we'll have to either find a compatible
|
|
* one or create a new one.
|
|
*/
|
|
if (rx->fd == -1)
|
|
rx->fd = sock_find_compatible_fd(rx);
|
|
|
|
path = ((struct sockaddr_un *)&rx->addr)->sun_path;
|
|
maxpathlen = MIN(MAXPATHLEN, sizeof(addr.sun_path));
|
|
|
|
/* if the listener already has an fd assigned, then we were offered the
|
|
* fd by an external process (most likely the parent), and we don't want
|
|
* to create a new socket. However we still want to set a few flags on
|
|
* the socket.
|
|
*/
|
|
fd = rx->fd;
|
|
ext = (fd >= 0);
|
|
if (ext)
|
|
goto fd_ready;
|
|
|
|
if (path[0]) {
|
|
ret = snprintf(tempname, maxpathlen, "%s.%d.tmp", path, pid);
|
|
if (ret < 0 || ret >= sizeof(addr.sun_path)) {
|
|
err |= ERR_FATAL | ERR_ALERT;
|
|
memprintf(errmsg, "name too long for UNIX socket (limit usually 97)");
|
|
goto bind_return;
|
|
}
|
|
|
|
ret = snprintf(backname, maxpathlen, "%s.%d.bak", path, pid);
|
|
if (ret < 0 || ret >= maxpathlen) {
|
|
err |= ERR_FATAL | ERR_ALERT;
|
|
memprintf(errmsg, "name too long for UNIX socket (limit usually 97)");
|
|
goto bind_return;
|
|
}
|
|
|
|
/* 2. clean existing orphaned entries */
|
|
if (unlink(tempname) < 0 && errno != ENOENT) {
|
|
err |= ERR_FATAL | ERR_ALERT;
|
|
memprintf(errmsg, "error when trying to unlink previous UNIX socket (%s)", strerror(errno));
|
|
goto bind_return;
|
|
}
|
|
|
|
if (unlink(backname) < 0 && errno != ENOENT) {
|
|
err |= ERR_FATAL | ERR_ALERT;
|
|
memprintf(errmsg, "error when trying to unlink previous UNIX socket (%s)", strerror(errno));
|
|
goto bind_return;
|
|
}
|
|
|
|
/* 3. backup existing socket */
|
|
if (link(path, backname) < 0 && errno != ENOENT) {
|
|
err |= ERR_FATAL | ERR_ALERT;
|
|
memprintf(errmsg, "error when trying to preserve previous UNIX socket (%s)", strerror(errno));
|
|
goto bind_return;
|
|
}
|
|
|
|
/* Note: this test is redundant with the snprintf one above and
|
|
* will never trigger, it's just added as the only way to shut
|
|
* gcc's painfully dumb warning about possibly truncated output
|
|
* during strncpy(). Don't move it above or smart gcc will not
|
|
* see it!
|
|
*/
|
|
if (strlen(tempname) >= sizeof(addr.sun_path)) {
|
|
err |= ERR_FATAL | ERR_ALERT;
|
|
memprintf(errmsg, "name too long for UNIX socket (limit usually 97)");
|
|
goto bind_return;
|
|
}
|
|
|
|
strncpy(addr.sun_path, tempname, sizeof(addr.sun_path) - 1);
|
|
addr.sun_path[sizeof(addr.sun_path) - 1] = 0;
|
|
}
|
|
else {
|
|
/* first char is zero, it's an abstract socket whose address
|
|
* is defined by all the bytes past this zero.
|
|
*/
|
|
memcpy(addr.sun_path, path, sizeof(addr.sun_path));
|
|
}
|
|
addr.sun_family = AF_UNIX;
|
|
|
|
fd = my_socketat(rx->settings->netns, rx->proto->fam->sock_domain,
|
|
rx->proto->sock_type, rx->proto->sock_prot);
|
|
if (fd < 0) {
|
|
err |= ERR_FATAL | ERR_ALERT;
|
|
memprintf(errmsg, "cannot create receiving socket (%s)", strerror(errno));
|
|
goto bind_return;
|
|
}
|
|
|
|
fd_ready:
|
|
if (ext && fd < global.maxsock && fdtab[fd].owner) {
|
|
/* This FD was already bound so this means that it was already
|
|
* known and registered before parsing, hence it's an inherited
|
|
* FD. The only reason why it's already known here is that it
|
|
* has been registered multiple times (multiple listeners on the
|
|
* same, or a "shards" directive on the line). There cannot be
|
|
* multiple listeners on one FD but at least we can create a
|
|
* new one from the original one. We won't reconfigure it,
|
|
* however, as this was already done for the first one.
|
|
*/
|
|
fd = dup(fd);
|
|
if (fd == -1) {
|
|
err |= ERR_RETRYABLE | ERR_ALERT;
|
|
memprintf(errmsg, "cannot dup() receiving socket (%s)", strerror(errno));
|
|
goto bind_return;
|
|
}
|
|
}
|
|
|
|
if (fd >= global.maxsock) {
|
|
err |= ERR_FATAL | ERR_ABORT | ERR_ALERT;
|
|
memprintf(errmsg, "not enough free sockets (raise '-n' parameter)");
|
|
goto bind_close_return;
|
|
}
|
|
|
|
if (fd_set_nonblock(fd) == -1) {
|
|
err |= ERR_FATAL | ERR_ALERT;
|
|
memprintf(errmsg, "cannot make socket non-blocking");
|
|
goto bind_close_return;
|
|
}
|
|
|
|
if (!ext && bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
|
|
/* note that bind() creates the socket <tempname> on the file system */
|
|
if (errno == EADDRINUSE) {
|
|
/* the old process might still own it, let's retry */
|
|
err |= ERR_RETRYABLE | ERR_ALERT;
|
|
memprintf(errmsg, "cannot bind UNIX socket (already in use)");
|
|
goto bind_close_return;
|
|
}
|
|
else {
|
|
err |= ERR_FATAL | ERR_ALERT;
|
|
memprintf(errmsg, "cannot bind UNIX socket (%s)", strerror(errno));
|
|
goto bind_close_return;
|
|
}
|
|
}
|
|
|
|
/* <uid> and <gid> different of -1 will be used to change the socket owner.
|
|
* If <mode> is not 0, it will be used to restrict access to the socket.
|
|
* While it is known not to be portable on every OS, it's still useful
|
|
* where it works. We also don't change permissions on abstract sockets.
|
|
*/
|
|
if (!ext && path[0] &&
|
|
(((rx->settings->ux.uid != -1 || rx->settings->ux.gid != -1) &&
|
|
(chown(tempname, rx->settings->ux.uid, rx->settings->ux.gid) == -1)) ||
|
|
(rx->settings->ux.mode != 0 && chmod(tempname, rx->settings->ux.mode) == -1))) {
|
|
err |= ERR_FATAL | ERR_ALERT;
|
|
memprintf(errmsg, "cannot change UNIX socket ownership (%s)", strerror(errno));
|
|
goto err_unlink_temp;
|
|
}
|
|
|
|
/* Point of no return: we are ready, we'll switch the sockets. We don't
|
|
* fear losing the socket <path> because we have a copy of it in
|
|
* backname. Abstract sockets are not renamed.
|
|
*/
|
|
if (!ext && path[0] && rename(tempname, path) < 0) {
|
|
err |= ERR_FATAL | ERR_ALERT;
|
|
memprintf(errmsg, "cannot switch final and temporary UNIX sockets (%s)", strerror(errno));
|
|
goto err_rename;
|
|
}
|
|
|
|
/* Cleanup: only unlink if we didn't inherit the fd from the parent */
|
|
if (!ext && path[0])
|
|
unlink(backname);
|
|
|
|
rx->fd = fd;
|
|
rx->flags |= RX_F_BOUND;
|
|
|
|
if (!path[0]) {
|
|
/* ABNS sockets do not support suspend, and they conflict with
|
|
* other ones (no reuseport), so they must always be unbound.
|
|
*/
|
|
rx->flags |= RX_F_NON_SUSPENDABLE;
|
|
}
|
|
|
|
fd_insert(fd, rx->owner, rx->iocb, rx->bind_tgroup, rx->bind_thread);
|
|
|
|
/* for now, all regularly bound TCP listeners are exportable */
|
|
if (!(rx->flags & RX_F_INHERITED))
|
|
HA_ATOMIC_OR(&fdtab[fd].state, FD_EXPORTED);
|
|
|
|
return err;
|
|
|
|
err_rename:
|
|
ret = rename(backname, path);
|
|
if (ret < 0 && errno == ENOENT)
|
|
unlink(path);
|
|
err_unlink_temp:
|
|
if (!ext && path[0])
|
|
unlink(tempname);
|
|
close(fd);
|
|
err_unlink_back:
|
|
if (!ext && path[0])
|
|
unlink(backname);
|
|
bind_return:
|
|
if (errmsg && *errmsg) {
|
|
if (!ext) {
|
|
char *path_str;
|
|
|
|
path_str = sa2str((struct sockaddr_storage *)&rx->addr, 0, 0);
|
|
memprintf(errmsg, "%s [%s]", *errmsg, ((path_str) ? path_str : ""));
|
|
ha_free(&path_str);
|
|
}
|
|
else
|
|
memprintf(errmsg, "%s [fd %d]", *errmsg, fd);
|
|
}
|
|
bind_ret_err:
|
|
return err;
|
|
|
|
bind_close_return:
|
|
close(fd);
|
|
goto bind_return;
|
|
}
|