diff --git a/doc/configuration.txt b/doc/configuration.txt index 8c620d6ff..8b8f068ce 100644 --- a/doc/configuration.txt +++ b/doc/configuration.txt @@ -2273,20 +2273,29 @@ set-var-fmt set-var-fmt proc.bootid "%pid|%t" setcap <name>[,<name>...] - Sets a list of capabilities that must be preserved when starting with uid 0 - and switching to a non-zero uid. By default all permissions are lost by the - uid switch, but some are often needed when trying connecting to a server from - a foreign address during transparent proxying, or when binding to a port - below 1024, e.g. when using "tune.quic.socket-owner connection", resulting in - setups running entirely under uid 0. Setting capabilities generally is a - safer alternative, as only the required capabilities will be preserved. The - feature is OS-specific and only enabled on Linux when USE_LINUX_CAP=1 is set - at build time. The list of supported capabilities also depends on the OS and - is enumerated by the error message displayed when an invalid capability name - or an empty one is passed. Multiple capabilities may be passed, delimited by - commas. Among those commonly used, "cap_net_raw" allows to transparently bind - to a foreign address, and "cap_net_bind_service" allows to bind to a - privileged port and may be used by QUIC. + Sets a list of capabilities that must be preserved when starting and running + either as a non-root user (uid > 0), or when starting with uid 0 (root) + and then switching to a non-root user. By default all permissions are + lost by the uid switch, but some are often needed when trying to connect to + a server from a foreign address during transparent proxying, or when binding + to a port below 1024, e.g. when using "tune.quic.socket-owner connection", + resulting in setups running entirely under uid 0. Setting capabilities + generally is a safer alternative, as only the required capabilities will be + preserved. 
The feature is OS-specific and only enabled on Linux when + USE_LINUX_CAP=1 is set at build time. The list of supported capabilities also + depends on the OS and is enumerated by the error message displayed when an + invalid capability name or an empty one is passed. Multiple capabilities may + be passed, delimited by commas. Among those commonly used, "cap_net_raw" + allows to transparently bind to a foreign address, and "cap_net_bind_service" + allows to bind to a privileged port and may be used by QUIC. If the process + is started and run under the same non-root user, the needed capabilities + should also be set on the haproxy binary file with the setcap utility, in + addition to this keyword. For more details about this, please see chapter + 13.1 "Linux capabilities support" in the Management guide. + + Example: + global + setcap cap_net_bind_service,cap_net_admin setenv <name> <value> Sets environment variable <name> to value <value>. If the variable exists, it diff --git a/doc/management.txt b/doc/management.txt index 9ddf103f2..361b721ae 100644 --- a/doc/management.txt +++ b/doc/management.txt @@ -36,6 +36,7 @@ Summary 11. Well-known traps to avoid 12. Debugging and performance issues 13. Security considerations +13.1 Linux capabilities support 1. Prerequisites @@ -4563,3 +4564,71 @@ A safe configuration will have : stats socket /var/run/haproxy.stat uid hatop gid hatop mode 600 +13.1 Linux capabilities support +------------------------------ + +Since version 2.9, haproxy supports Linux capabilities. If the binary is +compiled with USE_LINUX_CAP=1, it is able to preserve the capabilities given in +the 'setcap' keyword when switching from the root user to a non-root one. + +Since version 3.1, haproxy also checks whether the capabilities given in the +'setcap' keyword were put in its binary file's permitted set by the +administrator (capget syscall). If this is the case, it transfers these +capabilities into its process effective set (capset syscall), while running +as a non-root user. 
+ +This was done to remove the need to start and run haproxy as root for use cases +such as transparent proxy mode or binding to privileged ports. + +The 'setcap' keyword supports the following network capabilities: +- cap_net_admin +- cap_net_raw (subset of cap_net_admin) +- cap_net_bind_service + +Haproxy never does the transition of these capabilities from its permitted set +to the effective one if they are not listed as 'setcap' arguments. See more +information about 'setcap' keyword and supported capabilities in the chapter +3.1 Process management and security in the Configuration guide. + +The administrator may add the needed capabilities to the haproxy binary file's +permitted set with the following command: + +Example: + # setcap cap_net_admin,cap_net_bind_service=p /usr/local/sbin/haproxy + +Added capabilities will be seen in the process permitted set after its start. +If the same capabilities are the arguments of the 'setcap' keyword, they can +also be seen in the process effective set. This can be checked with the following +command: + +Example: + # grep Cap /proc/<pid>/status + CapInh: 0000000000000000 + CapPrm: 0000000000001400 + CapEff: 0000000000001400 + CapBnd: 000001ffffffffff + CapAmb: 0000000000000000 + +See more details about setcap and capabilities sets in Linux man pages +(capabilities(7)). + +In some cases like transparent proxying, binding a socket to a specific network +interface, or using the set-mark action, the configuration file parser detects that +cap_net_admin or cap_net_raw capabilities are needed. Then, during the +initialization stage, the haproxy process checks whether these capabilities can be +put in its effective set. If it's not possible due to capget or capset syscall +failure (restrictions set on syscalls by some security modules like SELinux, +Seccomp, etc), the process emits diagnostic warnings (visible when started with -dD). 
+ +Due to support of many different platforms with different system settings, +it's impossible for the parser to deduce from the configuration file, if +binding to privileged ports will be done. So, in the case of insufficient +privileges (run as non-root) process will terminate only with an alert +message like below. It's up to a user to recheck its configuration and +capabilities set for haproxy binary. + +Example: + $ haproxy -dD -f haproxy.cfg + ... + [ALERT] (96797) : Binding [haproxy.cfg:36] for frontend fe: cannot bind socket (Permission denied) for [0.0.0.0:80] + [ALERT] (96797) : [haproxy.main()] Some protocols failed to start their listeners! Exiting. diff --git a/include/haproxy/linuxcap.h b/include/haproxy/linuxcap.h index 9c337a40c..486d85f66 100644 --- a/include/haproxy/linuxcap.h +++ b/include/haproxy/linuxcap.h @@ -3,5 +3,6 @@ int prepare_caps_for_setuid(int from_uid, int to_uid); int finalize_caps_after_setuid(int from_uid, int to_uid); +int prepare_caps_from_permitted_set(int from_uid, int to_uid, const char *program_name); #endif /* _HAPROXY_LINUXCAP_H */ diff --git a/src/haproxy.c b/src/haproxy.c index 753ef2b16..587295734 100644 --- a/src/haproxy.c +++ b/src/haproxy.c @@ -3496,6 +3496,14 @@ int main(int argc, char **argv) #endif } +#if defined(USE_LINUX_CAP) + /* If CAP_NET_BIND_SERVICE is in binary file permitted set and process + * is started and run under the same non-root user, this allows + * binding to privileged ports. 
+ */ + prepare_caps_from_permitted_set(geteuid(), global.uid, argv[0]); +#endif + /* Try to get the listeners FD from the previous process using * _getsocks on the stat socket, it must never been done in wait mode * and check mode diff --git a/src/linuxcap.c b/src/linuxcap.c index 4a2a3ab04..7058370de 100644 --- a/src/linuxcap.c +++ b/src/linuxcap.c @@ -44,6 +44,12 @@ static const struct { { 0, 0 } }; +/* provided by sys/capability.h on some distros */ +static inline int capget(cap_user_header_t hdrp, const cap_user_data_t datap) +{ + return syscall(SYS_capget, hdrp, datap); +} + /* provided by sys/capability.h on some distros */ static inline int capset(cap_user_header_t hdrp, const cap_user_data_t datap) { @@ -53,6 +59,78 @@ static inline int capset(cap_user_header_t hdrp, const cap_user_data_t datap) /* defaults to zero, i.e. we don't keep any cap after setuid() */ static uint32_t caplist; +/* try to check if CAP_NET_ADMIN or CAP_NET_RAW are in the process effective + * set in the case when euid is non-root. If there is a match, + * LSTCHK_NETADM is unset from global.last_checks to avoid warning due to + * global.last_checks verifications later in the init process. + * If there is no CAP_NET_ADMIN, nor CAP_NET_RAW in the effective set, try to + * check process permitted set. In this case we promote from permitted set to + * effective only the capabilities, that were marked by user via 'setcap' + * keyword in the global section (caplist). If CAP_NET_ADMIN or/and + * CAP_NET_RAW end up in the effective set this way, LSTCHK_NETADM + * will be unset for the same reason. + * We do this only if the current euid is non-root and there is no global.uid. + * Otherwise the process will continue either to run under root, or it will do + * a transition to unprivileged user later in prepare_caps_for_setuid(), + * which specially manages its capabilities in that case. + * Always returns 0. 
Diagnostic warnings will be emitted only, if + * LSTCHK_NETADM is set in global.last_checks and some failures are + * encountered. + */ +int prepare_caps_from_permitted_set(int from_uid, int to_uid, const char *program_name) +{ + struct __user_cap_data_struct start_cap_data = { }; + struct __user_cap_header_struct cap_hdr = { + .pid = 0, /* current process */ + .version = _LINUX_CAPABILITY_VERSION_1, + }; + + /* started as root */ + if (!from_uid) + return 0; + + /* will change ruid and euid later in set_identity() */ + if (to_uid) + return 0; + + /* first, let's check if CAP_NET_ADMIN or CAP_NET_RAW is already in + * the process effective set. This may happen, when administrator sets + * these capabilities and the file effective bit on haproxy binary via + * setcap, see capabilities man page for details. + */ + if (capget(&cap_hdr, &start_cap_data) == -1) { + if (global.last_checks & LSTCHK_NETADM) + ha_diag_warning("Failed to get process capabilities using capget(): %s. " + "Can't use capabilities that might be set on %s binary " + "by administrator.\n", strerror(errno), program_name); + return 0; + } + + if (start_cap_data.effective & ((1 << CAP_NET_ADMIN)|(1 << CAP_NET_RAW))) { + global.last_checks &= ~LSTCHK_NETADM; + return 0; + } + + /* second, try the process permitted set; caplist is required here. Only + * capabilities present in both caplist and the permitted set are promoted + * (e.g. cap_net_bind_service), so LSTCHK_NETADM is dropped only when + * CAP_NET_ADMIN or CAP_NET_RAW really reached the effective set. 
+ */ + if (caplist && start_cap_data.permitted & caplist) { + start_cap_data.effective |= start_cap_data.permitted & caplist; + if (capset(&cap_hdr, &start_cap_data) == 0) { + if (start_cap_data.effective & ((1 << CAP_NET_ADMIN)|(1 << CAP_NET_RAW))) + global.last_checks &= ~LSTCHK_NETADM; + } else if (global.last_checks & LSTCHK_NETADM) { + ha_diag_warning("Failed to put capabilities from caplist in %s " + "process effective capabilities set using capset(): %s\n", + program_name, strerror(errno)); + } + } + + return 0; +} + /* try to apply capabilities before switching UID from <from_uid> to <to_uid>. * In practice we need to do this in 4 steps: * - set PR_SET_KEEPCAPS to preserve caps across the final setuid()