haproxy/src/haproxy.c
Willy Tarreau 2454d6ef5b [RELEASE] Released version 2.6-dev1
Released version 2.6-dev1 with the following main changes :
    - BUG/MINOR: cache: Fix loop on cache entries in "show cache"
    - BUG/MINOR: httpclient: allow to replace the host header
    - BUG/MINOR: lua: don't expose internal proxies
    - MEDIUM: mworker: seamless reload use the internal sockpairs
    - BUG/MINOR: lua: remove loop initial declarations
    - BUG/MINOR: mworker: does not add the -sf in wait mode
    - BUG/MEDIUM: mworker: FD leak of the eventpoll in wait mode
    - MINOR: quic: do not reject PADDING followed by other frames
    - REORG: quic: add comment on rare thread concurrence during CID alloc
    - CLEANUP: quic: add comments on CID code
    - MEDIUM: quic: handle CIDs to rattach received packets to connection
    - MINOR: qpack: support litteral field line with non-huff name
    - MINOR: quic: activate QUIC traces at compilation
    - MINOR: quic: use more verbose QUIC traces set at compile-time
    - MEDIUM: pool: refactor malloc_trim/glibc and jemalloc api addition detections.
    - MEDIUM: pool: support purging jemalloc arenas in trim_all_pools()
    - BUG/MINOR: mworker: deinit of thread poller was called when not initialized
    - BUILD: pools: only detect link-time jemalloc on ELF platforms
    - CI: github actions: add the output of $CC -dM -E-
    - BUG/MEDIUM: cli: Properly set stream analyzers to process one command at a time
    - BUILD: evports: remove a leftover from the dead_fd cleanup
    - MINOR: quic: Set "no_application_protocol" alert
    - MINOR: quic: More accurate immediately close.
    - MINOR: quic: Immediately close if no transport parameters extension found
    - MINOR: quic: Rename qc_prep_hdshk_pkts() to qc_prep_pkts()
    - MINOR: quic: Possible crash when inspecting the xprt context
    - MINOR: quic: Dynamically allocate the secrete keys
    - MINOR: quic: Add a function to derive the key update secrets
    - MINOR: quic: Add structures to maintain key phase information
    - MINOR: quic: Optional header protection key for quic_tls_derive_keys()
    - MINOR: quic: Add quic_tls_key_update() function for Key Update
    - MINOR: quic: Enable the Key Update process
    - MINOR: quic: Delete the ODCIDs asap
    - BUG/MINOR: vars: Fix the set-var and unset-var converters
    - MEDIUM: pool: Following up on previous pool trimming update.
    - BUG/MEDIUM: mux-h1: Fix splicing by properly detecting end of message
    - BUG/MINOR: mux-h1: Fix splicing for messages with unknown length
    - MINOR: mux-h1: Improve H1 traces by adding info about http parsers
    - MINOR: mux-h1: register a stats module
    - MINOR: mux-h1: add counters instance to h1c
    - MINOR: mux-h1: count open connections/streams on stats
    - MINOR: mux-h1: add stat for total count of connections/streams
    - MINOR: mux-h1: add stat for total amount of bytes received and sent
    - REGTESTS: h1: Add a script to validate H1 splicing support
    - BUG/MINOR: server: Don't rely on last default-server to init server SSL context
    - BUG/MEDIUM: resolvers: Detach query item on response error
    - MEDIUM: resolvers: No longer store query items in a list into the response
    - BUG/MAJOR: segfault using multiple log forward sections.
    - BUG/MEDIUM: h1: Properly reset h1m flags when headers parsing is restarted
    - BUG/MINOR: resolvers: Don't overwrite the error for invalid query domain name
    - BUILD: bug: Fix error when compiling with -DDEBUG_STRICT_NOCRASH
    - BUG/MEDIUM: sample: Fix memory leak in sample_conv_jwt_member_query
    - DOC: spoe: Clarify use of the event directive in spoe-message section
    - DOC: config: Specify %Ta is only available in HTTP mode
    - BUILD: tree-wide: avoid warnings caused by redundant checks of obj_types
    - IMPORT: slz: use the correct CRC32 instruction when running in 32-bit mode
    - MINOR: quic: fix segfault on CONNECTION_CLOSE parsing
    - MINOR: h3: add BUG_ON on control receive function
    - MEDIUM: xprt-quic: finalize app layer initialization after ALPN nego
    - MINOR: h3: remove duplicated FIN flag position
    - MAJOR: mux-quic: implement a simplified mux version
    - MEDIUM: mux-quic: implement release mux operation
    - MEDIUM: quic: detect the stream FIN
    - MINOR: mux-quic: implement subscribe on stream
    - MEDIUM: mux-quic: subscribe on xprt if remaining data after send
    - MEDIUM: mux-quic: wake up xprt on data transferred
    - MEDIUM: mux-quic: handle when sending buffer is full
    - MINOR: quic: RX buffer full due to wrong CRYPTO data handling
    - MINOR: quic: Race issue when consuming RX packets buffer
    - MINOR: quic: QUIC encryption level RX packets race issue
    - MINOR: quic: Delete remaining RX handshake packets
    - MINOR: quic: Remove QUIC TX packet length evaluation function
    - MINOR: hq-interop: fix tx buffering
    - MINOR: mux-quic: remove uneeded code to check fin on TX
    - MINOR: quic: add HTX EOM on request end
    - BUILD: mux-quic: fix compilation with DEBUG_MEM_STATS
    - MINOR: http-rules: Add capture action to http-after-response ruleset
    - BUG/MINOR: cli/server: Don't crash when a server is added with a custom id
    - MINOR: mux-quic: do not release qcs if there is remaining data to send
    - MINOR: quic: notify the mux on CONNECTION_CLOSE
    - BUG/MINOR: mux-quic: properly initialize flow control
    - MINOR: quic: Compilation fix for quic_rx_packet_refinc()
    - MINOR: h3: fix possible invalid dereference on htx parsing
    - DOC: config: retry-on list is space-delimited
    - DOC: config: fix error-log-format example
    - BUG/MEDIUM: mworker/cli: crash when trying to access an old PID in prompt mode
    - MINOR: hq-interop: refix tx buffering
    - REGTESTS: ssl: use X509_V_ERR_UNABLE_TO_GET_ISSUER_CERT_LOCALLY for cert check
    - MINOR: cli: "show version" displays the current process version
    - CLEANUP: cfgparse: modify preprocessor guards around numa detection code
    - MEDIUM: cfgparse: numa detect topology on FreeBSD.
    - BUILD: ssl: unbreak the build with newer libressl
    - MINOR: vars: Move UPDATEONLY flag test to vars_set_ifexist
    - MINOR: vars: Set variable type to ANY upon creation
    - MINOR: vars: Delay variable content freeing in var_set function
    - MINOR: vars: Parse optional conditions passed to the set-var converter
    - MINOR: vars: Parse optional conditions passed to the set-var actions
    - MEDIUM: vars: Enable optional conditions to set-var converter and actions
    - DOC: vars: Add documentation about the set-var conditions
    - REGTESTS: vars: Add new test for conditional set-var
    - MINOR: quic: Attach timer task to thread for the connection.
    - CLEANUP: quic_frame: Remove a useless suffix to STOP_SENDING
    - MINOR: quic: Add traces for STOP_SENDING frame and modify others
    - CLEANUP: quic: Remove cdata_len from quic_tx_packet struct
    - MINOR: quic: Enable TLS 0-RTT if needed
    - MINOR: quic: No TX secret at EARLY_DATA encryption level
    - MINOR: quic: Add quic_set_app_ops() function
    - MINOR: ssl_sock: Set the QUIC application from ssl_sock_advertise_alpn_protos.
    - MINOR: quic: Make xprt support 0-RTT.
    - MINOR: qpack: Missing check for truncated QPACK fields
    - CLEANUP: quic: Comment fix for qc_strm_cpy()
    - MINOR: hq_interop: Stop BUG_ON() truncated streams
    - MINOR: quic: Do not mix packet number space and connection flags
    - CLEANUP: quic: Shorten a litte bit the traces in lstnr_rcv_pkt()
    - MINOR: mux-quic: fix trace on stream creation
    - CLEANUP: quic: fix spelling mistake in a trace
    - CLEANUP: quic: rename quic_conn conn to qc in quic_conn_free
    - MINOR: quic: add missing lock on cid tree
    - MINOR: quic: rename constant for haproxy CIDs length
    - MINOR: quic: refactor concat DCID with address for Initial packets
    - MINOR: quic: compare coalesced packets by DCID
    - MINOR: quic: refactor DCID lookup
    - MINOR: quic: simplify the removal from ODCID tree
    - REGTESTS: vars: Remove useless ssl tunes from conditional set-var test
    - MINOR: ssl: Remove empty lines from "show ssl ocsp-response" output
    - MINOR: quic: Increase the RX buffer for each connection
    - MINOR: quic: Add a function to list remaining RX packets by encryption level
    - MINOR: quic: Stop emptying the RX buffer asap.
    - MINOR: quic: Do not expect to receive only one O-RTT packet
    - MINOR: quic: Do not forget STREAM frames received in disorder
    - MINOR: quic: Wrong packet refcount handling in qc_pkt_insert()
    - DOC: fix misspelled keyword "resolve_retries" in resolvers
    - CLEANUP: quic: rename quic_conn instances to qc
    - REORG: quic: move mux function outside of xprt
    - MINOR: quic: add reference to quic_conn in ssl context
    - MINOR: quic: add const qualifier for traces function
    - MINOR: trace: add quic_conn argument definition
    - MINOR: quic: use quic_conn as argument to traces
    - MINOR: quic: add quic_conn instance in traces for qc_new_conn
    - MINOR: quic: Add stream IDs to qcs_push_frame() traces
    - MINOR: quic: unchecked qc_retrieve_conn_from_cid() returned value
    - MINOR: quic: Wrong dropped packet skipping
    - MINOR: quic: Handle the cases of overlapping STREAM frames
    - MINOR: quic: xprt traces fixes
    - MINOR: quic: Drop asap Retry or Version Negotiation packets
    - MINOR: pools: work around possibly slow malloc_trim() during gc
    - DEBUG: ssl: make sure we never change a servername on established connections
    - MINOR: quic: Add traces for RX frames (flow control related)
    - MINOR: quic: Add CONNECTION_CLOSE phrase to trace
    - REORG: quic: remove qc_ prefix on functions which not used it directly
    - BUG/MINOR: quic: upgrade rdlock to wrlock for ODCID removal
    - MINOR: quic: remove unnecessary call to free_quic_conn_cids()
    - MINOR: quic: store ssl_sock_ctx reference into quic_conn
    - MINOR: quic: remove unnecessary if in qc_pkt_may_rm_hp()
    - MINOR: quic: replace usage of ssl_sock_ctx by quic_conn
    - MINOR: quic: delete timer task on quic_close()
    - MEDIUM: quic: implement refcount for quic_conn
    - BUG/MINOR: quic: fix potential null dereference
    - BUG/MINOR: quic: fix potential use of uninit pointer
    - BUG/MEDIUM: backend: fix possible sockaddr leak on redispatch
    - BUG/MEDIUM: peers: properly skip conn_cur from incoming messages
    - CI: Github Actions: do not show VTest failures if build failed
    - BUILD: opentracing: display warning in case of using OT_USE_VARS at compile time
    - MINOR: compat: detect support for dl_iterate_phdr()
    - MINOR: debug: add ability to dump loaded shared libraries
    - MINOR: debug: add support for -dL to dump library names at boot
    - BUG/MEDIUM: ssl: initialize correctly ssl w/ default-server
    - REGTESTS: ssl: fix ssl_default_server.vtc
    - BUG/MINOR: ssl: free the fields in srv->ssl_ctx
    - BUG/MEDIUM: ssl: free the ckch instance linked to a server
    - REGTESTS: ssl: update of a crt with server deletion
    - BUILD/MINOR: cpuset FreeBSD 14 build fix.
    - MINOR: pools: always evict oldest objects first in pool_evict_from_local_cache()
    - DOC: pool: document the purpose of various structures in the code
    - CLEANUP: pools: do not use the extra pointer to link shared elements
    - CLEANUP: pools: get rid of the POOL_LINK macro
    - MINOR: pool: allocate from the shared cache through the local caches
    - CLEANUP: pools: group list updates in pool_get_from_cache()
    - MINOR: pool: rely on pool_free_nocache() in pool_put_to_shared_cache()
    - MINOR: pool: make pool_is_crowded() always true when no shared pools are used
    - MINOR: pool: check for pool's fullness outside of pool_put_to_shared_cache()
    - MINOR: pool: introduce pool_item to represent shared pool items
    - MINOR: pool: add a function to estimate how many may be released at once
    - MEDIUM: pool: compute the number of evictable entries once per pool
    - MINOR: pools: prepare pool_item to support chained clusters
    - MINOR: pools: pass the objects count to pool_put_to_shared_cache()
    - MEDIUM: pools: centralize cache eviction in a common function
    - MEDIUM: pools: start to batch eviction from local caches
    - MEDIUM: pools: release cached objects in batches
    - OPTIM: pools: reduce local pool cache size to 512kB
    - CLEANUP: assorted typo fixes in the code and comments This is 29th iteration of typo fixes
    - CI: github actions: update OpenSSL to 3.0.1
    - BUILD/MINOR: tools: solaris build fix on dladdr.
    - BUG/MINOR: cli: fix _getsocks with musl libc
    - BUG/MEDIUM: http-ana: Preserve response's FLT_END analyser on L7 retry
    - MINOR: quic: Wrong traces after rework
    - MINOR: quic: Add trace about in flight bytes by packet number space
    - MINOR: quic: Wrong first packet number space computation
    - MINOR: quic: Wrong packet number space computation for PTO
    - MINOR: quic: Wrong loss time computation in qc_packet_loss_lookup()
    - MINOR: quic: Wrong ack_delay compution before calling quic_loss_srtt_update()
    - MINOR: quic: Remove nb_pto_dgrams quic_conn struct member
    - MINOR: quic: Wrong packet number space trace in qc_prep_pkts()
    - MINOR: quic: Useless test in qc_prep_pkts()
    - MINOR: quic: qc_prep_pkts() code moving
    - MINOR: quic: Speeding up Handshake Completion
    - MINOR: quic: Probe Initial packet number space more often
    - MINOR: quic: Probe several packet number space upon timer expiration
    - MINOR: quic: Comment fix.
    - MINOR: quic: Improve qc_prep_pkts() flexibility
    - MINOR: quic: Do not drop secret key but drop the CRYPTO data
    - MINOR: quic: Prepare Handshake packets asap after completed handshake
    - MINOR: quic: Flag asap the connection having reached the anti-amplification limit
    - MINOR: quic: PTO timer too often reset
    - MINOR: quic: Re-arm the PTO timer upon datagram receipt
    - MINOR: proxy: add option idle-close-on-response
    - MINOR: cpuset: switch to sched_setaffinity for FreeBSD 14 and above.
    - CI: refactor spelling check
    - CLEANUP: assorted typo fixes in the code and comments
    - BUILD: makefile: add -Wno-atomic-alignment to work around clang abusive warning
    - MINOR: quic: Only one CRYPTO frame by encryption level
    - MINOR: quic: Missing retransmission from qc_prep_fast_retrans()
    - MINOR: quic: Non-optimal use of a TX buffer
    - BUG/MEDIUM: mworker: don't use _getsocks in wait mode
    - BUG/MINOR: ssl: Store client SNI in SSL context in case of ClientHello error
    - BUG/MAJOR: mux-h1: Don't decrement .curr_len for unsent data
    - DOC: internals: document the pools architecture and API
    - CI: github actions: clean default step conditions
    - BUILD: cpuset: fix build issue on macos introduced by previous change
    - MINOR: quic: Remaining TRACEs with connection as firt arg
    - MINOR: quic: Reset ->conn quic_conn struct member when calling qc_release()
    - MINOR: quic: Flag the connection as being attached to a listener
    - MINOR: quic: Wrong CRYPTO frame concatenation
    - MINOR: quid: Add traces quic_close() and quic_conn_io_cb()
    - REGTESTS: ssl: Fix ssl_errors regtest with OpenSSL 1.0.2
    - MINOR: quic: Do not dereference ->conn quic_conn struct member
    - MINOR: quic: fix return of quic_dgram_read
    - MINOR: quic: add config parse source file
    - MINOR: quic: implement Retry TLS AEAD tag generation
    - MEDIUM: quic: implement Initial token parsing
    - MINOR: quic: define retry_source_connection_id TP
    - MEDIUM: quic: implement Retry emission
    - MINOR: quic: free xprt tasklet on its thread
    - BUG/MEDIUM: connection: properly leave stopping list on error
    - MINOR: pools: enable pools with DEBUG_FAIL_ALLOC as well
    - MINOR: quic: As server, skip 0-RTT packet number space
    - MINOR: quic: Do not wakeup the I/O handler before the mux is started
    - BUG/MEDIUM: htx: Adjust length to add DATA block in an empty HTX buffer
    - CI: github actions: use cache for OpenTracing
    - BUG/MINOR: httpclient: don't send an empty body
    - BUG/MINOR: httpclient: set default Accept and User-Agent headers
    - BUG/MINOR: httpclient/lua: don't pop the lua stack when getting headers
    - BUILD/MINOR: fix solaris build with clang.
    - BUG/MEDIUM: server: avoid changing healthcheck ctx with set server ssl
    - CI: refactor OpenTracing build script
    - DOC: management: mark "set server ssl" as deprecated
    - MEDIUM: cli: yield between each pipelined command
    - MINOR: channel: add new function co_getdelim() to support multiple delimiters
    - BUG/MINOR: cli: avoid O(bufsize) parsing cost on pipelined commands
    - MEDIUM: h2/hpack: emit a Dynamic Table Size Update after settings change
    - MINOR: quic: Retransmit the TX frames in the same order
    - MINOR: quic: Remove the packet number space TX MT_LIST
    - MINOR: quic: Splice the frames which could not be added to packets
    - MINOR: quic: Add the number of TX bytes to traces
    - CLEANUP: quic: Replace <nb_pto_dgrams> by <probe>
    - MINOR: quic: Send two ack-eliciting packets when probing packet number spaces
    - MINOR: quic: Probe regardless of the congestion control
    - MINOR: quic: Speeding up handshake completion
    - MINOR: quic: Release RX Initial packets asap
    - MINOR: quic: Release asap TX frames to be transmitted
    - MINOR: quic: Probe even if coalescing
    - BUG/MEDIUM: cli: Never wait for more data on client shutdown
    - BUG/MEDIUM: mcli: do not try to parse empty buffers
    - BUG/MEDIUM: mcli: always realign wrapping buffers before parsing them
    - BUG/MINOR: stream: make the call_rate only count the no-progress calls
    - MINOR: quic: do not use quic_conn after dropping it
    - MINOR: quic: adjust quic_conn refcount decrement
    - MINOR: quic: fix race-condition on xprt tasklet free
    - MINOR: quic: free SSL context on quic_conn free
    - MINOR: quic: Add QUIC_FT_RETIRE_CONNECTION_ID parsing case
    - MINOR: quic: Wrong packet number space selection
    - DEBUG: pools: add new build option DEBUG_POOL_INTEGRITY
    - MINOR: quic: add missing include in quic_sock
    - MINOR: quic: fix indentation in qc_send_ppkts
    - MINOR: quic: remove dereferencement of connection when possible
    - MINOR: quic: set listener accept cb on parsing
    - MEDIUM: quic/ssl: add new ex data for quic_conn
    - MINOR: quic: initialize ssl_sock_ctx alongside the quic_conn
    - MINOR: ssl: fix build in release mode
    - MINOR: pools: partially uninline pool_free()
    - MINOR: pools: partially uninline pool_alloc()
    - MINOR: pools: prepare POOL_EXTRA to be split into multiple extra fields
    - MINOR: pools: extend pool_cache API to pass a pointer to a caller
    - DEBUG: pools: add new build option DEBUG_POOL_TRACING
    - DEBUG: cli: add a new "debug dev fd" expert command
    - MINOR: fd: register the write side of the poller pipe as well
    - CI: github actions: use cache for SSL libs
    - BUILD: debug/cli: condition test of O_ASYNC to its existence
    - BUILD: pools: fix build error on DEBUG_POOL_TRACING
    - MINOR: quic: refactor header protection removal
    - MINOR: quic: handle app data according to mux/connection layer status
    - MINOR: quic: refactor app-ops initialization
    - MINOR: receiver: define a flag for local accept
    - MEDIUM: quic: flag listener for local accept
    - MINOR: quic: do not manage connection in xprt snd_buf
    - MINOR: quic: remove wait handshake/L6 flags on init connection
    - MINOR: listener: add flags field
    - MINOR: quic: define QUIC flag on listener
    - MINOR: quic: create accept queue for QUIC connections
    - MINOR: listener: define per-thr struct
    - MAJOR: quic: implement accept queue
    - CLEANUP: mworker: simplify mworker_free_child()
    - BUILD/DEBUG: lru: update the standalone code to support the revision
    - DEBUG: lru: use a xorshift generator in the testing code
    - BUG/MAJOR: compiler: relax alignment constraints on certain structures
    - BUG/MEDIUM: fd: always align fdtab[] to 64 bytes
    - MINOR: quic: No DCID length for datagram context
    - MINOR: quic: Comment fix about the token found in Initial packets
    - MINOR: quic: Get rid of a struct buffer in quic_lstnr_dgram_read()
    - MINOR: quic: Remove the QUIC haproxy server packet parser
    - MINOR: quic: Add new defintion about DCIDs offsets
    - MINOR: quic: Add a list to QUIC sock I/O handler RX buffer
    - MINOR: quic: Allocate QUIC datagrams from sock I/O handler
    - MINOR: proto_quic: Allocate datagram handlers
    - MINOR: quic: Pass CID as a buffer to quic_get_cid_tid()
    - MINOR: quic: Convert quic_dgram_read() into a task
    - CLEANUP: quic: Remove useless definition
    - MINOR: proto_quic: Wrong allocations for TX rings and RX bufs
    - MINOR: quic: Do not consume the RX buffer on QUIC sock i/o handler side
    - MINOR: quic: Do not reset a full RX buffer
    - MINOR: quic: Attach all the CIDs to the same connection
    - MINOR: quic: Make usage of by datagram handler trees
    - MEDIUM: da: new optional data file download scheduler service.
    - MEDIUM: da: update doc and build for new scheduler mode service.
    - MEDIUM: da: update module to handle schedule mode.
    - MINOR: quic: Drop Initial packets with wrong ODCID
    - MINOR: quic: Wrong RX buffer tail handling when no more contiguous data
    - MINOR: quic: Iterate over all received datagrams
    - MINOR: quic: refactor quic CID association with threads
    - BUG/MEDIUM: resolvers: Really ignore trailing dot in domain names
    - DEV: flags: Add missing flags
    - BUG/MINOR: sink: Use the right field in appctx context in release callback
    - MINOR: sock: move the unused socket cleaning code into its own function
    - BUG/MEDIUM: mworker: close unused transferred FDs on load failure
    - BUILD: atomic: make the old HA_ATOMIC_LOAD() support const pointers
    - BUILD: cpuset: do not use const on the source of CPU_AND/CPU_ASSIGN
    - BUILD: checks: fix inlining issue on set_srv_agent_[addr,port}
    - BUILD: vars: avoid overlapping field initialization
    - BUILD: server-state: avoid using not-so-portable isblank()
    - BUILD: mux_fcgi: avoid aliasing of a const struct in traces
    - BUILD: tree-wide: mark a few numeric constants as explicitly long long
    - BUILD: tools: fix warning about incorrect cast with dladdr1()
    - BUILD: task: use list_to_mt_list() instead of casting list to mt_list
    - BUILD: mworker: include tools.h for platforms without unsetenv()
    - BUG/MINOR: mworker: fix a FD leak of a sockpair upon a failed reload
    - MINOR: mworker: set the master side of ipc_fd in the worker to -1
    - MINOR: mworker: allocate and initialize a mworker_proc
    - CI: Consistently use actions/checkout@v2
    - REGTESTS: Remove REQUIRE_VERSION=1.8 from all tests
    - MINOR: mworker: sets used or closed worker FDs to -1
    - MINOR: quic: Try to accept 0-RTT connections
    - MINOR: quic: Do not try to treat 0-RTT packets without started mux
    - MINOR: quic: Do not try to accept a connection more than one time
    - MINOR: quic: Initialize the connection timer asap
    - MINOR: quic: Do not use connection struct xprt_ctx too soon
    - Revert "MINOR: mworker: sets used or closed worker FDs to -1"
    - BUILD: makefile: avoid testing all -Wno-* options when not needed
    - BUILD: makefile: validate support for extra warnings by batches
    - BUILD: makefile: only compute alternative options if required
    - DEBUG: fd: make sure we never try to insert/delete an impossible FD number
    - MINOR: mux-quic: add comment
    - MINOR: mux-quic: properly initialize qcc flags
    - MINOR: mux-quic: do not consider CONNECTION_CLOSE for the moment
    - MINOR: mux-quic: create a timeout task
    - MEDIUM: mux-quic: delay the closing with the timeout
    - MINOR: mux-quic: release idle conns on process stopping
    - MINOR: listener: replace the listener's spinlock with an rwlock
    - BUG/MEDIUM: listener: read-lock the listener during accept()
    - MINOR: mworker/cli: set expert/experimental mode from the CLI
2022-02-01 18:06:59 +01:00

3442 lines
97 KiB
C

/*
* HAProxy : High Availability-enabled HTTP/TCP proxy
* Copyright 2000-2022 Willy Tarreau <willy@haproxy.org>.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Please refer to RFC7230 - RFC7235 information about HTTP protocol, and
* RFC6265 for information about cookies usage. More generally, the IETF HTTP
* Working Group's web site should be consulted for protocol related changes :
*
* http://ftp.ics.uci.edu/pub/ietf/http/
*
* Pending bugs (may be not fixed because never reproduced) :
* - solaris only : sometimes, an HTTP proxy with only a dispatch address causes
* the proxy to terminate (no core) if the client breaks the connection during
* the response. Seen on 1.1.8pre4, but never reproduced. May not be related to
* the snprintf() bug since requests were simple (GET / HTTP/1.0), but may be
* related to missing setsid() (fixed in 1.1.15)
* - a proxy with an invalid config will prevent the startup even if disabled.
*
* ChangeLog has moved to the CHANGELOG file.
*
*/
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <ctype.h>
#include <dirent.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/tcp.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <fcntl.h>
#include <errno.h>
#include <signal.h>
#include <stdarg.h>
#include <sys/resource.h>
#include <sys/utsname.h>
#include <sys/wait.h>
#include <time.h>
#include <syslog.h>
#include <grp.h>
#ifdef USE_THREAD
#include <pthread.h>
#endif
#ifdef USE_CPU_AFFINITY
#include <sched.h>
#if defined(__FreeBSD__) || defined(__DragonFly__)
#include <sys/param.h>
#ifdef __FreeBSD__
#include <sys/cpuset.h>
#endif
#endif
#endif
#if defined(USE_PRCTL)
#include <sys/prctl.h>
#endif
#if defined(USE_PROCCTL)
#include <sys/procctl.h>
#endif
#ifdef DEBUG_FULL
#include <assert.h>
#endif
#if defined(USE_SYSTEMD)
#include <systemd/sd-daemon.h>
#endif
#include <import/sha1.h>
#include <haproxy/acl.h>
#include <haproxy/action.h>
#include <haproxy/activity.h>
#include <haproxy/api.h>
#include <haproxy/arg.h>
#include <haproxy/auth.h>
#include <haproxy/base64.h>
#include <haproxy/capture-t.h>
#include <haproxy/cfgcond.h>
#include <haproxy/cfgdiag.h>
#include <haproxy/cfgparse.h>
#include <haproxy/chunk.h>
#include <haproxy/cli.h>
#include <haproxy/clock.h>
#include <haproxy/connection.h>
#ifdef USE_CPU_AFFINITY
#include <haproxy/cpuset.h>
#endif
#include <haproxy/dns.h>
#include <haproxy/dynbuf.h>
#include <haproxy/errors.h>
#include <haproxy/fd.h>
#include <haproxy/filters.h>
#include <haproxy/global.h>
#include <haproxy/hlua.h>
#include <haproxy/http_rules.h>
#include <haproxy/list.h>
#include <haproxy/listener.h>
#include <haproxy/log.h>
#include <haproxy/mworker.h>
#include <haproxy/namespace.h>
#include <haproxy/net_helper.h>
#include <haproxy/openssl-compat.h>
#include <haproxy/pattern.h>
#include <haproxy/peers.h>
#include <haproxy/pool.h>
#include <haproxy/protocol.h>
#include <haproxy/proto_tcp.h>
#include <haproxy/proxy.h>
#include <haproxy/regex.h>
#include <haproxy/sample.h>
#include <haproxy/server.h>
#include <haproxy/session.h>
#include <haproxy/signal.h>
#include <haproxy/sock.h>
#include <haproxy/sock_inet.h>
#include <haproxy/ssl_sock.h>
#include <haproxy/stats-t.h>
#include <haproxy/stream.h>
#include <haproxy/task.h>
#include <haproxy/thread.h>
#include <haproxy/time.h>
#include <haproxy/tools.h>
#include <haproxy/uri_auth-t.h>
#include <haproxy/vars.h>
#include <haproxy/version.h>
/* array of init calls for older platforms */
DECLARE_INIT_STAGES;
/* create a read_mostly section to hold variables which are accessed a lot
* but which almost never change. The purpose is to isolate them in their
* own cache lines where they don't risk to be perturbated by write accesses
* to neighbor variables. We need to create an empty aligned variable for
* this. The fact that the variable is of size zero means that it will be
* eliminated at link time if no other variable uses it, but alignment will
* be respected.
*/
empty_t __read_mostly_align HA_SECTION("read_mostly") ALIGNED(64);
#ifdef BUILD_FEATURES
const char *build_features = BUILD_FEATURES;
#else
const char *build_features = "";
#endif
/* list of config files */
static struct list cfg_cfgfiles = LIST_HEAD_INIT(cfg_cfgfiles);
int pid; /* current process id */
volatile unsigned long sleeping_thread_mask = 0; /* Threads that are about to sleep in poll() */
volatile unsigned long stopping_thread_mask = 0; /* Threads acknowledged stopping */
/* global options */
struct global global = {
.hard_stop_after = TICK_ETERNITY,
.numa_cpu_mapping = 1,
.nbthread = 0,
.req_count = 0,
.logsrvs = LIST_HEAD_INIT(global.logsrvs),
.maxzlibmem = 0,
.comp_rate_lim = 0,
.ssl_server_verify = SSL_SERVER_VERIFY_REQUIRED,
.unix_bind = {
.ux = {
.uid = -1,
.gid = -1,
.mode = 0,
}
},
.tune = {
.options = GTUNE_LISTENER_MQ,
.bufsize = (BUFSIZE + 2*sizeof(void *) - 1) & -(2*sizeof(void *)),
.maxrewrite = MAXREWRITE,
.reserved_bufs = RESERVED_BUFS,
.pattern_cache = DEFAULT_PAT_LRU_SIZE,
.pool_low_ratio = 20,
.pool_high_ratio = 25,
.max_http_hdr = MAX_HTTP_HDR,
#ifdef USE_OPENSSL
.sslcachesize = SSLCACHESIZE,
#endif
.comp_maxlevel = 1,
#ifdef DEFAULT_IDLE_TIMER
.idle_timer = DEFAULT_IDLE_TIMER,
#else
.idle_timer = 1000, /* 1 second */
#endif
},
#ifdef USE_OPENSSL
#ifdef DEFAULT_MAXSSLCONN
.maxsslconn = DEFAULT_MAXSSLCONN,
#endif
#endif
/* others NULL OK */
};
/*********************************************************************/
int stopping; /* non zero means stopping in progress */
int killed; /* non zero means a hard-stop is triggered */
int jobs = 0; /* number of active jobs (conns, listeners, active tasks, ...) */
int unstoppable_jobs = 0; /* number of active jobs that can't be stopped during a soft stop */
int active_peers = 0; /* number of active peers (connection attempts and connected) */
int connected_peers = 0; /* number of connected peers (verified ones) */
/* Here we store information about the pids of the processes we may pause
* or kill. We will send them a signal every 10 ms until we can bind to all
* our ports. With 200 retries, that's about 2 seconds.
*/
#define MAX_START_RETRIES 200
static int *oldpids = NULL;
static int oldpids_sig; /* use USR1 or TERM */
/* Path to the unix socket we use to retrieve listener sockets from the old process */
static const char *old_unixsocket;
int atexit_flag = 0;
int nb_oldpids = 0;
const int zero = 0;
const int one = 1;
const struct linger nolinger = { .l_onoff = 1, .l_linger = 0 };
char hostname[MAX_HOSTNAME_LEN];
char *localpeer = NULL;
static char **old_argv = NULL; /* previous argv but cleaned up */
struct list proc_list = LIST_HEAD_INIT(proc_list);
int master = 0; /* 1 if in master, 0 if in child */
unsigned int rlim_fd_cur_at_boot = 0;
unsigned int rlim_fd_max_at_boot = 0;
/* per-boot randomness */
unsigned char boot_seed[20]; /* per-boot random seed (160 bits initially) */
/* takes the thread config in argument or NULL for any thread */
static void *run_thread_poll_loop(void *data);
/* bitfield of a few warnings to emit just once (WARN_*) */
unsigned int warned = 0;
/* set if experimental features have been used for the current process */
static unsigned int tainted = 0;
unsigned int experimental_directives_allowed = 0;
int check_kw_experimental(struct cfg_keyword *kw, const char *file, int linenum,
char **errmsg)
{
if (kw->flags & KWF_EXPERIMENTAL) {
if (!experimental_directives_allowed) {
memprintf(errmsg, "parsing [%s:%d] : '%s' directive is experimental, must be allowed via a global 'expose-experimental-directives'",
file, linenum, kw->kw);
return 1;
}
mark_tainted(TAINTED_CONFIG_EXP_KW_DECLARED);
}
return 0;
}
/* master CLI configuration (-S flag) */
struct list mworker_cli_conf = LIST_HEAD_INIT(mworker_cli_conf);
/* These are strings to be reported in the output of "haproxy -vv". They may
* either be constants (in which case must_free must be zero) or dynamically
* allocated strings to pass to free() on exit, and in this case must_free
* must be non-zero.
*/
struct list build_opts_list = LIST_HEAD_INIT(build_opts_list);
struct build_opts_str {
struct list list;
const char *str;
int must_free;
};
/*********************************************************************/
/* general purpose functions ***************************************/
/*********************************************************************/
/* used to register some build option strings at boot. Set must_free to
* non-zero if the string must be freed upon exit.
*/
void hap_register_build_opts(const char *str, int must_free)
{
struct build_opts_str *b;
b = calloc(1, sizeof(*b));
if (!b) {
fprintf(stderr, "out of memory\n");
exit(1);
}
b->str = str;
b->must_free = must_free;
LIST_APPEND(&build_opts_list, &b->list);
}
#define VERSION_MAX_ELTS 7
/* This function splits an haproxy version string into an array of integers.
* The syntax of the supported version string is the following:
*
* <a>[.<b>[.<c>[.<d>]]][-{dev,pre,rc}<f>][-*][-<g>]
*
* This validates for example:
* 1.2.1-pre2, 1.2.1, 1.2.10.1, 1.3.16-rc1, 1.4-dev3, 1.5-dev18, 1.5-dev18-43
* 2.4-dev18-f6818d-20
*
* The result is set in a array of <VERSION_MAX_ELTS> elements. Each letter has
* one fixed place in the array. The tags take a numeric value called <e> which
* defaults to 3. "dev" is 1, "rc" and "pre" are 2. Numbers not encountered are
* considered as zero (henxe 1.5 and 1.5.0 are the same).
*
* The resulting values are:
* 1.2.1-pre2 1, 2, 1, 0, 2, 2, 0
* 1.2.1 1, 2, 1, 0, 3, 0, 0
* 1.2.10.1 1, 2, 10, 1, 3, 0, 0
* 1.3.16-rc1 1, 3, 16, 0, 2, 1, 0
* 1.4-dev3 1, 4, 0, 0, 1, 3, 0
* 1.5-dev18 1, 5, 0, 0, 1, 18, 0
* 1.5-dev18-43 1, 5, 0, 0, 1, 18, 43
* 2.4-dev18-f6818d-20 2, 4, 0, 0, 1, 18, 20
*
* The function returns non-zero if the conversion succeeded, or zero if it
* failed.
*/
int split_version(const char *version, unsigned int *value)
{
const char *p, *s;
char *error;
int nelts;
/* Initialize array with zeroes */
for (nelts = 0; nelts < VERSION_MAX_ELTS; nelts++)
value[nelts] = 0;
value[4] = 3;
p = version;
/* If the version number is empty, return false */
if (*p == '\0')
return 0;
/* Convert first number <a> */
value[0] = strtol(p, &error, 10);
p = error + 1;
if (*error == '\0')
return 1;
if (*error == '-')
goto split_version_tag;
if (*error != '.')
return 0;
/* Convert first number <b> */
value[1] = strtol(p, &error, 10);
p = error + 1;
if (*error == '\0')
return 1;
if (*error == '-')
goto split_version_tag;
if (*error != '.')
return 0;
/* Convert first number <c> */
value[2] = strtol(p, &error, 10);
p = error + 1;
if (*error == '\0')
return 1;
if (*error == '-')
goto split_version_tag;
if (*error != '.')
return 0;
/* Convert first number <d> */
value[3] = strtol(p, &error, 10);
p = error + 1;
if (*error == '\0')
return 1;
if (*error != '-')
return 0;
split_version_tag:
/* Check for commit number */
if (*p >= '0' && *p <= '9')
goto split_version_commit;
/* Read tag */
if (strncmp(p, "dev", 3) == 0) { value[4] = 1; p += 3; }
else if (strncmp(p, "rc", 2) == 0) { value[4] = 2; p += 2; }
else if (strncmp(p, "pre", 3) == 0) { value[4] = 2; p += 3; }
else
goto split_version_commit;
/* Convert tag number */
value[5] = strtol(p, &error, 10);
p = error + 1;
if (*error == '\0')
return 1;
if (*error != '-')
return 0;
split_version_commit:
/* Search the last "-" */
s = strrchr(p, '-');
if (s) {
s++;
if (*s == '\0')
return 0;
value[6] = strtol(s, &error, 10);
if (*error != '\0')
value[6] = 0;
return 1;
}
/* convert the version */
value[6] = strtol(p, &error, 10);
if (*error != '\0')
value[6] = 0;
return 1;
}
/* This function compares the current haproxy version with an arbitrary version
* string. It returns:
* -1 : the version in argument is older than the current haproxy version
* 0 : the version in argument is the same as the current haproxy version
* 1 : the version in argument is newer than the current haproxy version
*
* Or some errors:
* -2 : the current haproxy version is not parsable
* -3 : the version in argument is not parsable
*/
int compare_current_version(const char *version)
{
unsigned int loc[VERSION_MAX_ELTS];
unsigned int mod[VERSION_MAX_ELTS];
int i;
/* split versions */
if (!split_version(haproxy_version, loc))
return -2;
if (!split_version(version, mod))
return -3;
/* compare versions */
for (i = 0; i < VERSION_MAX_ELTS; i++) {
if (mod[i] < loc[i])
return -1;
else if (mod[i] > loc[i])
return 1;
}
return 0;
}
static void display_version()
{
struct utsname utsname;
printf("HAProxy version %s %s - https://haproxy.org/\n"
PRODUCT_STATUS "\n", haproxy_version, haproxy_date);
if (strlen(PRODUCT_URL_BUGS) > 0) {
char base_version[20];
int dots = 0;
char *del;
/* only retrieve the base version without distro-specific extensions */
for (del = haproxy_version; *del; del++) {
if (*del == '.')
dots++;
else if (*del < '0' || *del > '9')
break;
}
strlcpy2(base_version, haproxy_version, del - haproxy_version + 1);
if (dots < 2)
printf("Known bugs: https://github.com/haproxy/haproxy/issues?q=is:issue+is:open\n");
else
printf("Known bugs: " PRODUCT_URL_BUGS "\n", base_version);
}
if (uname(&utsname) == 0) {
printf("Running on: %s %s %s %s\n", utsname.sysname, utsname.release, utsname.version, utsname.machine);
}
}
static void display_build_opts()
{
struct build_opts_str *item;
printf("Build options :"
#ifdef BUILD_TARGET
"\n TARGET = " BUILD_TARGET
#endif
#ifdef BUILD_CPU
"\n CPU = " BUILD_CPU
#endif
#ifdef BUILD_CC
"\n CC = " BUILD_CC
#endif
#ifdef BUILD_CFLAGS
"\n CFLAGS = " BUILD_CFLAGS
#endif
#ifdef BUILD_OPTIONS
"\n OPTIONS = " BUILD_OPTIONS
#endif
#ifdef BUILD_DEBUG
"\n DEBUG = " BUILD_DEBUG
#endif
#ifdef BUILD_FEATURES
"\n\nFeature list : " BUILD_FEATURES
#endif
"\n\nDefault settings :"
"\n bufsize = %d, maxrewrite = %d, maxpollevents = %d"
"\n\n",
BUFSIZE, MAXREWRITE, MAX_POLL_EVENTS);
list_for_each_entry(item, &build_opts_list, list) {
puts(item->str);
}
putchar('\n');
list_pollers(stdout);
putchar('\n');
list_mux_proto(stdout);
putchar('\n');
list_services(stdout);
putchar('\n');
list_filters(stdout);
putchar('\n');
}
/*
* This function prints the command line usage and exits
*/
static void usage(char *name)
{
display_version();
fprintf(stderr,
"Usage : %s [-f <cfgfile|cfgdir>]* [ -vdV"
"D ] [ -n <maxconn> ] [ -N <maxpconn> ]\n"
" [ -p <pidfile> ] [ -m <max megs> ] [ -C <dir> ] [-- <cfgfile>*]\n"
" -v displays version ; -vv shows known build options.\n"
" -d enters debug mode ; -db only disables background mode.\n"
" -dM[<byte>] poisons memory with <byte> (defaults to 0x50)\n"
" -V enters verbose mode (disables quiet mode)\n"
" -D goes daemon ; -C changes to <dir> before loading files.\n"
" -W master-worker mode.\n"
#if defined(USE_SYSTEMD)
" -Ws master-worker mode with systemd notify support.\n"
#endif
" -q quiet mode : don't display messages\n"
" -c check mode : only check config files and exit\n"
" -cc check condition : evaluate a condition and exit\n"
" -n sets the maximum total # of connections (uses ulimit -n)\n"
" -m limits the usable amount of memory (in MB)\n"
" -N sets the default, per-proxy maximum # of connections (%d)\n"
" -L set local peer name (default to hostname)\n"
" -p writes pids of all children to this file\n"
#if defined(USE_EPOLL)
" -de disables epoll() usage even when available\n"
#endif
#if defined(USE_KQUEUE)
" -dk disables kqueue() usage even when available\n"
#endif
#if defined(USE_EVPORTS)
" -dv disables event ports usage even when available\n"
#endif
#if defined(USE_POLL)
" -dp disables poll() usage even when available\n"
#endif
#if defined(USE_LINUX_SPLICE)
" -dS disables splice usage (broken on old kernels)\n"
#endif
#if defined(USE_GETADDRINFO)
" -dG disables getaddrinfo() usage\n"
#endif
#if defined(SO_REUSEPORT)
" -dR disables SO_REUSEPORT usage\n"
#endif
#if defined(HA_HAVE_DUMP_LIBS)
" -dL dumps loaded object files after config checks\n"
#endif
" -dr ignores server address resolution failures\n"
" -dV disables SSL verify on servers side\n"
" -dW fails if any warning is emitted\n"
" -dD diagnostic mode : warn about suspicious configuration statements\n"
" -sf/-st [pid ]* finishes/terminates old pids.\n"
" -x <unix_socket> get listening sockets from a unix socket\n"
" -S <bind>[,<bind options>...] new master CLI\n"
"\n",
name, cfg_maxpconn);
exit(1);
}
/*********************************************************************/
/* more specific functions ***************************************/
/*********************************************************************/
/* sends the signal <sig> to all pids found in <oldpids>. Returns the number of
* pids the signal was correctly delivered to.
*/
int tell_old_pids(int sig)
{
int p;
int ret = 0;
for (p = 0; p < nb_oldpids; p++)
if (kill(oldpids[p], sig) == 0)
ret++;
return ret;
}
/*
* remove a pid forom the olpid array and decrease nb_oldpids
* return 1 pid was found otherwise return 0
*/
int delete_oldpid(int pid)
{
int i;
for (i = 0; i < nb_oldpids; i++) {
if (oldpids[i] == pid) {
oldpids[i] = oldpids[nb_oldpids - 1];
oldpids[nb_oldpids - 1] = 0;
nb_oldpids--;
return 1;
}
}
return 0;
}
/*
* When called, this function reexec haproxy with -sf followed by current
* children PIDs and possibly old children PIDs if they didn't leave yet.
*/
static void mworker_reexec()
{
char **next_argv = NULL;
int old_argc = 0; /* previous number of argument */
int next_argc = 0;
int i = 0;
char *msg = NULL;
struct rlimit limit;
struct mworker_proc *current_child = NULL;
mworker_block_signals();
#if defined(USE_SYSTEMD)
if (global.tune.options & GTUNE_USE_SYSTEMD)
sd_notify(0, "RELOADING=1");
#endif
setenv("HAPROXY_MWORKER_REEXEC", "1", 1);
mworker_cleanup_proc();
mworker_proc_list_to_env(); /* put the children description in the env */
/* ensure that we close correctly every listeners before reexecuting */
mworker_cleanlisteners();
/* during the reload we must ensure that every FDs that can't be
* reuse (ie those that are not referenced in the proc_list)
* are closed or they will leak. */
/* close the listeners FD */
mworker_cli_proxy_stop();
if (fdtab)
deinit_pollers();
#ifdef HAVE_SSL_RAND_KEEP_RANDOM_DEVICES_OPEN
/* close random device FDs */
RAND_keep_random_devices_open(0);
#endif
/* restore the initial FD limits */
limit.rlim_cur = rlim_fd_cur_at_boot;
limit.rlim_max = rlim_fd_max_at_boot;
if (setrlimit(RLIMIT_NOFILE, &limit) == -1) {
getrlimit(RLIMIT_NOFILE, &limit);
ha_warning("Failed to restore initial FD limits (cur=%u max=%u), using cur=%u max=%u\n",
rlim_fd_cur_at_boot, rlim_fd_max_at_boot,
(unsigned int)limit.rlim_cur, (unsigned int)limit.rlim_max);
}
/* compute length */
while (old_argv[old_argc])
old_argc++;
/* 1 for haproxy -sf, 2 for -x /socket */
next_argv = calloc(old_argc + 1 + 2 + mworker_child_nb() + 1,
sizeof(*next_argv));
if (next_argv == NULL)
goto alloc_error;
/* copy the program name */
next_argv[next_argc++] = old_argv[0];
/* insert the new options just after argv[0] in case we have a -- */
if (getenv("HAPROXY_MWORKER_WAIT_ONLY") == NULL) {
/* add -sf <PID>* to argv */
if (mworker_child_nb() > 0) {
struct mworker_proc *child;
next_argv[next_argc++] = "-sf";
list_for_each_entry(child, &proc_list, list) {
if (!(child->options & PROC_O_LEAVING) && (child->options & PROC_O_TYPE_WORKER))
current_child = child;
if (!(child->options & (PROC_O_TYPE_WORKER|PROC_O_TYPE_PROG)) || child->pid <= -1)
continue;
if ((next_argv[next_argc++] = memprintf(&msg, "%d", child->pid)) == NULL)
goto alloc_error;
msg = NULL;
}
}
if (current_child) {
/* add the -x option with the socketpair of the current worker */
next_argv[next_argc++] = "-x";
if ((next_argv[next_argc++] = memprintf(&msg, "sockpair@%d", current_child->ipc_fd[0])) == NULL)
goto alloc_error;
msg = NULL;
}
}
/* copy the previous options */
for (i = 1; i < old_argc; i++)
next_argv[next_argc++] = old_argv[i];
signal(SIGPROF, SIG_IGN);
execvp(next_argv[0], next_argv);
ha_warning("Failed to reexecute the master process [%d]: %s\n", pid, strerror(errno));
ha_free(&next_argv);
return;
alloc_error:
ha_free(&next_argv);
ha_warning("Failed to reexecute the master process [%d]: Cannot allocate memory\n", pid);
return;
}
/* reexec haproxy in waitmode */
static void mworker_reexec_waitmode()
{
setenv("HAPROXY_MWORKER_WAIT_ONLY", "1", 1);
mworker_reexec();
}
/* reload haproxy and emit a warning */
void mworker_reload()
{
struct mworker_proc *child;
struct per_thread_deinit_fct *ptdf;
ha_notice("Reloading HAProxy\n");
/* close the poller FD and the thread waker pipe FD */
list_for_each_entry(ptdf, &per_thread_deinit_list, list)
ptdf->fct();
/* increment the number of reloads */
list_for_each_entry(child, &proc_list, list) {
child->reloads++;
}
mworker_reexec();
}
static void mworker_loop()
{
#if defined(USE_SYSTEMD)
if (global.tune.options & GTUNE_USE_SYSTEMD)
sd_notifyf(0, "READY=1\nMAINPID=%lu", (unsigned long)getpid());
#endif
/* Busy polling makes no sense in the master :-) */
global.tune.options &= ~GTUNE_BUSY_POLLING;
signal_unregister(SIGTTIN);
signal_unregister(SIGTTOU);
signal_unregister(SIGUSR1);
signal_unregister(SIGHUP);
signal_unregister(SIGQUIT);
signal_register_fct(SIGTERM, mworker_catch_sigterm, SIGTERM);
signal_register_fct(SIGUSR1, mworker_catch_sigterm, SIGUSR1);
signal_register_fct(SIGTTIN, mworker_broadcast_signal, SIGTTIN);
signal_register_fct(SIGTTOU, mworker_broadcast_signal, SIGTTOU);
signal_register_fct(SIGINT, mworker_catch_sigterm, SIGINT);
signal_register_fct(SIGHUP, mworker_catch_sighup, SIGHUP);
signal_register_fct(SIGUSR2, mworker_catch_sighup, SIGUSR2);
signal_register_fct(SIGCHLD, mworker_catch_sigchld, SIGCHLD);
mworker_unblock_signals();
mworker_cleantasks();
mworker_catch_sigchld(NULL); /* ensure we clean the children in case
some SIGCHLD were lost */
global.nbthread = 1;
#ifdef USE_THREAD
tid_bit = 1;
all_threads_mask = 1;
#endif
jobs++; /* this is the "master" job, we want to take care of the
signals even if there is no listener so the poll loop don't
leave */
fork_poller();
run_thread_poll_loop(NULL);
}
/*
* Reexec the process in failure mode, instead of exiting
*/
void reexec_on_failure()
{
struct mworker_proc *child;
if (!atexit_flag)
return;
/* get the info of the children in the env */
if (mworker_env_to_proc_list() < 0) {
exit(EXIT_FAILURE);
}
/* increment the number of failed reloads */
list_for_each_entry(child, &proc_list, list) {
child->failedreloads++;
}
/* do not keep unused FDs retrieved from the previous process */
sock_drop_unused_old_sockets();
usermsgs_clr(NULL);
ha_warning("Loading failure!\n");
mworker_reexec_waitmode();
}
/*
* upon SIGUSR1, let's have a soft stop. Note that soft_stop() broadcasts
* a signal zero to all subscribers. This means that it's as easy as
* subscribing to signal 0 to get informed about an imminent shutdown.
*/
static void sig_soft_stop(struct sig_handler *sh)
{
soft_stop();
signal_unregister_handler(sh);
pool_gc(NULL);
}
/*
* upon SIGTTOU, we pause everything
*/
static void sig_pause(struct sig_handler *sh)
{
if (protocol_pause_all() & ERR_FATAL) {
const char *msg = "Some proxies refused to pause, performing soft stop now.\n";
ha_warning("%s", msg);
send_log(NULL, LOG_WARNING, "%s", msg);
soft_stop();
}
pool_gc(NULL);
}
/*
* upon SIGTTIN, let's have a soft stop.
*/
static void sig_listen(struct sig_handler *sh)
{
if (protocol_resume_all() & ERR_FATAL) {
const char *msg = "Some proxies refused to resume, probably due to a conflict on a listening port. You may want to try again after the conflicting application is stopped, otherwise a restart might be needed to resume safe operations.\n";
ha_warning("%s", msg);
send_log(NULL, LOG_WARNING, "%s", msg);
}
}
/*
* this function dumps every server's state when the process receives SIGHUP.
*/
static void sig_dump_state(struct sig_handler *sh)
{
struct proxy *p = proxies_list;
ha_warning("SIGHUP received, dumping servers states.\n");
while (p) {
struct server *s = p->srv;
send_log(p, LOG_NOTICE, "SIGHUP received, dumping servers states for proxy %s.\n", p->id);
while (s) {
chunk_printf(&trash,
"SIGHUP: Server %s/%s is %s. Conn: %d act, %d pend, %lld tot.",
p->id, s->id,
(s->cur_state != SRV_ST_STOPPED) ? "UP" : "DOWN",
s->cur_sess, s->queue.length, s->counters.cum_sess);
ha_warning("%s\n", trash.area);
send_log(p, LOG_NOTICE, "%s\n", trash.area);
s = s->next;
}
/* FIXME: those info are a bit outdated. We should be able to distinguish between FE and BE. */
if (!p->srv) {
chunk_printf(&trash,
"SIGHUP: Proxy %s has no servers. Conn: act(FE+BE): %d+%d, %d pend (%d unass), tot(FE+BE): %lld+%lld.",
p->id,
p->feconn, p->beconn, p->totpend, p->queue.length, p->fe_counters.cum_conn, p->be_counters.cum_conn);
} else if (p->srv_act == 0) {
chunk_printf(&trash,
"SIGHUP: Proxy %s %s ! Conn: act(FE+BE): %d+%d, %d pend (%d unass), tot(FE+BE): %lld+%lld.",
p->id,
(p->srv_bck) ? "is running on backup servers" : "has no server available",
p->feconn, p->beconn, p->totpend, p->queue.length, p->fe_counters.cum_conn, p->be_counters.cum_conn);
} else {
chunk_printf(&trash,
"SIGHUP: Proxy %s has %d active servers and %d backup servers available."
" Conn: act(FE+BE): %d+%d, %d pend (%d unass), tot(FE+BE): %lld+%lld.",
p->id, p->srv_act, p->srv_bck,
p->feconn, p->beconn, p->totpend, p->queue.length, p->fe_counters.cum_conn, p->be_counters.cum_conn);
}
ha_warning("%s\n", trash.area);
send_log(p, LOG_NOTICE, "%s\n", trash.area);
p = p->next;
}
}
static void dump(struct sig_handler *sh)
{
/* dump memory usage then free everything possible */
dump_pools();
pool_gc(NULL);
}
/*
* This function dup2 the stdio FDs (0,1,2) with <fd>, then closes <fd>
* If <fd> < 0, it opens /dev/null and use it to dup
*
* In the case of chrooting, you have to open /dev/null before the chroot, and
* pass the <fd> to this function
*/
static void stdio_quiet(int fd)
{
if (fd < 0)
fd = open("/dev/null", O_RDWR, 0);
if (fd > -1) {
fclose(stdin);
fclose(stdout);
fclose(stderr);
dup2(fd, 0);
dup2(fd, 1);
dup2(fd, 2);
if (fd > 2)
close(fd);
return;
}
ha_alert("Cannot open /dev/null\n");
exit(EXIT_FAILURE);
}
/* This function checks if cfg_cfgfiles contains directories.
* If it finds one, it adds all the files (and only files) it contains
* in cfg_cfgfiles in place of the directory (and removes the directory).
* It adds the files in lexical order.
* It adds only files with .cfg extension.
* It doesn't add files with name starting with '.'
*/
static void cfgfiles_expand_directories(void)
{
struct wordlist *wl, *wlb;
char *err = NULL;
list_for_each_entry_safe(wl, wlb, &cfg_cfgfiles, list) {
struct stat file_stat;
struct dirent **dir_entries = NULL;
int dir_entries_nb;
int dir_entries_it;
if (stat(wl->s, &file_stat)) {
ha_alert("Cannot open configuration file/directory %s : %s\n",
wl->s,
strerror(errno));
exit(1);
}
if (!S_ISDIR(file_stat.st_mode))
continue;
/* from this point wl->s is a directory */
dir_entries_nb = scandir(wl->s, &dir_entries, NULL, alphasort);
if (dir_entries_nb < 0) {
ha_alert("Cannot open configuration directory %s : %s\n",
wl->s,
strerror(errno));
exit(1);
}
/* for each element in the directory wl->s */
for (dir_entries_it = 0; dir_entries_it < dir_entries_nb; dir_entries_it++) {
struct dirent *dir_entry = dir_entries[dir_entries_it];
char *filename = NULL;
char *d_name_cfgext = strstr(dir_entry->d_name, ".cfg");
/* don't add filename that begin with .
* only add filename with .cfg extension
*/
if (dir_entry->d_name[0] == '.' ||
!(d_name_cfgext && d_name_cfgext[4] == '\0'))
goto next_dir_entry;
if (!memprintf(&filename, "%s/%s", wl->s, dir_entry->d_name)) {
ha_alert("Cannot load configuration files %s : out of memory.\n",
filename);
exit(1);
}
if (stat(filename, &file_stat)) {
ha_alert("Cannot open configuration file %s : %s\n",
wl->s,
strerror(errno));
exit(1);
}
/* don't add anything else than regular file in cfg_cfgfiles
* this way we avoid loops
*/
if (!S_ISREG(file_stat.st_mode))
goto next_dir_entry;
if (!list_append_word(&wl->list, filename, &err)) {
ha_alert("Cannot load configuration files %s : %s\n",
filename,
err);
exit(1);
}
next_dir_entry:
free(filename);
free(dir_entry);
}
free(dir_entries);
/* remove the current directory (wl) from cfg_cfgfiles */
free(wl->s);
LIST_DELETE(&wl->list);
free(wl);
}
free(err);
}
/*
* copy and cleanup the current argv
* Remove the -sf /-st / -x parameters
* Return an allocated copy of argv
*/
static char **copy_argv(int argc, char **argv)
{
char **newargv, **retargv;
newargv = calloc(argc + 2, sizeof(*newargv));
if (newargv == NULL) {
ha_warning("Cannot allocate memory\n");
return NULL;
}
retargv = newargv;
/* first copy argv[0] */
*newargv++ = *argv++;
argc--;
while (argc > 0) {
if (**argv != '-') {
/* non options are copied but will fail in the argument parser */
*newargv++ = *argv++;
argc--;
} else {
char *flag;
flag = *argv + 1;
if (flag[0] == '-' && flag[1] == 0) {
/* "--\0" copy every arguments till the end of argv */
*newargv++ = *argv++;
argc--;
while (argc > 0) {
*newargv++ = *argv++;
argc--;
}
} else {
switch (*flag) {
case 's':
/* -sf / -st and their parameters are ignored */
if (flag[1] == 'f' || flag[1] == 't') {
argc--;
argv++;
/* The list can't contain a negative value since the only
way to know the end of this list is by looking for the
next option or the end of the options */
while (argc > 0 && argv[0][0] != '-') {
argc--;
argv++;
}
} else {
argc--;
argv++;
}
break;
case 'x':
/* this option and its parameter are ignored */
argc--;
argv++;
if (argc > 0) {
argc--;
argv++;
}
break;
case 'C':
case 'n':
case 'm':
case 'N':
case 'L':
case 'f':
case 'p':
case 'S':
/* these options have only 1 parameter which must be copied and can start with a '-' */
*newargv++ = *argv++;
argc--;
if (argc == 0)
goto error;
*newargv++ = *argv++;
argc--;
break;
default:
/* for other options just copy them without parameters, this is also done
* for options like "--foo", but this will fail in the argument parser.
* */
*newargv++ = *argv++;
argc--;
break;
}
}
}
}
return retargv;
error:
free(retargv);
return NULL;
}
/* Performs basic random seed initialization. The main issue with this is that
* srandom_r() only takes 32 bits and purposely provides a reproducible sequence,
* which means that there will only be 4 billion possible random sequences once
* srandom() is called, regardless of the internal state. Not calling it is
* even worse as we'll always produce the same randoms sequences. What we do
* here is to create an initial sequence from various entropy sources, hash it
* using SHA1 and keep the resulting 160 bits available globally.
*
* We initialize the current process with the first 32 bits before starting the
* polling loop, where all this will be changed to have process specific and
* thread specific sequences.
*
* Before starting threads, it's still possible to call random() as srandom()
* is initialized from this, but after threads and/or processes are started,
* only ha_random() is expected to be used to guarantee distinct sequences.
*/
static void ha_random_boot(char *const *argv)
{
unsigned char message[256];
unsigned char *m = message;
struct timeval tv;
blk_SHA_CTX ctx;
unsigned long l;
int fd;
int i;
/* start with current time as pseudo-random seed */
gettimeofday(&tv, NULL);
write_u32(m, tv.tv_sec); m += 4;
write_u32(m, tv.tv_usec); m += 4;
/* PID and PPID add some OS-based randomness */
write_u16(m, getpid()); m += 2;
write_u16(m, getppid()); m += 2;
/* take up to 160 bits bytes from /dev/urandom if available (non-blocking) */
fd = open("/dev/urandom", O_RDONLY);
if (fd >= 0) {
i = read(fd, m, 20);
if (i > 0)
m += i;
close(fd);
}
/* take up to 160 bits bytes from openssl (non-blocking) */
#ifdef USE_OPENSSL
if (RAND_bytes(m, 20) == 1)
m += 20;
#endif
/* take 160 bits from existing random in case it was already initialized */
for (i = 0; i < 5; i++) {
write_u32(m, random());
m += 4;
}
/* stack address (benefit form operating system's ASLR) */
l = (unsigned long)&m;
memcpy(m, &l, sizeof(l)); m += sizeof(l);
/* argv address (benefit form operating system's ASLR) */
l = (unsigned long)&argv;
memcpy(m, &l, sizeof(l)); m += sizeof(l);
/* use tv_usec again after all the operations above */
gettimeofday(&tv, NULL);
write_u32(m, tv.tv_usec); m += 4;
/*
* At this point, ~84-92 bytes have been used
*/
/* finish with the hostname */
strncpy((char *)m, hostname, message + sizeof(message) - m);
m += strlen(hostname);
/* total message length */
l = m - message;
memset(&ctx, 0, sizeof(ctx));
blk_SHA1_Init(&ctx);
blk_SHA1_Update(&ctx, message, l);
blk_SHA1_Final(boot_seed, &ctx);
srandom(read_u32(boot_seed));
ha_random_seed(boot_seed, sizeof(boot_seed));
}
/* considers splicing proxies' maxconn, computes the ideal global.maxpipes
* setting, and returns it. It may return -1 meaning "unlimited" if some
* unlimited proxies have been found and the global.maxconn value is not yet
* set. It may also return a value greater than maxconn if it's not yet set.
* Note that a value of zero means there is no need for pipes. -1 is never
* returned if global.maxconn is valid.
*/
static int compute_ideal_maxpipes()
{
struct proxy *cur;
int nbfe = 0, nbbe = 0;
int unlimited = 0;
int pipes;
int max;
for (cur = proxies_list; cur; cur = cur->next) {
if (cur->options2 & (PR_O2_SPLIC_ANY)) {
if (cur->cap & PR_CAP_FE) {
max = cur->maxconn;
nbfe += max;
if (!max) {
unlimited = 1;
break;
}
}
if (cur->cap & PR_CAP_BE) {
max = cur->fullconn ? cur->fullconn : global.maxconn;
nbbe += max;
if (!max) {
unlimited = 1;
break;
}
}
}
}
pipes = MAX(nbfe, nbbe);
if (global.maxconn) {
if (pipes > global.maxconn || unlimited)
pipes = global.maxconn;
} else if (unlimited) {
pipes = -1;
}
return pipes >= 4 ? pipes / 4 : pipes;
}
/* considers global.maxsocks, global.maxpipes, async engines, SSL frontends and
* rlimits and computes an ideal maxconn. It's meant to be called only when
* maxsock contains the sum of listening FDs, before it is updated based on
* maxconn and pipes. If there are not enough FDs left, DEFAULT_MAXCONN (by
* default 100) is returned as it is expected that it will even run on tight
* environments, and will maintain compatibility with previous packages that
* used to rely on this value as the default one. The system will emit a
* warning indicating how many FDs are missing anyway if needed.
*/
static int compute_ideal_maxconn()
{
int ssl_sides = !!global.ssl_used_frontend + !!global.ssl_used_backend;
int engine_fds = global.ssl_used_async_engines * ssl_sides;
int pipes = compute_ideal_maxpipes();
int remain = MAX(rlim_fd_cur_at_boot, rlim_fd_max_at_boot);
int maxconn;
/* we have to take into account these elements :
* - number of engine_fds, which inflates the number of FD needed per
* connection by this number.
* - number of pipes per connection on average : for the unlimited
* case, this is 0.5 pipe FDs per connection, otherwise it's a
* fixed value of 2*pipes.
* - two FDs per connection
*/
/* subtract listeners and checks */
remain -= global.maxsock;
/* one epoll_fd/kqueue_fd per thread */
remain -= global.nbthread;
/* one wake-up pipe (2 fd) per thread */
remain -= 2 * global.nbthread;
/* Fixed pipes values : we only subtract them if they're not larger
* than the remaining FDs because pipes are optional.
*/
if (pipes >= 0 && pipes * 2 < remain)
remain -= pipes * 2;
if (pipes < 0) {
/* maxsock = maxconn * 2 + maxconn/4 * 2 + maxconn * engine_fds.
* = maxconn * (2 + 0.5 + engine_fds)
* = maxconn * (4 + 1 + 2*engine_fds) / 2
*/
maxconn = 2 * remain / (5 + 2 * engine_fds);
} else {
/* maxsock = maxconn * 2 + maxconn * engine_fds.
* = maxconn * (2 + engine_fds)
*/
maxconn = remain / (2 + engine_fds);
}
return MAX(maxconn, DEFAULT_MAXCONN);
}
/* computes the estimated maxsock value for the given maxconn based on the
* possibly set global.maxpipes and existing partial global.maxsock. It may
* temporarily change global.maxconn for the time needed to propagate the
* computations, and will reset it.
*/
static int compute_ideal_maxsock(int maxconn)
{
int maxpipes = global.maxpipes;
int maxsock = global.maxsock;
if (!maxpipes) {
int old_maxconn = global.maxconn;
global.maxconn = maxconn;
maxpipes = compute_ideal_maxpipes();
global.maxconn = old_maxconn;
}
maxsock += maxconn * 2; /* each connection needs two sockets */
maxsock += maxpipes * 2; /* each pipe needs two FDs */
maxsock += global.nbthread; /* one epoll_fd/kqueue_fd per thread */
maxsock += 2 * global.nbthread; /* one wake-up pipe (2 fd) per thread */
/* compute fd used by async engines */
if (global.ssl_used_async_engines) {
int sides = !!global.ssl_used_frontend + !!global.ssl_used_backend;
maxsock += maxconn * sides * global.ssl_used_async_engines;
}
return maxsock;
}
/* Tests if it is possible to set the current process's RLIMIT_NOFILE to
* <maxsock>, then sets it back to the previous value. Returns non-zero if the
* value is accepted, non-zero otherwise. This is used to determine if an
* automatic limit may be applied or not. When it is not, the caller knows that
* the highest we can do is the rlim_max at boot. In case of error, we return
* that the setting is possible, so that we defer the error processing to the
* final stage in charge of enforcing this.
*/
static int check_if_maxsock_permitted(int maxsock)
{
struct rlimit orig_limit, test_limit;
int ret;
if (getrlimit(RLIMIT_NOFILE, &orig_limit) != 0)
return 1;
/* don't go further if we can't even set to what we have */
if (setrlimit(RLIMIT_NOFILE, &orig_limit) != 0)
return 1;
test_limit.rlim_max = MAX(maxsock, orig_limit.rlim_max);
test_limit.rlim_cur = test_limit.rlim_max;
ret = setrlimit(RLIMIT_NOFILE, &test_limit);
if (setrlimit(RLIMIT_NOFILE, &orig_limit) != 0)
return 1;
return ret == 0;
}
void mark_tainted(const enum tainted_flags flag)
{
HA_ATOMIC_OR(&tainted, flag);
}
unsigned int get_tainted()
{
int tainted_state;
HA_ATOMIC_STORE(&tainted_state, tainted);
return tainted_state;
}
/*
* This function initializes all the necessary variables. It only returns
* if everything is OK. If something fails, it exits.
*/
static void init(int argc, char **argv)
{
int arg_mode = 0; /* MODE_DEBUG, ... */
char *tmp;
char *cfg_pidfile = NULL;
int err_code = 0;
char *err_msg = NULL;
struct wordlist *wl;
char *progname;
char *change_dir = NULL;
struct proxy *px;
struct post_check_fct *pcf;
int ideal_maxconn;
char *check_condition = NULL;
global.mode = MODE_STARTING;
old_argv = copy_argv(argc, argv);
if (!old_argv) {
ha_alert("failed to copy argv.\n");
exit(1);
}
if (!init_trash_buffers(1)) {
ha_alert("failed to initialize trash buffers.\n");
exit(1);
}
/* NB: POSIX does not make it mandatory for gethostname() to NULL-terminate
* the string in case of truncation, and at least FreeBSD appears not to do
* it.
*/
memset(hostname, 0, sizeof(hostname));
gethostname(hostname, sizeof(hostname) - 1);
if ((localpeer = strdup(hostname)) == NULL) {
ha_alert("Cannot allocate memory for local peer.\n");
exit(EXIT_FAILURE);
}
setenv("HAPROXY_LOCALPEER", localpeer, 1);
/* we were in mworker mode, we should restart in mworker mode */
if (getenv("HAPROXY_MWORKER_REEXEC") != NULL)
global.mode |= MODE_MWORKER;
/*
* Initialize the previously static variables.
*/
totalconn = actconn = listeners = stopping = 0;
killed = 0;
#ifdef HAPROXY_MEMMAX
global.rlimit_memmax_all = HAPROXY_MEMMAX;
#endif
tzset();
clock_init_process_date();
start_date = now;
ha_random_boot(argv);
if (init_acl() != 0)
exit(1);
#ifdef USE_OPENSSL
/* Initialize the random generator.
* Must be called before chroot for access to /dev/urandom
*/
if (!ssl_initialize_random()) {
ha_alert("OpenSSL random data generator initialization failed.\n");
exit(1);
}
#endif
/* Initialise lua. */
hlua_init();
/* Initialize process vars */
vars_init_head(&proc_vars, SCOPE_PROC);
global.tune.options |= GTUNE_USE_SELECT; /* select() is always available */
#if defined(USE_POLL)
global.tune.options |= GTUNE_USE_POLL;
#endif
#if defined(USE_EPOLL)
global.tune.options |= GTUNE_USE_EPOLL;
#endif
#if defined(USE_KQUEUE)
global.tune.options |= GTUNE_USE_KQUEUE;
#endif
#if defined(USE_EVPORTS)
global.tune.options |= GTUNE_USE_EVPORTS;
#endif
#if defined(USE_LINUX_SPLICE)
global.tune.options |= GTUNE_USE_SPLICE;
#endif
#if defined(USE_GETADDRINFO)
global.tune.options |= GTUNE_USE_GAI;
#endif
#if defined(SO_REUSEPORT)
global.tune.options |= GTUNE_USE_REUSEPORT;
#endif
#ifdef USE_THREAD
global.tune.options |= GTUNE_IDLE_POOL_SHARED;
#endif
global.tune.options |= GTUNE_STRICT_LIMITS;
pid = getpid();
progname = *argv;
while ((tmp = strchr(progname, '/')) != NULL)
progname = tmp + 1;
/* the process name is used for the logs only */
chunk_initlen(&global.log_tag, strdup(progname), strlen(progname), strlen(progname));
if (b_orig(&global.log_tag) == NULL) {
chunk_destroy(&global.log_tag);
ha_alert("Cannot allocate memory for log_tag.\n");
exit(EXIT_FAILURE);
}
argc--; argv++;
while (argc > 0) {
char *flag;
if (**argv == '-') {
flag = *argv+1;
/* 1 arg */
if (*flag == 'v') {
display_version();
if (flag[1] == 'v') /* -vv */
display_build_opts();
exit(0);
}
#if defined(USE_EPOLL)
else if (*flag == 'd' && flag[1] == 'e')
global.tune.options &= ~GTUNE_USE_EPOLL;
#endif
#if defined(USE_POLL)
else if (*flag == 'd' && flag[1] == 'p')
global.tune.options &= ~GTUNE_USE_POLL;
#endif
#if defined(USE_KQUEUE)
else if (*flag == 'd' && flag[1] == 'k')
global.tune.options &= ~GTUNE_USE_KQUEUE;
#endif
#if defined(USE_EVPORTS)
else if (*flag == 'd' && flag[1] == 'v')
global.tune.options &= ~GTUNE_USE_EVPORTS;
#endif
#if defined(USE_LINUX_SPLICE)
else if (*flag == 'd' && flag[1] == 'S')
global.tune.options &= ~GTUNE_USE_SPLICE;
#endif
#if defined(USE_GETADDRINFO)
else if (*flag == 'd' && flag[1] == 'G')
global.tune.options &= ~GTUNE_USE_GAI;
#endif
#if defined(SO_REUSEPORT)
else if (*flag == 'd' && flag[1] == 'R')
global.tune.options &= ~GTUNE_USE_REUSEPORT;
#endif
else if (*flag == 'd' && flag[1] == 'V')
global.ssl_server_verify = SSL_SERVER_VERIFY_NONE;
else if (*flag == 'V')
arg_mode |= MODE_VERBOSE;
else if (*flag == 'd' && flag[1] == 'b')
arg_mode |= MODE_FOREGROUND;
else if (*flag == 'd' && flag[1] == 'D')
arg_mode |= MODE_DIAG;
else if (*flag == 'd' && flag[1] == 'W')
arg_mode |= MODE_ZERO_WARNING;
else if (*flag == 'd' && flag[1] == 'M')
mem_poison_byte = flag[2] ? strtol(flag + 2, NULL, 0) : 'P';
else if (*flag == 'd' && flag[1] == 'r')
global.tune.options |= GTUNE_RESOLVE_DONTFAIL;
#if defined(HA_HAVE_DUMP_LIBS)
else if (*flag == 'd' && flag[1] == 'L')
arg_mode |= MODE_DUMP_LIBS;
#endif
else if (*flag == 'd')
arg_mode |= MODE_DEBUG;
else if (*flag == 'c' && flag[1] == 'c') {
arg_mode |= MODE_CHECK_CONDITION;
argv++;
argc--;
check_condition = *argv;
}
else if (*flag == 'c')
arg_mode |= MODE_CHECK;
else if (*flag == 'D')
arg_mode |= MODE_DAEMON;
else if (*flag == 'W' && flag[1] == 's') {
arg_mode |= MODE_MWORKER | MODE_FOREGROUND;
#if defined(USE_SYSTEMD)
global.tune.options |= GTUNE_USE_SYSTEMD;
#else
ha_alert("master-worker mode with systemd support (-Ws) requested, but not compiled. Use master-worker mode (-W) if you are not using Type=notify in your unit file or recompile with USE_SYSTEMD=1.\n\n");
usage(progname);
#endif
}
else if (*flag == 'W')
arg_mode |= MODE_MWORKER;
else if (*flag == 'q')
arg_mode |= MODE_QUIET;
else if (*flag == 'x') {
if (argc <= 1) {
ha_alert("Unix socket path expected with the -x flag\n\n");
usage(progname);
}
if (old_unixsocket)
ha_warning("-x option already set, overwriting the value\n");
old_unixsocket = argv[1];
argv++;
argc--;
}
else if (*flag == 'S') {
struct wordlist *c;
if (argc <= 1) {
ha_alert("Socket and optional bind parameters expected with the -S flag\n");
usage(progname);
}
if ((c = malloc(sizeof(*c))) == NULL || (c->s = strdup(argv[1])) == NULL) {
ha_alert("Cannot allocate memory\n");
exit(EXIT_FAILURE);
}
LIST_INSERT(&mworker_cli_conf, &c->list);
argv++;
argc--;
}
else if (*flag == 's' && (flag[1] == 'f' || flag[1] == 't')) {
/* list of pids to finish ('f') or terminate ('t') */
if (flag[1] == 'f')
oldpids_sig = SIGUSR1; /* finish then exit */
else
oldpids_sig = SIGTERM; /* terminate immediately */
while (argc > 1 && argv[1][0] != '-') {
char * endptr = NULL;
oldpids = realloc(oldpids, (nb_oldpids + 1) * sizeof(int));
if (!oldpids) {
ha_alert("Cannot allocate old pid : out of memory.\n");
exit(1);
}
argc--; argv++;
errno = 0;
oldpids[nb_oldpids] = strtol(*argv, &endptr, 10);
if (errno) {
ha_alert("-%2s option: failed to parse {%s}: %s\n",
flag,
*argv, strerror(errno));
exit(1);
} else if (endptr && strlen(endptr)) {
while (isspace((unsigned char)*endptr)) endptr++;
if (*endptr != 0) {
ha_alert("-%2s option: some bytes unconsumed in PID list {%s}\n",
flag, endptr);
exit(1);
}
}
if (oldpids[nb_oldpids] <= 0)
usage(progname);
nb_oldpids++;
}
}
else if (flag[0] == '-' && flag[1] == 0) { /* "--" */
/* now that's a cfgfile list */
argv++; argc--;
while (argc > 0) {
if (!list_append_word(&cfg_cfgfiles, *argv, &err_msg)) {
ha_alert("Cannot load configuration file/directory %s : %s\n",
*argv,
err_msg);
exit(1);
}
argv++; argc--;
}
break;
}
else { /* >=2 args */
argv++; argc--;
if (argc == 0)
usage(progname);
switch (*flag) {
case 'C' : change_dir = *argv; break;
case 'n' : cfg_maxconn = atol(*argv); break;
case 'm' : global.rlimit_memmax_all = atol(*argv); break;
case 'N' : cfg_maxpconn = atol(*argv); break;
case 'L' :
free(localpeer);
if ((localpeer = strdup(*argv)) == NULL) {
ha_alert("Cannot allocate memory for local peer.\n");
exit(EXIT_FAILURE);
}
setenv("HAPROXY_LOCALPEER", localpeer, 1);
global.localpeer_cmdline = 1;
break;
case 'f' :
if (!list_append_word(&cfg_cfgfiles, *argv, &err_msg)) {
ha_alert("Cannot load configuration file/directory %s : %s\n",
*argv,
err_msg);
exit(1);
}
break;
case 'p' : cfg_pidfile = *argv; break;
default: usage(progname);
}
}
}
else
usage(progname);
argv++; argc--;
}
global.mode |= (arg_mode & (MODE_DAEMON | MODE_MWORKER | MODE_FOREGROUND | MODE_VERBOSE
| MODE_QUIET | MODE_CHECK | MODE_DEBUG | MODE_ZERO_WARNING
| MODE_DIAG | MODE_CHECK_CONDITION | MODE_DUMP_LIBS));
if (getenv("HAPROXY_MWORKER_WAIT_ONLY")) {
unsetenv("HAPROXY_MWORKER_WAIT_ONLY");
global.mode |= MODE_MWORKER_WAIT;
global.mode &= ~MODE_MWORKER;
}
if ((global.mode & (MODE_MWORKER | MODE_CHECK | MODE_CHECK_CONDITION)) == MODE_MWORKER &&
(getenv("HAPROXY_MWORKER_REEXEC") != NULL)) {
atexit_flag = 1;
atexit(reexec_on_failure);
}
if (change_dir && chdir(change_dir) < 0) {
ha_alert("Could not change to directory %s : %s\n", change_dir, strerror(errno));
exit(1);
}
global.maxsock = 10; /* reserve 10 fds ; will be incremented by socket eaters */
#ifdef USE_CPU_AFFINITY
{
int i;
ha_cpuset_zero(&cpu_map.proc);
ha_cpuset_zero(&cpu_map.proc_t1);
for (i = 0; i < MAX_THREADS; ++i) {
ha_cpuset_zero(&cpu_map.thread[i]);
}
}
#endif
usermsgs_clr("config");
if (global.mode & MODE_CHECK_CONDITION) {
int result;
uint32_t err;
const char *errptr;
char *errmsg = NULL;
char *args[MAX_LINE_ARGS+1];
int arg = sizeof(args) / sizeof(*args);
size_t outlen = strlen(check_condition) + 1;
char *w;
err = parse_line(check_condition, check_condition, &outlen, args, &arg,
PARSE_OPT_ENV | PARSE_OPT_WORD_EXPAND | PARSE_OPT_DQUOTE | PARSE_OPT_SQUOTE | PARSE_OPT_BKSLASH,
&errptr);
if (err & PARSE_ERR_QUOTE) {
ha_alert("Syntax Error in condition: Unmatched quote.\n");
exit(2);
}
if (err & PARSE_ERR_HEX) {
ha_alert("Syntax Error in condition: Truncated or invalid hexadecimal sequence.\n");
exit(2);
}
if (err & (PARSE_ERR_TOOLARGE|PARSE_ERR_OVERLAP)) {
ha_alert("Error in condition: Line too long.\n");
exit(2);
}
if (err & PARSE_ERR_TOOMANY) {
ha_alert("Error in condition: Too many words.\n");
exit(2);
}
if (err) {
ha_alert("Unhandled error in condition, please report this to the developers.\n");
exit(2);
}
/* remerge all words into a single expression */
for (w = *args; (w += strlen(w)) < check_condition + outlen - 1; *w = ' ')
;
result = cfg_eval_condition(args, &errmsg, &errptr);
if (result < 0) {
if (errmsg)
ha_alert("Failed to evaluate condition: %s\n", errmsg);
exit(2);
}
exit(result ? 0 : 1);
}
/* in wait mode, we don't try to read the configuration files */
if (!(global.mode & MODE_MWORKER_WAIT)) {
char *env_cfgfiles = NULL;
int env_err = 0;
/* handle cfgfiles that are actually directories */
cfgfiles_expand_directories();
if (LIST_ISEMPTY(&cfg_cfgfiles))
usage(progname);
list_for_each_entry(wl, &cfg_cfgfiles, list) {
int ret;
if (env_err == 0) {
if (!memprintf(&env_cfgfiles, "%s%s%s",
(env_cfgfiles ? env_cfgfiles : ""),
(env_cfgfiles ? ";" : ""), wl->s))
env_err = 1;
}
ret = readcfgfile(wl->s);
if (ret == -1) {
ha_alert("Could not open configuration file %s : %s\n",
wl->s, strerror(errno));
free(env_cfgfiles);
exit(1);
}
if (ret & (ERR_ABORT|ERR_FATAL))
ha_alert("Error(s) found in configuration file : %s\n", wl->s);
err_code |= ret;
if (err_code & ERR_ABORT) {
free(env_cfgfiles);
exit(1);
}
}
/* do not try to resolve arguments nor to spot inconsistencies when
* the configuration contains fatal errors caused by files not found
* or failed memory allocations.
*/
if (err_code & (ERR_ABORT|ERR_FATAL)) {
ha_alert("Fatal errors found in configuration.\n");
free(env_cfgfiles);
exit(1);
}
if (env_err) {
ha_alert("Could not allocate memory for HAPROXY_CFGFILES env variable\n");
exit(1);
}
setenv("HAPROXY_CFGFILES", env_cfgfiles, 1);
free(env_cfgfiles);
}
if (global.mode & MODE_MWORKER) {
struct mworker_proc *tmproc;
setenv("HAPROXY_MWORKER", "1", 1);
if (getenv("HAPROXY_MWORKER_REEXEC") == NULL) {
tmproc = mworker_proc_new();
if (!tmproc) {
ha_alert("Cannot allocate process structures.\n");
exit(EXIT_FAILURE);
}
tmproc->options |= PROC_O_TYPE_MASTER; /* master */
tmproc->pid = pid;
tmproc->timestamp = start_date.tv_sec;
proc_self = tmproc;
LIST_APPEND(&proc_list, &tmproc->list);
}
tmproc = mworker_proc_new();
if (!tmproc) {
ha_alert("Cannot allocate process structures.\n");
exit(EXIT_FAILURE);
}
tmproc->options |= PROC_O_TYPE_WORKER; /* worker */
if (mworker_cli_sockpair_new(tmproc, 0) < 0) {
exit(EXIT_FAILURE);
}
LIST_APPEND(&proc_list, &tmproc->list);
}
if (global.mode & (MODE_MWORKER|MODE_MWORKER_WAIT)) {
struct wordlist *it, *c;
/* get the info of the children in the env */
if (mworker_env_to_proc_list() < 0) {
exit(EXIT_FAILURE);
}
if (!LIST_ISEMPTY(&mworker_cli_conf)) {
if (mworker_cli_proxy_create() < 0) {
ha_alert("Can't create the master's CLI.\n");
exit(EXIT_FAILURE);
}
list_for_each_entry_safe(c, it, &mworker_cli_conf, list) {
if (mworker_cli_proxy_new_listener(c->s) < 0) {
ha_alert("Can't create the master's CLI.\n");
exit(EXIT_FAILURE);
}
LIST_DELETE(&c->list);
free(c->s);
free(c);
}
}
}
if (!LIST_ISEMPTY(&mworker_cli_conf) && !(arg_mode & MODE_MWORKER)) {
ha_warning("a master CLI socket was defined, but master-worker mode (-W) is not enabled.\n");
}
/* destroy unreferenced defaults proxies */
proxy_destroy_all_unref_defaults();
err_code |= check_config_validity();
for (px = proxies_list; px; px = px->next) {
struct server *srv;
struct post_proxy_check_fct *ppcf;
struct post_server_check_fct *pscf;
if (px->flags & (PR_FL_DISABLED|PR_FL_STOPPED))
continue;
list_for_each_entry(pscf, &post_server_check_list, list) {
for (srv = px->srv; srv; srv = srv->next)
err_code |= pscf->fct(srv);
}
list_for_each_entry(ppcf, &post_proxy_check_list, list)
err_code |= ppcf->fct(px);
}
if (err_code & (ERR_ABORT|ERR_FATAL)) {
ha_alert("Fatal errors found in configuration.\n");
exit(1);
}
err_code |= pattern_finalize_config();
if (err_code & (ERR_ABORT|ERR_FATAL)) {
ha_alert("Failed to finalize pattern config.\n");
exit(1);
}
if (global.rlimit_memmax_all)
global.rlimit_memmax = global.rlimit_memmax_all;
#ifdef USE_NS
err_code |= netns_init();
if (err_code & (ERR_ABORT|ERR_FATAL)) {
ha_alert("Failed to initialize namespace support.\n");
exit(1);
}
#endif
/* Apply server states */
apply_server_state();
for (px = proxies_list; px; px = px->next)
srv_compute_all_admin_states(px);
/* Apply servers' configured address */
err_code |= srv_init_addr();
if (err_code & (ERR_ABORT|ERR_FATAL)) {
ha_alert("Failed to initialize server(s) addr.\n");
exit(1);
}
if (warned & WARN_ANY && global.mode & MODE_ZERO_WARNING) {
ha_alert("Some warnings were found and 'zero-warning' is set. Aborting.\n");
exit(1);
}
#if defined(HA_HAVE_DUMP_LIBS)
if (global.mode & MODE_DUMP_LIBS) {
qfprintf(stdout, "List of loaded object files:\n");
chunk_reset(&trash);
if (dump_libs(&trash, 0))
printf("%s", trash.area);
}
#endif
if (global.mode & MODE_CHECK) {
struct peers *pr;
struct proxy *px;
if (warned & WARN_ANY)
qfprintf(stdout, "Warnings were found.\n");
for (pr = cfg_peers; pr; pr = pr->next)
if (pr->peers_fe)
break;
for (px = proxies_list; px; px = px->next)
if (!(px->flags & (PR_FL_DISABLED|PR_FL_STOPPED)) && px->li_all)
break;
if (!px) {
/* We may only have log-forward section */
for (px = cfg_log_forward; px; px = px->next)
if (!(px->flags & (PR_FL_DISABLED|PR_FL_STOPPED)) && px->li_all)
break;
}
if (pr || px) {
/* At least one peer or one listener has been found */
qfprintf(stdout, "Configuration file is valid\n");
deinit_and_exit(0);
}
qfprintf(stdout, "Configuration file has no error but will not start (no listener) => exit(2).\n");
exit(2);
}
if (global.mode & MODE_DIAG) {
cfg_run_diagnostics();
}
/* now we know the buffer size, we can initialize the channels and buffers */
init_buffer();
list_for_each_entry(pcf, &post_check_list, list) {
err_code |= pcf->fct();
if (err_code & (ERR_ABORT|ERR_FATAL))
exit(1);
}
if (cfg_maxconn > 0)
global.maxconn = cfg_maxconn;
if (global.cli_fe)
global.maxsock += global.cli_fe->maxconn;
if (cfg_peers) {
/* peers also need to bypass global maxconn */
struct peers *p = cfg_peers;
for (p = cfg_peers; p; p = p->next)
if (p->peers_fe)
global.maxsock += p->peers_fe->maxconn;
}
if (cfg_pidfile) {
free(global.pidfile);
global.pidfile = strdup(cfg_pidfile);
}
/* Now we want to compute the maxconn and possibly maxsslconn values.
* It's a bit tricky. Maxconn defaults to the pre-computed value based
* on rlim_fd_cur and the number of FDs in use due to the configuration,
* and maxsslconn defaults to DEFAULT_MAXSSLCONN. On top of that we can
* enforce a lower limit based on memmax.
*
* If memmax is set, then it depends on which values are set. If
* maxsslconn is set, we use memmax to determine how many cleartext
* connections may be added, and set maxconn to the sum of the two.
* If maxconn is set and not maxsslconn, maxsslconn is computed from
* the remaining amount of memory between memmax and the cleartext
* connections. If neither are set, then it is considered that all
* connections are SSL-capable, and maxconn is computed based on this,
* then maxsslconn accordingly. We need to know if SSL is used on the
* frontends, backends, or both, because when it's used on both sides,
* we need twice the value for maxsslconn, but we only count the
* handshake once since it is not performed on the two sides at the
* same time (frontend-side is terminated before backend-side begins).
* The SSL stack is supposed to have filled ssl_session_cost and
* ssl_handshake_cost during its initialization. In any case, if
* SYSTEM_MAXCONN is set, we still enforce it as an upper limit for
* maxconn in order to protect the system.
*/
ideal_maxconn = compute_ideal_maxconn();
if (!global.rlimit_memmax) {
if (global.maxconn == 0) {
global.maxconn = ideal_maxconn;
if (global.mode & (MODE_VERBOSE|MODE_DEBUG))
fprintf(stderr, "Note: setting global.maxconn to %d.\n", global.maxconn);
}
}
#ifdef USE_OPENSSL
else if (!global.maxconn && !global.maxsslconn &&
(global.ssl_used_frontend || global.ssl_used_backend)) {
/* memmax is set, compute everything automatically. Here we want
* to ensure that all SSL connections will be served. We take
* care of the number of sides where SSL is used, and consider
* the worst case : SSL used on both sides and doing a handshake
* simultaneously. Note that we can't have more than maxconn
* handshakes at a time by definition, so for the worst case of
* two SSL conns per connection, we count a single handshake.
*/
int sides = !!global.ssl_used_frontend + !!global.ssl_used_backend;
int64_t mem = global.rlimit_memmax * 1048576ULL;
int retried = 0;
mem -= global.tune.sslcachesize * 200; // about 200 bytes per SSL cache entry
mem -= global.maxzlibmem;
mem = mem * MEM_USABLE_RATIO;
/* Principle: we test once to set maxconn according to the free
* memory. If it results in values the system rejects, we try a
* second time by respecting rlim_fd_max. If it fails again, we
* go back to the initial value and will let the final code
* dealing with rlimit report the error. That's up to 3 attempts.
*/
do {
global.maxconn = mem /
((STREAM_MAX_COST + 2 * global.tune.bufsize) + // stream + 2 buffers per stream
sides * global.ssl_session_max_cost + // SSL buffers, one per side
global.ssl_handshake_max_cost); // 1 handshake per connection max
if (retried == 1)
global.maxconn = MIN(global.maxconn, ideal_maxconn);
global.maxconn = round_2dig(global.maxconn);
#ifdef SYSTEM_MAXCONN
if (global.maxconn > SYSTEM_MAXCONN)
global.maxconn = SYSTEM_MAXCONN;
#endif /* SYSTEM_MAXCONN */
global.maxsslconn = sides * global.maxconn;
if (check_if_maxsock_permitted(compute_ideal_maxsock(global.maxconn)))
break;
} while (retried++ < 2);
if (global.mode & (MODE_VERBOSE|MODE_DEBUG))
fprintf(stderr, "Note: setting global.maxconn to %d and global.maxsslconn to %d.\n",
global.maxconn, global.maxsslconn);
}
else if (!global.maxsslconn &&
(global.ssl_used_frontend || global.ssl_used_backend)) {
/* memmax and maxconn are known, compute maxsslconn automatically.
* maxsslconn being forced, we don't know how many of it will be
* on each side if both sides are being used. The worst case is
* when all connections use only one SSL instance because
* handshakes may be on two sides at the same time.
*/
int sides = !!global.ssl_used_frontend + !!global.ssl_used_backend;
int64_t mem = global.rlimit_memmax * 1048576ULL;
int64_t sslmem;
mem -= global.tune.sslcachesize * 200; // about 200 bytes per SSL cache entry
mem -= global.maxzlibmem;
mem = mem * MEM_USABLE_RATIO;
sslmem = mem - global.maxconn * (int64_t)(STREAM_MAX_COST + 2 * global.tune.bufsize);
global.maxsslconn = sslmem / (global.ssl_session_max_cost + global.ssl_handshake_max_cost);
global.maxsslconn = round_2dig(global.maxsslconn);
if (sslmem <= 0 || global.maxsslconn < sides) {
ha_alert("Cannot compute the automatic maxsslconn because global.maxconn is already too "
"high for the global.memmax value (%d MB). The absolute maximum possible value "
"without SSL is %d, but %d was found and SSL is in use.\n",
global.rlimit_memmax,
(int)(mem / (STREAM_MAX_COST + 2 * global.tune.bufsize)),
global.maxconn);
exit(1);
}
if (global.maxsslconn > sides * global.maxconn)
global.maxsslconn = sides * global.maxconn;
if (global.mode & (MODE_VERBOSE|MODE_DEBUG))
fprintf(stderr, "Note: setting global.maxsslconn to %d\n", global.maxsslconn);
}
#endif
else if (!global.maxconn) {
/* memmax and maxsslconn are known/unused, compute maxconn automatically */
int sides = !!global.ssl_used_frontend + !!global.ssl_used_backend;
int64_t mem = global.rlimit_memmax * 1048576ULL;
int64_t clearmem;
int retried = 0;
if (global.ssl_used_frontend || global.ssl_used_backend)
mem -= global.tune.sslcachesize * 200; // about 200 bytes per SSL cache entry
mem -= global.maxzlibmem;
mem = mem * MEM_USABLE_RATIO;
clearmem = mem;
if (sides)
clearmem -= (global.ssl_session_max_cost + global.ssl_handshake_max_cost) * (int64_t)global.maxsslconn;
/* Principle: we test once to set maxconn according to the free
* memory. If it results in values the system rejects, we try a
* second time by respecting rlim_fd_max. If it fails again, we
* go back to the initial value and will let the final code
* dealing with rlimit report the error. That's up to 3 attempts.
*/
do {
global.maxconn = clearmem / (STREAM_MAX_COST + 2 * global.tune.bufsize);
if (retried == 1)
global.maxconn = MIN(global.maxconn, ideal_maxconn);
global.maxconn = round_2dig(global.maxconn);
#ifdef SYSTEM_MAXCONN
if (global.maxconn > SYSTEM_MAXCONN)
global.maxconn = SYSTEM_MAXCONN;
#endif /* SYSTEM_MAXCONN */
if (clearmem <= 0 || !global.maxconn) {
ha_alert("Cannot compute the automatic maxconn because global.maxsslconn is already too "
"high for the global.memmax value (%d MB). The absolute maximum possible value "
"is %d, but %d was found.\n",
global.rlimit_memmax,
(int)(mem / (global.ssl_session_max_cost + global.ssl_handshake_max_cost)),
global.maxsslconn);
exit(1);
}
if (check_if_maxsock_permitted(compute_ideal_maxsock(global.maxconn)))
break;
} while (retried++ < 2);
if (global.mode & (MODE_VERBOSE|MODE_DEBUG)) {
if (sides && global.maxsslconn > sides * global.maxconn) {
fprintf(stderr, "Note: global.maxsslconn is forced to %d which causes global.maxconn "
"to be limited to %d. Better reduce global.maxsslconn to get more "
"room for extra connections.\n", global.maxsslconn, global.maxconn);
}
fprintf(stderr, "Note: setting global.maxconn to %d\n", global.maxconn);
}
}
global.maxsock = compute_ideal_maxsock(global.maxconn);
global.hardmaxconn = global.maxconn;
if (!global.maxpipes)
global.maxpipes = compute_ideal_maxpipes();
/* update connection pool thresholds */
global.tune.pool_low_count = ((long long)global.maxsock * global.tune.pool_low_ratio + 99) / 100;
global.tune.pool_high_count = ((long long)global.maxsock * global.tune.pool_high_ratio + 99) / 100;
proxy_adjust_all_maxconn();
if (global.tune.maxpollevents <= 0)
global.tune.maxpollevents = MAX_POLL_EVENTS;
if (global.tune.runqueue_depth <= 0) {
/* tests on various thread counts from 1 to 64 have shown an
* optimal queue depth following roughly 1/sqrt(threads).
*/
int s = my_flsl(global.nbthread);
s += (global.nbthread / s); // roughly twice the sqrt.
global.tune.runqueue_depth = RUNQUEUE_DEPTH * 2 / s;
}
if (global.tune.recv_enough == 0)
global.tune.recv_enough = MIN_RECV_AT_ONCE_ENOUGH;
if (global.tune.maxrewrite >= global.tune.bufsize / 2)
global.tune.maxrewrite = global.tune.bufsize / 2;
usermsgs_clr(NULL);
if (arg_mode & (MODE_DEBUG | MODE_FOREGROUND)) {
/* command line debug mode inhibits configuration mode */
global.mode &= ~(MODE_DAEMON | MODE_QUIET);
global.mode |= (arg_mode & (MODE_DEBUG | MODE_FOREGROUND));
}
if (arg_mode & MODE_DAEMON) {
/* command line daemon mode inhibits foreground and debug modes mode */
global.mode &= ~(MODE_DEBUG | MODE_FOREGROUND);
global.mode |= arg_mode & MODE_DAEMON;
}
global.mode |= (arg_mode & (MODE_QUIET | MODE_VERBOSE));
if ((global.mode & MODE_DEBUG) && (global.mode & (MODE_DAEMON | MODE_QUIET))) {
ha_warning("<debug> mode incompatible with <quiet> and <daemon>. Keeping <debug> only.\n");
global.mode &= ~(MODE_DAEMON | MODE_QUIET);
}
if (global.nbthread < 1)
global.nbthread = 1;
/* Realloc trash buffers because global.tune.bufsize may have changed */
if (!init_trash_buffers(0)) {
ha_alert("failed to initialize trash buffers.\n");
exit(1);
}
if (!init_log_buffers()) {
ha_alert("failed to initialize log buffers.\n");
exit(1);
}
/*
* Note: we could register external pollers here.
* Built-in pollers have been registered before main().
*/
if (!(global.tune.options & GTUNE_USE_KQUEUE))
disable_poller("kqueue");
if (!(global.tune.options & GTUNE_USE_EVPORTS))
disable_poller("evports");
if (!(global.tune.options & GTUNE_USE_EPOLL))
disable_poller("epoll");
if (!(global.tune.options & GTUNE_USE_POLL))
disable_poller("poll");
if (!(global.tune.options & GTUNE_USE_SELECT))
disable_poller("select");
/* Note: we could disable any poller by name here */
if (global.mode & (MODE_VERBOSE|MODE_DEBUG)) {
list_pollers(stderr);
fprintf(stderr, "\n");
list_filters(stderr);
}
if (!init_pollers()) {
ha_alert("No polling mechanism available.\n"
" It is likely that haproxy was built with TARGET=generic and that FD_SETSIZE\n"
" is too low on this platform to support maxconn and the number of listeners\n"
" and servers. You should rebuild haproxy specifying your system using TARGET=\n"
" in order to support other polling systems (poll, epoll, kqueue) or reduce the\n"
" global maxconn setting to accommodate the system's limitation. For reference,\n"
" FD_SETSIZE=%d on this system, global.maxconn=%d resulting in a maximum of\n"
" %d file descriptors. You should thus reduce global.maxconn by %d. Also,\n"
" check build settings using 'haproxy -vv'.\n\n",
FD_SETSIZE, global.maxconn, global.maxsock, (global.maxsock + 1 - FD_SETSIZE) / 2);
exit(1);
}
if (global.mode & (MODE_VERBOSE|MODE_DEBUG)) {
printf("Using %s() as the polling mechanism.\n", cur_poller.name);
}
if (!global.node)
global.node = strdup(hostname);
/* stop disabled proxies */
for (px = proxies_list; px; px = px->next) {
if (px->flags & (PR_FL_DISABLED|PR_FL_STOPPED))
stop_proxy(px);
}
if (!hlua_post_init())
exit(1);
free(err_msg);
}
void deinit(void)
{
struct proxy *p = proxies_list, *p0;
struct wordlist *wl, *wlb;
struct uri_auth *uap, *ua = NULL;
struct logsrv *log, *logb;
struct build_opts_str *bol, *bolb;
struct post_deinit_fct *pdf, *pdfb;
struct proxy_deinit_fct *pxdf, *pxdfb;
struct server_deinit_fct *srvdf, *srvdfb;
struct per_thread_init_fct *tif, *tifb;
struct per_thread_deinit_fct *tdf, *tdfb;
struct per_thread_alloc_fct *taf, *tafb;
struct per_thread_free_fct *tff, *tffb;
struct post_server_check_fct *pscf, *pscfb;
struct post_check_fct *pcf, *pcfb;
struct post_proxy_check_fct *ppcf, *ppcfb;
int cur_fd;
/* At this point the listeners state is weird:
* - most listeners are still bound and referenced in their protocol
* - some might be zombies that are not in their proto anymore, but
* still appear in their proxy's listeners with a valid FD.
* - some might be stopped and still appear in their proxy as FD #-1
* - among all of them, some might be inherited hence shared and we're
* not allowed to pause them or whatever, we must just close them.
* - finally some are not listeners (pipes, logs, stdout, etc) and
* must be left intact.
*
* The safe way to proceed is to unbind (and close) whatever is not yet
* unbound so that no more receiver/listener remains alive. Then close
* remaining listener FDs, which correspond to zombie listeners (those
* belonging to disabled proxies that were in another process).
* objt_listener() would be cleaner here but not converted yet.
*/
protocol_unbind_all();
for (cur_fd = 0; cur_fd < global.maxsock; cur_fd++) {
if (!fdtab || !fdtab[cur_fd].owner)
continue;
if (fdtab[cur_fd].iocb == &sock_accept_iocb) {
struct listener *l = fdtab[cur_fd].owner;
BUG_ON(l->state != LI_INIT);
unbind_listener(l);
}
}
deinit_signals();
while (p) {
/* build a list of unique uri_auths */
if (!ua)
ua = p->uri_auth;
else {
/* check if p->uri_auth is unique */
for (uap = ua; uap; uap=uap->next)
if (uap == p->uri_auth)
break;
if (!uap && p->uri_auth) {
/* add it, if it is */
p->uri_auth->next = ua;
ua = p->uri_auth;
}
}
p0 = p;
p = p->next;
free_proxy(p0);
}/* end while(p) */
/* destroy all referenced defaults proxies */
proxy_destroy_all_unref_defaults();
while (ua) {
struct stat_scope *scope, *scopep;
uap = ua;
ua = ua->next;
free(uap->uri_prefix);
free(uap->auth_realm);
free(uap->node);
free(uap->desc);
userlist_free(uap->userlist);
free_act_rules(&uap->http_req_rules);
scope = uap->scope;
while (scope) {
scopep = scope;
scope = scope->next;
free(scopep->px_id);
free(scopep);
}
free(uap);
}
userlist_free(userlist);
cfg_unregister_sections();
deinit_log_buffers();
list_for_each_entry(pdf, &post_deinit_list, list)
pdf->fct();
ha_free(&global.log_send_hostname);
chunk_destroy(&global.log_tag);
ha_free(&global.chroot);
ha_free(&global.pidfile);
ha_free(&global.node);
ha_free(&global.desc);
ha_free(&oldpids);
ha_free(&old_argv);
ha_free(&localpeer);
ha_free(&global.server_state_base);
ha_free(&global.server_state_file);
task_destroy(idle_conn_task);
idle_conn_task = NULL;
list_for_each_entry_safe(log, logb, &global.logsrvs, list) {
LIST_DELETE(&log->list);
free(log->conf.file);
free(log);
}
list_for_each_entry_safe(wl, wlb, &cfg_cfgfiles, list) {
free(wl->s);
LIST_DELETE(&wl->list);
free(wl);
}
list_for_each_entry_safe(bol, bolb, &build_opts_list, list) {
if (bol->must_free)
free((void *)bol->str);
LIST_DELETE(&bol->list);
free(bol);
}
list_for_each_entry_safe(pxdf, pxdfb, &proxy_deinit_list, list) {
LIST_DELETE(&pxdf->list);
free(pxdf);
}
list_for_each_entry_safe(pdf, pdfb, &post_deinit_list, list) {
LIST_DELETE(&pdf->list);
free(pdf);
}
list_for_each_entry_safe(srvdf, srvdfb, &server_deinit_list, list) {
LIST_DELETE(&srvdf->list);
free(srvdf);
}
list_for_each_entry_safe(pcf, pcfb, &post_check_list, list) {
LIST_DELETE(&pcf->list);
free(pcf);
}
list_for_each_entry_safe(pscf, pscfb, &post_server_check_list, list) {
LIST_DELETE(&pscf->list);
free(pscf);
}
list_for_each_entry_safe(ppcf, ppcfb, &post_proxy_check_list, list) {
LIST_DELETE(&ppcf->list);
free(ppcf);
}
list_for_each_entry_safe(tif, tifb, &per_thread_init_list, list) {
LIST_DELETE(&tif->list);
free(tif);
}
list_for_each_entry_safe(tdf, tdfb, &per_thread_deinit_list, list) {
LIST_DELETE(&tdf->list);
free(tdf);
}
list_for_each_entry_safe(taf, tafb, &per_thread_alloc_list, list) {
LIST_DELETE(&taf->list);
free(taf);
}
list_for_each_entry_safe(tff, tffb, &per_thread_free_list, list) {
LIST_DELETE(&tff->list);
free(tff);
}
vars_prune(&proc_vars, NULL, NULL);
pool_destroy_all();
deinit_pollers();
} /* end deinit() */
__attribute__((noreturn)) void deinit_and_exit(int status)
{
global.mode |= MODE_STOPPING;
deinit();
exit(status);
}
/* Runs the polling loop */
void run_poll_loop()
{
int next, wake;
clock_update_date(0,1);
while (1) {
wake_expired_tasks();
/* check if we caught some signals and process them in the
first thread */
if (signal_queue_len && tid == 0) {
activity[tid].wake_signal++;
signal_process_queue();
}
/* Process a few tasks */
process_runnable_tasks();
/* also stop if we failed to cleanly stop all tasks */
if (killed > 1)
break;
/* expire immediately if events are pending */
wake = 1;
if (thread_has_tasks())
activity[tid].wake_tasks++;
else {
_HA_ATOMIC_OR(&sleeping_thread_mask, tid_bit);
__ha_barrier_atomic_store();
if (thread_has_tasks()) {
activity[tid].wake_tasks++;
_HA_ATOMIC_AND(&sleeping_thread_mask, ~tid_bit);
} else
wake = 0;
}
if (!wake) {
int i;
if (stopping) {
/* stop muxes before acknowledging stopping */
if (!(stopping_thread_mask & tid_bit)) {
task_wakeup(mux_stopping_data[tid].task, TASK_WOKEN_OTHER);
wake = 1;
}
if (_HA_ATOMIC_OR_FETCH(&stopping_thread_mask, tid_bit) == tid_bit) {
/* notify all threads that stopping was just set */
for (i = 0; i < global.nbthread; i++)
if (((all_threads_mask & ~stopping_thread_mask) >> i) & 1)
wake_thread(i);
}
}
/* stop when there's nothing left to do */
if ((jobs - unstoppable_jobs) == 0 &&
(stopping_thread_mask & all_threads_mask) == all_threads_mask) {
/* wake all threads waiting on jobs==0 */
for (i = 0; i < global.nbthread; i++)
if (((all_threads_mask & ~tid_bit) >> i) & 1)
wake_thread(i);
break;
}
}
/* If we have to sleep, measure how long */
next = wake ? TICK_ETERNITY : next_timer_expiry();
/* The poller will ensure it returns around <next> */
cur_poller.poll(&cur_poller, next, wake);
activity[tid].loops++;
}
}
static void *run_thread_poll_loop(void *data)
{
struct per_thread_alloc_fct *ptaf;
struct per_thread_init_fct *ptif;
struct per_thread_deinit_fct *ptdf;
struct per_thread_free_fct *ptff;
static int init_left = 0;
__decl_thread(static pthread_mutex_t init_mutex = PTHREAD_MUTEX_INITIALIZER);
__decl_thread(static pthread_cond_t init_cond = PTHREAD_COND_INITIALIZER);
ha_set_thread(data);
set_thread_cpu_affinity();
clock_set_local_source();
/* Now, initialize one thread init at a time. This is better since
* some init code is a bit tricky and may release global resources
* after reallocating them locally. This will also ensure there is
* no race on file descriptors allocation.
*/
#ifdef USE_THREAD
pthread_mutex_lock(&init_mutex);
#endif
/* The first thread must set the number of threads left */
if (!init_left)
init_left = global.nbthread;
init_left--;
clock_init_thread_date();
/* per-thread alloc calls performed here are not allowed to snoop on
* other threads, so they are free to initialize at their own rhythm
* as long as they act as if they were alone. None of them may rely
* on resources initialized by the other ones.
*/
list_for_each_entry(ptaf, &per_thread_alloc_list, list) {
if (!ptaf->fct()) {
ha_alert("failed to allocate resources for thread %u.\n", tid);
#ifdef USE_THREAD
pthread_mutex_unlock(&init_mutex);
#endif
exit(1);
}
}
/* per-thread init calls performed here are not allowed to snoop on
* other threads, so they are free to initialize at their own rhythm
* as long as they act as if they were alone.
*/
list_for_each_entry(ptif, &per_thread_init_list, list) {
if (!ptif->fct()) {
ha_alert("failed to initialize thread %u.\n", tid);
#ifdef USE_THREAD
pthread_mutex_unlock(&init_mutex);
#endif
exit(1);
}
}
/* enabling protocols will result in fd_insert() calls to be performed,
* we want all threads to have already allocated their local fd tables
* before doing so, thus only the last thread does it.
*/
if (init_left == 0)
protocol_enable_all();
#ifdef USE_THREAD
pthread_cond_broadcast(&init_cond);
pthread_mutex_unlock(&init_mutex);
/* now wait for other threads to finish starting */
pthread_mutex_lock(&init_mutex);
while (init_left)
pthread_cond_wait(&init_cond, &init_mutex);
pthread_mutex_unlock(&init_mutex);
#endif
#if defined(PR_SET_NO_NEW_PRIVS) && defined(USE_PRCTL)
/* Let's refrain from using setuid executables. This way the impact of
* an eventual vulnerability in a library remains limited. It may
* impact external checks but who cares about them anyway ? In the
* worst case it's possible to disable the option. Obviously we do this
* in workers only. We can't hard-fail on this one as it really is
* implementation dependent though we're interested in feedback, hence
* the warning.
*/
if (!(global.tune.options & GTUNE_INSECURE_SETUID) && !master) {
static int warn_fail;
if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) == -1 && !_HA_ATOMIC_FETCH_ADD(&warn_fail, 1)) {
ha_warning("Failed to disable setuid, please report to developers with detailed "
"information about your operating system. You can silence this warning "
"by adding 'insecure-setuid-wanted' in the 'global' section.\n");
}
}
#endif
#if defined(RLIMIT_NPROC)
/* all threads have started, it's now time to prevent any new thread
* or process from starting. Obviously we do this in workers only. We
* can't hard-fail on this one as it really is implementation dependent
* though we're interested in feedback, hence the warning.
*/
if (!(global.tune.options & GTUNE_INSECURE_FORK) && !master) {
struct rlimit limit = { .rlim_cur = 0, .rlim_max = 0 };
static int warn_fail;
if (setrlimit(RLIMIT_NPROC, &limit) == -1 && !_HA_ATOMIC_FETCH_ADD(&warn_fail, 1)) {
ha_warning("Failed to disable forks, please report to developers with detailed "
"information about your operating system. You can silence this warning "
"by adding 'insecure-fork-wanted' in the 'global' section.\n");
}
}
#endif
run_poll_loop();
list_for_each_entry(ptdf, &per_thread_deinit_list, list)
ptdf->fct();
list_for_each_entry(ptff, &per_thread_free_list, list)
ptff->fct();
#ifdef USE_THREAD
_HA_ATOMIC_AND(&all_threads_mask, ~tid_bit);
if (tid > 0)
pthread_exit(NULL);
#endif
return NULL;
}
/* set uid/gid depending on global settings */
static void set_identity(const char *program_name)
{
if (global.gid) {
if (getgroups(0, NULL) > 0 && setgroups(0, NULL) == -1)
ha_warning("[%s.main()] Failed to drop supplementary groups. Using 'gid'/'group'"
" without 'uid'/'user' is generally useless.\n", program_name);
if (setgid(global.gid) == -1) {
ha_alert("[%s.main()] Cannot set gid %d.\n", program_name, global.gid);
protocol_unbind_all();
exit(1);
}
}
if (global.uid && setuid(global.uid) == -1) {
ha_alert("[%s.main()] Cannot set uid %d.\n", program_name, global.uid);
protocol_unbind_all();
exit(1);
}
}
int main(int argc, char **argv)
{
int err, retry;
struct rlimit limit;
int pidfd = -1;
int intovf = (unsigned char)argc + 1; /* let the compiler know it's strictly positive */
/* Catch forced CFLAGS that miss 2-complement integer overflow */
if (intovf + 0x7FFFFFFF >= intovf) {
fprintf(stderr,
"FATAL ERROR: invalid code detected -- cannot go further, please recompile!\n"
"The source code was miscompiled by the compiler, which usually indicates that\n"
"some of the CFLAGS needed to work around overzealous compiler optimizations\n"
"were overwritten at build time. Please do not force CFLAGS, and read Makefile\n"
"and INSTALL files to decide on the best way to pass your local build options.\n"
"\nBuild options :"
#ifdef BUILD_TARGET
"\n TARGET = " BUILD_TARGET
#endif
#ifdef BUILD_CPU
"\n CPU = " BUILD_CPU
#endif
#ifdef BUILD_CC
"\n CC = " BUILD_CC
#endif
#ifdef BUILD_CFLAGS
"\n CFLAGS = " BUILD_CFLAGS
#endif
#ifdef BUILD_OPTIONS
"\n OPTIONS = " BUILD_OPTIONS
#endif
#ifdef BUILD_DEBUG
"\n DEBUG = " BUILD_DEBUG
#endif
"\n\n");
return 1;
}
setvbuf(stdout, NULL, _IONBF, 0);
/* take a copy of initial limits before we possibly change them */
getrlimit(RLIMIT_NOFILE, &limit);
if (limit.rlim_max == RLIM_INFINITY)
limit.rlim_max = limit.rlim_cur;
rlim_fd_cur_at_boot = limit.rlim_cur;
rlim_fd_max_at_boot = limit.rlim_max;
/* process all initcalls in order of potential dependency */
RUN_INITCALLS(STG_PREPARE);
RUN_INITCALLS(STG_LOCK);
RUN_INITCALLS(STG_ALLOC);
RUN_INITCALLS(STG_POOL);
RUN_INITCALLS(STG_REGISTER);
RUN_INITCALLS(STG_INIT);
init(argc, argv);
signal_register_fct(SIGQUIT, dump, SIGQUIT);
signal_register_fct(SIGUSR1, sig_soft_stop, SIGUSR1);
signal_register_fct(SIGHUP, sig_dump_state, SIGHUP);
signal_register_fct(SIGUSR2, NULL, 0);
/* Always catch SIGPIPE even on platforms which define MSG_NOSIGNAL.
* Some recent FreeBSD setups report broken pipes, and MSG_NOSIGNAL
* was defined there, so let's stay on the safe side.
*/
signal_register_fct(SIGPIPE, NULL, 0);
/* ulimits */
if (!global.rlimit_nofile)
global.rlimit_nofile = global.maxsock;
if (global.rlimit_nofile) {
limit.rlim_cur = global.rlimit_nofile;
limit.rlim_max = MAX(rlim_fd_max_at_boot, limit.rlim_cur);
if (setrlimit(RLIMIT_NOFILE, &limit) == -1) {
getrlimit(RLIMIT_NOFILE, &limit);
if (global.tune.options & GTUNE_STRICT_LIMITS) {
ha_alert("[%s.main()] Cannot raise FD limit to %d, limit is %d.\n",
argv[0], global.rlimit_nofile, (int)limit.rlim_cur);
exit(1);
}
else {
/* try to set it to the max possible at least */
limit.rlim_cur = limit.rlim_max;
if (setrlimit(RLIMIT_NOFILE, &limit) != -1)
getrlimit(RLIMIT_NOFILE, &limit);
ha_warning("[%s.main()] Cannot raise FD limit to %d, limit is %d.\n",
argv[0], global.rlimit_nofile, (int)limit.rlim_cur);
global.rlimit_nofile = limit.rlim_cur;
}
}
}
if (global.rlimit_memmax) {
limit.rlim_cur = limit.rlim_max =
global.rlimit_memmax * 1048576ULL;
#ifdef RLIMIT_AS
if (setrlimit(RLIMIT_AS, &limit) == -1) {
if (global.tune.options & GTUNE_STRICT_LIMITS) {
ha_alert("[%s.main()] Cannot fix MEM limit to %d megs.\n",
argv[0], global.rlimit_memmax);
exit(1);
}
else
ha_warning("[%s.main()] Cannot fix MEM limit to %d megs.\n",
argv[0], global.rlimit_memmax);
}
#else
if (setrlimit(RLIMIT_DATA, &limit) == -1) {
if (global.tune.options & GTUNE_STRICT_LIMITS) {
ha_alert("[%s.main()] Cannot fix MEM limit to %d megs.\n",
argv[0], global.rlimit_memmax);
exit(1);
}
else
ha_warning("[%s.main()] Cannot fix MEM limit to %d megs.\n",
argv[0], global.rlimit_memmax);
}
#endif
}
/* Try to get the listeners FD from the previous process using
* _getsocks on the stat socket, it must never been done in wait mode
* and check mode
*/
if (old_unixsocket &&
!(global.mode & (MODE_MWORKER_WAIT|MODE_CHECK|MODE_CHECK_CONDITION))) {
if (strcmp("/dev/null", old_unixsocket) != 0) {
if (sock_get_old_sockets(old_unixsocket) != 0) {
ha_alert("Failed to get the sockets from the old process!\n");
if (!(global.mode & MODE_MWORKER))
exit(1);
}
}
}
/* We will loop at most 100 times with 10 ms delay each time.
* That's at most 1 second. We only send a signal to old pids
* if we cannot grab at least one port.
*/
retry = MAX_START_RETRIES;
err = ERR_NONE;
while (retry >= 0) {
struct timeval w;
err = protocol_bind_all(retry == 0 || nb_oldpids == 0);
/* exit the loop on no error or fatal error */
if ((err & (ERR_RETRYABLE|ERR_FATAL)) != ERR_RETRYABLE)
break;
if (nb_oldpids == 0 || retry == 0)
break;
/* FIXME-20060514: Solaris and OpenBSD do not support shutdown() on
* listening sockets. So on those platforms, it would be wiser to
* simply send SIGUSR1, which will not be undoable.
*/
if (tell_old_pids(SIGTTOU) == 0) {
/* no need to wait if we can't contact old pids */
retry = 0;
continue;
}
/* give some time to old processes to stop listening */
w.tv_sec = 0;
w.tv_usec = 10*1000;
select(0, NULL, NULL, NULL, &w);
retry--;
}
/* Note: protocol_bind_all() sends an alert when it fails. */
if ((err & ~ERR_WARN) != ERR_NONE) {
ha_alert("[%s.main()] Some protocols failed to start their listeners! Exiting.\n", argv[0]);
if (retry != MAX_START_RETRIES && nb_oldpids) {
protocol_unbind_all(); /* cleanup everything we can */
tell_old_pids(SIGTTIN);
}
exit(1);
}
if (!(global.mode & MODE_MWORKER_WAIT) && listeners == 0) {
ha_alert("[%s.main()] No enabled listener found (check for 'bind' directives) ! Exiting.\n", argv[0]);
/* Note: we don't have to send anything to the old pids because we
* never stopped them. */
exit(1);
}
/* Ok, all listeners should now be bound, close any leftover sockets
* the previous process gave us, we don't need them anymore
*/
sock_drop_unused_old_sockets();
/* prepare pause/play signals */
signal_register_fct(SIGTTOU, sig_pause, SIGTTOU);
signal_register_fct(SIGTTIN, sig_listen, SIGTTIN);
/* MODE_QUIET can inhibit alerts and warnings below this line */
if (getenv("HAPROXY_MWORKER_REEXEC") != NULL) {
/* either stdin/out/err are already closed or should stay as they are. */
if ((global.mode & MODE_DAEMON)) {
/* daemon mode re-executing, stdin/stdout/stderr are already closed so keep quiet */
global.mode &= ~MODE_VERBOSE;
global.mode |= MODE_QUIET; /* ensure that we won't say anything from now */
}
} else {
if ((global.mode & MODE_QUIET) && !(global.mode & MODE_VERBOSE)) {
/* detach from the tty */
stdio_quiet(-1);
}
}
/* open log & pid files before the chroot */
if ((global.mode & MODE_DAEMON || global.mode & MODE_MWORKER) && global.pidfile != NULL) {
unlink(global.pidfile);
pidfd = open(global.pidfile, O_CREAT | O_WRONLY | O_TRUNC, 0644);
if (pidfd < 0) {
ha_alert("[%s.main()] Cannot create pidfile %s\n", argv[0], global.pidfile);
if (nb_oldpids)
tell_old_pids(SIGTTIN);
protocol_unbind_all();
exit(1);
}
}
if ((global.last_checks & LSTCHK_NETADM) && global.uid) {
ha_alert("[%s.main()] Some configuration options require full privileges, so global.uid cannot be changed.\n"
"", argv[0]);
protocol_unbind_all();
exit(1);
}
/* If the user is not root, we'll still let them try the configuration
* but we inform them that unexpected behaviour may occur.
*/
if ((global.last_checks & LSTCHK_NETADM) && getuid())
ha_warning("[%s.main()] Some options which require full privileges"
" might not work well.\n"
"", argv[0]);
if ((global.mode & (MODE_MWORKER|MODE_DAEMON)) == 0) {
/* chroot if needed */
if (global.chroot != NULL) {
if (chroot(global.chroot) == -1 || chdir("/") == -1) {
ha_alert("[%s.main()] Cannot chroot(%s).\n", argv[0], global.chroot);
if (nb_oldpids)
tell_old_pids(SIGTTIN);
protocol_unbind_all();
exit(1);
}
}
}
if (nb_oldpids && !(global.mode & MODE_MWORKER_WAIT))
nb_oldpids = tell_old_pids(oldpids_sig);
/* send a SIGTERM to workers who have a too high reloads number */
if ((global.mode & MODE_MWORKER) && !(global.mode & MODE_MWORKER_WAIT))
mworker_kill_max_reloads(SIGTERM);
/* Note that any error at this stage will be fatal because we will not
* be able to restart the old pids.
*/
if ((global.mode & (MODE_MWORKER | MODE_DAEMON)) == 0)
set_identity(argv[0]);
/* check ulimits */
limit.rlim_cur = limit.rlim_max = 0;
getrlimit(RLIMIT_NOFILE, &limit);
if (limit.rlim_cur < global.maxsock) {
if (global.tune.options & GTUNE_STRICT_LIMITS) {
ha_alert("[%s.main()] FD limit (%d) too low for maxconn=%d/maxsock=%d. "
"Please raise 'ulimit-n' to %d or more to avoid any trouble.\n",
argv[0], (int)limit.rlim_cur, global.maxconn, global.maxsock,
global.maxsock);
exit(1);
}
else
ha_alert("[%s.main()] FD limit (%d) too low for maxconn=%d/maxsock=%d. "
"Please raise 'ulimit-n' to %d or more to avoid any trouble.\n",
argv[0], (int)limit.rlim_cur, global.maxconn, global.maxsock,
global.maxsock);
}
if (global.mode & (MODE_DAEMON | MODE_MWORKER | MODE_MWORKER_WAIT)) {
int ret = 0;
int in_parent = 0;
int devnullfd = -1;
/*
* if daemon + mworker: must fork here to let a master
* process live in background before forking children
*/
if ((getenv("HAPROXY_MWORKER_REEXEC") == NULL)
&& (global.mode & MODE_MWORKER)
&& (global.mode & MODE_DAEMON)) {
ret = fork();
if (ret < 0) {
ha_alert("[%s.main()] Cannot fork.\n", argv[0]);
protocol_unbind_all();
exit(1); /* there has been an error */
} else if (ret > 0) { /* parent leave to daemonize */
exit(0);
} else /* change the process group ID in the child (master process) */
setsid();
}
/* if in master-worker mode, write the PID of the father */
if (global.mode & MODE_MWORKER) {
char pidstr[100];
snprintf(pidstr, sizeof(pidstr), "%d\n", (int)getpid());
if (pidfd >= 0)
DISGUISE(write(pidfd, pidstr, strlen(pidstr)));
}
/* the father launches the required number of processes */
if (!(global.mode & MODE_MWORKER_WAIT)) {
if (global.mode & MODE_MWORKER)
mworker_ext_launch_all();
ret = fork();
if (ret < 0) {
ha_alert("[%s.main()] Cannot fork.\n", argv[0]);
protocol_unbind_all();
exit(1); /* there has been an error */
}
else if (ret == 0) { /* child breaks here */
/* This one must not be exported, it's internal! */
unsetenv("HAPROXY_MWORKER_REEXEC");
ha_random_jump96(1);
}
else { /* parent here */
in_parent = 1;
if (pidfd >= 0 && !(global.mode & MODE_MWORKER)) {
char pidstr[100];
snprintf(pidstr, sizeof(pidstr), "%d\n", ret);
DISGUISE(write(pidfd, pidstr, strlen(pidstr)));
}
if (global.mode & MODE_MWORKER) {
struct mworker_proc *child;
ha_notice("New worker (%d) forked\n", ret);
/* find the right mworker_proc */
list_for_each_entry(child, &proc_list, list) {
if (child->reloads == 0 && child->options & PROC_O_TYPE_WORKER) {
child->timestamp = now.tv_sec;
child->pid = ret;
child->version = strdup(haproxy_version);
break;
}
}
}
}
} else {
/* wait mode */
in_parent = 1;
}
#ifdef USE_CPU_AFFINITY
if (!in_parent && ha_cpuset_count(&cpu_map.proc)) { /* only do this if the process has a CPU map */
#if defined(CPUSET_USE_CPUSET) || defined(__DragonFly__)
struct hap_cpuset *set = &cpu_map.proc;
sched_setaffinity(0, sizeof(set->cpuset), &set->cpuset);
#elif defined(__FreeBSD__)
struct hap_cpuset *set = &cpu_map.proc;
ret = cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_PID, -1, sizeof(set->cpuset), &set->cpuset);
#endif
}
#endif
/* close the pidfile both in children and father */
if (pidfd >= 0) {
//lseek(pidfd, 0, SEEK_SET); /* debug: emulate eglibc bug */
close(pidfd);
}
/* We won't ever use this anymore */
ha_free(&global.pidfile);
if (in_parent) {
if (global.mode & (MODE_MWORKER|MODE_MWORKER_WAIT)) {
master = 1;
if ((!(global.mode & MODE_QUIET) || (global.mode & MODE_VERBOSE)) &&
(global.mode & MODE_DAEMON)) {
/* detach from the tty, this is required to properly daemonize. */
if ((getenv("HAPROXY_MWORKER_REEXEC") == NULL))
stdio_quiet(-1);
global.mode &= ~MODE_VERBOSE;
global.mode |= MODE_QUIET; /* ensure that we won't say anything from now */
}
if (global.mode & MODE_MWORKER_WAIT) {
/* only the wait mode handles the master CLI */
mworker_loop();
} else {
/* if not in wait mode, reload in wait mode to free the memory */
ha_notice("Loading success.\n");
proc_self->failedreloads = 0; /* reset the number of failure */
mworker_reexec_waitmode();
}
/* should never get there */
exit(EXIT_FAILURE);
}
#if defined(USE_OPENSSL) && !defined(OPENSSL_NO_DH)
ssl_free_dh();
#endif
exit(0); /* parent must leave */
}
/* child must never use the atexit function */
atexit_flag = 0;
/* close useless master sockets */
if (global.mode & MODE_MWORKER) {
struct mworker_proc *child, *it;
master = 0;
mworker_cli_proxy_stop();
/* free proc struct of other processes */
list_for_each_entry_safe(child, it, &proc_list, list) {
/* close the FD of the master side for all
* workers, we don't need to close the worker
* side of other workers since it's done with
* the bind_proc */
if (child->ipc_fd[0] >= 0) {
close(child->ipc_fd[0]);
child->ipc_fd[0] = -1;
}
if (child->options & PROC_O_TYPE_WORKER &&
child->reloads == 0) {
/* keep this struct if this is our pid */
proc_self = child;
continue;
}
LIST_DELETE(&child->list);
mworker_free_child(child);
child = NULL;
}
}
if (!(global.mode & MODE_QUIET) || (global.mode & MODE_VERBOSE)) {
devnullfd = open("/dev/null", O_RDWR, 0);
if (devnullfd < 0) {
ha_alert("Cannot open /dev/null\n");
exit(EXIT_FAILURE);
}
}
/* Must chroot and setgid/setuid in the children */
/* chroot if needed */
if (global.chroot != NULL) {
if (chroot(global.chroot) == -1 || chdir("/") == -1) {
ha_alert("[%s.main()] Cannot chroot(%s).\n", argv[0], global.chroot);
if (nb_oldpids)
tell_old_pids(SIGTTIN);
protocol_unbind_all();
exit(1);
}
}
ha_free(&global.chroot);
set_identity(argv[0]);
/* pass through every cli socket, and check if it's bound to
* the current process and if it exposes listeners sockets.
* Caution: the GTUNE_SOCKET_TRANSFER is now set after the fork.
* */
if (global.cli_fe) {
struct bind_conf *bind_conf;
list_for_each_entry(bind_conf, &global.cli_fe->conf.bind, by_fe) {
if (bind_conf->level & ACCESS_FD_LISTENERS) {
global.tune.options |= GTUNE_SOCKET_TRANSFER;
break;
}
}
}
/*
* This is only done in daemon mode because we might want the
* logs on stdout in mworker mode. If we're NOT in QUIET mode,
* we should now close the 3 first FDs to ensure that we can
* detach from the TTY. We MUST NOT do it in other cases since
* it would have already be done, and 0-2 would have been
* affected to listening sockets
*/
if ((global.mode & MODE_DAEMON) &&
(!(global.mode & MODE_QUIET) || (global.mode & MODE_VERBOSE))) {
/* detach from the tty */
stdio_quiet(devnullfd);
global.mode &= ~MODE_VERBOSE;
global.mode |= MODE_QUIET; /* ensure that we won't say anything from now */
}
pid = getpid(); /* update child's pid */
if (!(global.mode & MODE_MWORKER)) /* in mworker mode we don't want a new pgid for the children */
setsid();
fork_poller();
}
/* try our best to re-enable core dumps depending on system capabilities.
* What is addressed here :
* - remove file size limits
* - remove core size limits
* - mark the process dumpable again if it lost it due to user/group
*/
if (global.tune.options & GTUNE_SET_DUMPABLE) {
limit.rlim_cur = limit.rlim_max = RLIM_INFINITY;
#if defined(RLIMIT_FSIZE)
if (setrlimit(RLIMIT_FSIZE, &limit) == -1) {
if (global.tune.options & GTUNE_STRICT_LIMITS) {
ha_alert("[%s.main()] Failed to set the raise the maximum "
"file size.\n", argv[0]);
exit(1);
}
else
ha_warning("[%s.main()] Failed to set the raise the maximum "
"file size.\n", argv[0]);
}
#endif
#if defined(RLIMIT_CORE)
if (setrlimit(RLIMIT_CORE, &limit) == -1) {
if (global.tune.options & GTUNE_STRICT_LIMITS) {
ha_alert("[%s.main()] Failed to set the raise the core "
"dump size.\n", argv[0]);
exit(1);
}
else
ha_warning("[%s.main()] Failed to set the raise the core "
"dump size.\n", argv[0]);
}
#endif
#if defined(USE_PRCTL)
if (prctl(PR_SET_DUMPABLE, 1, 0, 0, 0) == -1)
ha_warning("[%s.main()] Failed to set the dumpable flag, "
"no core will be dumped.\n", argv[0]);
#elif defined(USE_PROCCTL)
{
int traceable = PROC_TRACE_CTL_ENABLE;
if (procctl(P_PID, getpid(), PROC_TRACE_CTL, &traceable) == -1)
ha_warning("[%s.main()] Failed to set the traceable flag, "
"no core will be dumped.\n", argv[0]);
}
#endif
}
global.mode &= ~MODE_STARTING;
reset_usermsgs_ctx();
/* start threads 2 and above */
setup_extra_threads(&run_thread_poll_loop);
/* when multithreading we need to let only the thread 0 handle the signals */
haproxy_unblock_signals();
/* Finally, start the poll loop for the first thread */
run_thread_poll_loop(&ha_thread_info[0]);
/* wait for all threads to terminate */
wait_for_threads_completion();
deinit_and_exit(0);
}
/*
* Local variables:
* c-indent-level: 8
* c-basic-offset: 8
* End:
*/