This file was never truly necessary and has never actually been used in the history of Tailscale's open source releases.

A Brief History of AUTHORS files
---

The AUTHORS file was a pattern developed at Google, originally for Chromium, then adopted by Go and a bunch of other projects. The problem was that Chromium originally had a copyright line only recognizing Google as the copyright holder. Because Google (and most open source projects) do not require copyright assignment for contributions, each contributor maintains their copyright. Some large corporate contributors then tried to add their own name to the copyright line in the LICENSE file or in file headers. This quickly becomes unwieldy, and puts a tremendous burden on anyone building on top of Chromium, since the license requires that they keep all copyright lines intact.

The compromise was to create an AUTHORS file that would list all of the copyright holders. The LICENSE file and source file headers would then include that list by reference, listing the copyright holder as "The Chromium Authors". But keeping the file up to date also became cumbersome given a high rate of new contributors. Plus, it's not always obvious who the copyright holder is. Sometimes it is the individual making the contribution, but many times it may be their employer. There is no way for the project maintainer to know.

Eventually, Google changed their policy to no longer recommend trying to keep the AUTHORS file up to date proactively, and instead to only add to it when requested: https://opensource.google/docs/releasing/authors. They are also clear that:

> Adding contributors to the AUTHORS file is entirely within the
> project's discretion and has no implications for copyright ownership.

It was primarily added to appease a small number of large contributors that insisted on being recognized as copyright holders (which was entirely their right to do). But it's not truly necessary, and not even the most accurate way of identifying contributors and/or copyright holders.

In practice, we've never added anyone to our AUTHORS file. It only lists Tailscale, so it's not really serving any purpose. It also causes confusion because Tailscalars put the "Tailscale Inc & AUTHORS" header in other open source repos which don't actually have an AUTHORS file, so it's ambiguous what that means.

Instead, we just acknowledge that the contributors to Tailscale (whoever they are) are copyright holders for their individual contributions. We also have the benefit of using the DCO (developercertificate.org), which provides some additional certification of their right to make the contribution.

The source file changes were purely mechanical with:

    git ls-files | xargs sed -i -e 's/\(Tailscale Inc &\) AUTHORS/\1 contributors/g'

Updates #cleanup

Change-Id: Ia101a4a3005adb9118051b3416f5a64a4a45987d
Signed-off-by: Will Norris <will@tailscale.com>
// Copyright (c) Tailscale Inc & contributors
// SPDX-License-Identifier: BSD-3-Clause

// Package auditlog provides a mechanism for logging audit events.
package auditlog

import (
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"sort"
	"sync"
	"time"

	"tailscale.com/ipn"
	"tailscale.com/tailcfg"
	"tailscale.com/types/logger"
	"tailscale.com/util/rands"
	"tailscale.com/util/set"
)

// transaction represents an audit log that has not yet been sent to the control plane.
type transaction struct {
	// EventID is the unique identifier for the event being logged.
	// This is used on the client side only and is not sent to control.
	EventID string `json:",omitempty"`
	// Retries is the number of times the logger has attempted to send this log.
	// This is used on the client side only and is not sent to control.
	Retries int `json:",omitempty"`

	// Action is the action to be logged. It must correspond to a known action in the control plane.
	Action tailcfg.ClientAuditAction `json:",omitempty"`
	// Details is an opaque string specific to the action being logged. Empty strings may not
	// be valid depending on the action being logged.
	Details string `json:",omitempty"`
	// TimeStamp is the time at which the audit log was generated on the node.
	TimeStamp time.Time `json:",omitzero"`
}

// Transport provides a means for a client to send audit logs to a consumer (typically the control plane).
type Transport interface {
	// SendAuditLog sends an audit log to a consumer of audit logs.
	// Errors should be checked with [IsRetryableError] for retryability.
	SendAuditLog(context.Context, tailcfg.AuditLogRequest) error
}

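// For illustration only, a minimal Transport sketch (the type below is
// hypothetical; in practice the transport is provided by the control client
// and is not defined in this package):
//
//	type stubTransport struct{}
//
//	func (stubTransport) SendAuditLog(ctx context.Context, req tailcfg.AuditLogRequest) error {
//		// Deliver req to the consumer. Return an error whose
//		// Retryable() bool method reports whether the send may be
//		// attempted again later.
//		return nil
//	}
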
// LogStore provides a means for a [Logger] to persist logs to disk or memory.
type LogStore interface {
	// save saves the given data to a persistent store. save will overwrite
	// existing data for the given key.
	save(key ipn.ProfileID, txns []*transaction) error

	// load retrieves the data from a persistent store. It returns a nil slice and
	// no error if no data exists for the given key.
	load(key ipn.ProfileID) ([]*transaction, error)
}

// Opts contains the configuration options for a [Logger].
type Opts struct {
	// RetryLimit is the maximum number of attempts the logger will make to send a log before giving up.
	RetryLimit int
	// Store is the persistent store used to save logs to disk. Must be non-nil.
	Store LogStore
	// Logf is the logger used to log messages from the audit logger. Must be non-nil.
	Logf logger.Logf
}

// IsRetryableError reports whether the given error is retryable.
// See [controlclient.apiResponseError]. Potentially retryable errors implement the Retryable() method.
func IsRetryableError(err error) bool {
	var retryable interface{ Retryable() bool }
	return errors.As(err, &retryable) && retryable.Retryable()
}

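// As a sketch of how retryability is surfaced (the error type below is
// hypothetical; the control client's real errors already implement Retryable):
//
//	type temporaryError struct{ err error }
//
//	func (e temporaryError) Error() string   { return e.err.Error() }
//	func (e temporaryError) Retryable() bool { return true }
//
// IsRetryableError(temporaryError{errors.New("timeout")}) reports true, so the
// logger keeps the transaction queued and retries it on a later flush.
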
type backoffOpts struct {
	min, max   time.Duration
	multiplier float64
}

// Successive retry delays, in seconds: .5, 1, 2, 4, 8, 10, 10, 10, 10, 10...
var defaultBackoffOpts = backoffOpts{
	min:        time.Millisecond * 500,
	max:        10 * time.Second,
	multiplier: 2,
}

// Logger provides a queue-based mechanism for submitting audit logs to the control plane - or
// another suitable consumer. Logs are stored to disk and retried until they are successfully sent,
// or until they permanently fail.
//
// Each individual profile/controlclient tuple should construct and manage a unique [Logger] instance.
type Logger struct {
	logf       logger.Logf
	retryLimit int                // the maximum number of attempts to send a log before giving up.
	flusher    chan struct{}      // channel used to signal a flush operation.
	done       chan struct{}      // closed when the flush worker exits.
	ctx        context.Context    // canceled when the logger is stopped.
	ctxCancel  context.CancelFunc // cancels ctx.
	backoffOpts                   // backoff settings for retry operations.

	// mu protects the fields below.
	mu        sync.Mutex
	store     LogStore      // persistent storage for unsent logs.
	profileID ipn.ProfileID // empty if [Logger.SetProfileID] has not been called.
	transport Transport     // nil until [Logger.Start] is called.
}

// NewLogger creates a new [Logger] with the given options.
func NewLogger(opts Opts) *Logger {
	ctx, cancel := context.WithCancel(context.Background())

	al := &Logger{
		retryLimit:  opts.RetryLimit,
		logf:        opts.Logf,
		store:       opts.Store,
		flusher:     make(chan struct{}, 1),
		done:        make(chan struct{}),
		ctx:         ctx,
		ctxCancel:   cancel,
		backoffOpts: defaultBackoffOpts,
	}
	al.logf("created")
	return al
}

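// A minimal usage sketch of the Logger lifecycle. The stateStore and transport
// values below are placeholders for whatever concrete implementations the
// caller wires in, and the action passed to Enqueue must be a
// [tailcfg.ClientAuditAction] known to the control plane:
//
//	al := NewLogger(Opts{
//		RetryLimit: 10,
//		Store:      NewLogStore(stateStore), // stateStore is some ipn.StateStore
//		Logf:       logger.Discard,
//	})
//	if err := al.SetProfileID("profile-1234"); err != nil {
//		// handle error
//	}
//	if err := al.Start(transport); err != nil { // transport implements Transport
//		// handle error
//	}
//	_ = al.Enqueue(action, "details")
//	al.FlushAndStop(context.Background())
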
// FlushAndStop synchronously flushes all pending logs and stops the audit logger.
// This will block until a final flush operation completes or the context is done.
// If the logger is already stopped, this will return immediately. All unsent
// logs will be persisted to the store.
func (al *Logger) FlushAndStop(ctx context.Context) {
	al.stop()
	al.flush(ctx)
}

// SetProfileID sets the profileID for the logger. This must be called before any logs can be enqueued.
// The profileID of a logger cannot be changed once set.
func (al *Logger) SetProfileID(profileID ipn.ProfileID) error {
	al.mu.Lock()
	defer al.mu.Unlock()
	// It's not an error to call SetProfileID more than once
	// with the same [ipn.ProfileID].
	if al.profileID != "" && al.profileID != profileID {
		return errors.New("profileID cannot be changed once set")
	}

	al.profileID = profileID
	return nil
}

// Start starts the audit logger with the given transport.
// It returns an error if the logger is already started.
func (al *Logger) Start(t Transport) error {
	al.mu.Lock()
	defer al.mu.Unlock()

	if al.transport != nil {
		return errors.New("already started")
	}

	al.transport = t
	pending, err := al.storedCountLocked()
	if err != nil {
		al.logf("[unexpected] failed to restore logs: %v", err)
	}
	go al.flushWorker()
	if pending > 0 {
		al.flushAsync()
	}
	return nil
}

// ErrAuditLogStorageFailure is returned when the logger fails to persist logs to the store.
var ErrAuditLogStorageFailure = errors.New("audit log storage failure")

// Enqueue queues an audit log to be sent to the control plane (or another suitable consumer/transport).
// This will return an error if the underlying store fails to save the log or we fail to generate a unique
// eventID for the log.
func (al *Logger) Enqueue(action tailcfg.ClientAuditAction, details string) error {
	txn := &transaction{
		Action:    action,
		Details:   details,
		TimeStamp: time.Now(),
	}
	// Generate a suitably random eventID for the transaction.
	txn.EventID = fmt.Sprint(txn.TimeStamp, rands.HexString(16))
	return al.enqueue(txn)
}

// flushAsync requests an asynchronous flush.
// It is a no-op if a flush is already pending.
func (al *Logger) flushAsync() {
	select {
	case al.flusher <- struct{}{}:
	default:
	}
}

func (al *Logger) flushWorker() {
	defer close(al.done)

	var retryDelay time.Duration
	retry := time.NewTimer(0)
	retry.Stop()

	for {
		select {
		case <-al.ctx.Done():
			return
		case <-al.flusher:
			err := al.flush(al.ctx)
			switch {
			case errors.Is(err, context.Canceled):
				// The logger was stopped, no need to retry.
				return
			case err != nil:
				retryDelay = max(al.backoffOpts.min, min(retryDelay*time.Duration(al.backoffOpts.multiplier), al.backoffOpts.max))
				al.logf("retrying after %v, %v", retryDelay, err)
				retry.Reset(retryDelay)
			default:
				retryDelay = 0
				retry.Stop()
			}
		case <-retry.C:
			al.flushAsync()
		}
	}
}

// flush attempts to send all pending logs to the control plane.
// al.mu must not be held.
func (al *Logger) flush(ctx context.Context) error {
	al.mu.Lock()
	pending, err := al.store.load(al.profileID)
	t := al.transport
	al.mu.Unlock()

	if err != nil {
		// This will also catch an empty profileID.
		return fmt.Errorf("failed to restore pending logs: %w", err)
	}
	if len(pending) == 0 {
		return nil
	}
	if t == nil {
		return errors.New("no transport")
	}

	complete, unsent := al.sendToTransport(ctx, pending, t)
	al.markTransactionsDone(complete)

	al.mu.Lock()
	defer al.mu.Unlock()
	if err = al.appendToStoreLocked(unsent); err != nil {
		al.logf("[unexpected] failed to persist logs: %v", err)
	}

	if len(unsent) != 0 {
		return fmt.Errorf("failed to send %d logs", len(unsent))
	}

	if len(complete) != 0 {
		al.logf("completed %d audit log transactions", len(complete))
	}
	return nil
}

// sendToTransport sends all pending logs to the control plane. Returns a pair of slices
// containing the logs that were successfully sent (or failed permanently) and those that were not.
//
// This may require multiple round trips to the control plane and can be a long running transaction.
func (al *Logger) sendToTransport(ctx context.Context, pending []*transaction, t Transport) (complete []*transaction, unsent []*transaction) {
	for i, txn := range pending {
		req := tailcfg.AuditLogRequest{
			Action:    txn.Action,
			Details:   txn.Details,
			Timestamp: txn.TimeStamp,
		}

		if err := t.SendAuditLog(ctx, req); err != nil {
			switch {
			case errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded):
				// The context is done. All further attempts will fail.
				unsent = append(unsent, pending[i:]...)
				return complete, unsent
			case IsRetryableError(err) && txn.Retries+1 < al.retryLimit:
				// We permit a maximum number of retries for each log. All retryable
				// errors should be transient and we should be able to send the log eventually, but
				// we don't want logs to be persisted indefinitely.
				txn.Retries++
				unsent = append(unsent, txn)
			default:
				complete = append(complete, txn)
				al.logf("failed permanently: %v", err)
			}
		} else {
			// No error - we're done.
			complete = append(complete, txn)
		}
	}

	return complete, unsent
}

func (al *Logger) stop() {
	al.mu.Lock()
	t := al.transport
	al.mu.Unlock()

	if t == nil {
		// No transport means no worker goroutine and done will not be
		// closed if we cancel the context.
		return
	}

	al.ctxCancel()
	<-al.done
	al.logf("stopped for profileID: %v", al.profileID)
}

// appendToStoreLocked persists logs to the store. This will deduplicate
// logs, so it is safe to call this with the same logs multiple times,
// for example to requeue failed transactions.
//
// al.mu must be held.
func (al *Logger) appendToStoreLocked(txns []*transaction) error {
	if len(txns) == 0 {
		return nil
	}

	if al.profileID == "" {
		return errors.New("no profileID set")
	}

	persisted, err := al.store.load(al.profileID)
	if err != nil {
		al.logf("[unexpected] append failed to restore logs: %v", err)
	}

	// The order is important here. We want the latest transactions first, which will
	// ensure that when we dedup, the new transactions are kept and the older duplicates
	// are discarded.
	txnsOut := append(txns, persisted...)
	txnsOut = deduplicateAndSort(txnsOut)

	return al.store.save(al.profileID, txnsOut)
}

// storedCountLocked returns the number of logs persisted to the store.
// al.mu must be held.
func (al *Logger) storedCountLocked() (int, error) {
	persisted, err := al.store.load(al.profileID)
	return len(persisted), err
}

// markTransactionsDone removes logs from the store that are complete (sent or failed permanently).
// al.mu must not be held.
func (al *Logger) markTransactionsDone(sent []*transaction) {
	al.mu.Lock()
	defer al.mu.Unlock()

	ids := set.Set[string]{}
	for _, txn := range sent {
		ids.Add(txn.EventID)
	}

	persisted, err := al.store.load(al.profileID)
	if err != nil {
		al.logf("[unexpected] markTransactionsDone failed to restore logs: %v", err)
	}
	var unsent []*transaction
	for _, txn := range persisted {
		if !ids.Contains(txn.EventID) {
			unsent = append(unsent, txn)
		}
	}
	if err := al.store.save(al.profileID, unsent); err != nil {
		al.logf("[unexpected] markTransactionsDone failed to save logs: %v", err)
	}
}

// deduplicateAndSort removes duplicate logs from the given slice and sorts them by timestamp.
// The first log entry in the slice will be retained; subsequent logs with the same EventID will be discarded.
func deduplicateAndSort(txns []*transaction) []*transaction {
	seen := set.Set[string]{}
	deduped := make([]*transaction, 0, len(txns))
	for _, txn := range txns {
		if !seen.Contains(txn.EventID) {
			deduped = append(deduped, txn)
			seen.Add(txn.EventID)
		}
	}
	// Sort logs by timestamp - oldest to newest. This will put the oldest logs at
	// the front of the queue.
	sort.Slice(deduped, func(i, j int) bool {
		return deduped[i].TimeStamp.Before(deduped[j].TimeStamp)
	})
	return deduped
}

func (al *Logger) enqueue(txn *transaction) error {
	al.mu.Lock()
	defer al.mu.Unlock()

	if err := al.appendToStoreLocked([]*transaction{txn}); err != nil {
		return fmt.Errorf("%w: %w", ErrAuditLogStorageFailure, err)
	}

	// al.transport is nil until [Logger.Start] is called; only request a
	// flush once a transport is available.
	if al.transport != nil {
		al.flushAsync()
	}

	return nil
}

var _ LogStore = (*logStateStore)(nil)

// logStateStore is a concrete implementation of [LogStore]
// using [ipn.StateStore] as the underlying storage.
type logStateStore struct {
	store ipn.StateStore
}

// NewLogStore creates a new [LogStore] backed by the given [ipn.StateStore].
func NewLogStore(store ipn.StateStore) LogStore {
	return &logStateStore{
		store: store,
	}
}

func (s *logStateStore) generateKey(key ipn.ProfileID) string {
	return "auditlog-" + string(key)
}

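// For example (illustrative only), a profileID of "1234" is stored under the
// state key "auditlog-1234", so each profile's pending audit logs live in a
// separate entry of the underlying [ipn.StateStore].
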
// save saves the given logs to an [ipn.StateStore]. This overwrites
// any existing entries for the given key.
func (s *logStateStore) save(key ipn.ProfileID, txns []*transaction) error {
	if key == "" {
		return errors.New("empty key")
	}

	data, err := json.Marshal(txns)
	if err != nil {
		return err
	}
	k := ipn.StateKey(s.generateKey(key))
	return s.store.WriteState(k, data)
}

// load retrieves the logs from an [ipn.StateStore].
func (s *logStateStore) load(key ipn.ProfileID) ([]*transaction, error) {
	if key == "" {
		return nil, errors.New("empty key")
	}

	k := ipn.StateKey(s.generateKey(key))
	data, err := s.store.ReadState(k)

	switch {
	case errors.Is(err, ipn.ErrStateNotExist):
		return nil, nil
	case err != nil:
		return nil, err
	}

	var txns []*transaction
	err = json.Unmarshal(data, &txns)
	return txns, err
}