mirror of
				https://github.com/minio/minio.git
				synced 2025-11-04 02:01:05 +01:00 
			
		
		
		
	Close the file properly before dropping the *os.File reference; otherwise file descriptors can silently leak in rare cases. This PR fixes that.
		
			
				
	
	
		
			296 lines
		
	
	
		
			7.4 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			296 lines
		
	
	
		
			7.4 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
// Copyright (c) 2015-2021 MinIO, Inc.
 | 
						|
//
 | 
						|
// This file is part of MinIO Object Storage stack
 | 
						|
//
 | 
						|
// This program is free software: you can redistribute it and/or modify
 | 
						|
// it under the terms of the GNU Affero General Public License as published by
 | 
						|
// the Free Software Foundation, either version 3 of the License, or
 | 
						|
// (at your option) any later version.
 | 
						|
//
 | 
						|
// This program is distributed in the hope that it will be useful
 | 
						|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
						|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
						|
// GNU Affero General Public License for more details.
 | 
						|
//
 | 
						|
// You should have received a copy of the GNU Affero General Public License
 | 
						|
// along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | 
						|
 | 
						|
package cmd
 | 
						|
 | 
						|
import (
 | 
						|
	"context"
 | 
						|
	"encoding/binary"
 | 
						|
	"errors"
 | 
						|
	"fmt"
 | 
						|
	"io"
 | 
						|
	"os"
 | 
						|
	"path/filepath"
 | 
						|
	"sync"
 | 
						|
	"time"
 | 
						|
 | 
						|
	"github.com/minio/minio/internal/logger"
 | 
						|
)
 | 
						|
 | 
						|
//go:generate msgp -file $GOFILE -unexported
 | 
						|
//msgp:ignore TierJournal tierDiskJournal walkfn
 | 
						|
 | 
						|
// tierDiskJournal is the on-disk delete journal. The embedded RWMutex guards
// the file handle, which is opened lazily by Open and released by Close.
type tierDiskJournal struct {
	sync.RWMutex

	// diskPath is the local drive path under which the journal files live.
	diskPath string
	// file is the handle of the active journal file; nil while the journal
	// is closed.
	file *os.File
}
 | 
						|
 | 
						|
// TierJournal holds an in-memory and an on-disk delete journal of tiered content.
 | 
						|
type TierJournal struct {
 | 
						|
	*tierDiskJournal // for processing legacy journal entries
 | 
						|
	*tierMemJournal  // for processing new journal entries
 | 
						|
}
 | 
						|
 | 
						|
// jentry is a single record of the on-disk journal. The struct tags give the
// msgp field names.
type jentry struct {
	// ObjName is the object name handed to the walk callback.
	ObjName string `msg:"obj"`
	// VersionID is handed to the walk callback as the version identifier.
	VersionID string `msg:"vid"`
	// TierName names the tier the entry refers to.
	TierName string `msg:"tier"`
}
 | 
						|
 | 
						|
const (
	// tierJournalVersion is the format version stored in the journal header.
	tierJournalVersion = 1
	// tierJournalHdrLen is the size of the journal header in bytes.
	tierJournalHdrLen = 2
)
 | 
						|
 | 
						|
// errUnsupportedJournalVersion is returned when a journal header carries a
// version other than tierJournalVersion.
var errUnsupportedJournalVersion = errors.New(
	"unsupported pending deletes journal version",
)
 | 
						|
 | 
						|
func newTierDiskJournal() *tierDiskJournal {
 | 
						|
	return &tierDiskJournal{}
 | 
						|
}
 | 
						|
 | 
						|
// NewTierJournal initializes tier deletion journal
 | 
						|
func NewTierJournal() *TierJournal {
 | 
						|
	j := &TierJournal{
 | 
						|
		tierMemJournal:  newTierMemJournal(1000),
 | 
						|
		tierDiskJournal: newTierDiskJournal(),
 | 
						|
	}
 | 
						|
	return j
 | 
						|
}
 | 
						|
 | 
						|
// Init intializes an in-memory journal built using a
 | 
						|
// buffered channel for new journal entries. It also initializes the on-disk
 | 
						|
// journal only to process existing journal entries made from previous versions.
 | 
						|
func (t *TierJournal) Init(ctx context.Context) error {
 | 
						|
	for _, diskPath := range globalEndpoints.LocalDisksPaths() {
 | 
						|
		t.diskPath = diskPath
 | 
						|
 | 
						|
		go t.deletePending(ctx)  // for existing journal entries from previous MinIO versions
 | 
						|
		go t.processEntries(ctx) // for newer journal entries circa free-versions
 | 
						|
		return nil
 | 
						|
	}
 | 
						|
 | 
						|
	return errors.New("no local drive found")
 | 
						|
}
 | 
						|
 | 
						|
// rotate rotates the journal. If a read-only journal already exists it does
 | 
						|
// nothing. Otherwise renames the active journal to a read-only journal and
 | 
						|
// opens a new active journal.
 | 
						|
func (jd *tierDiskJournal) rotate() error {
 | 
						|
	// Do nothing if a read-only journal file already exists.
 | 
						|
	if _, err := os.Stat(jd.ReadOnlyPath()); err == nil {
 | 
						|
		return nil
 | 
						|
	}
 | 
						|
	// Close the active journal if present and delete it.
 | 
						|
	return jd.Close()
 | 
						|
}
 | 
						|
 | 
						|
// walkFn is the callback WalkEntries invokes for every decoded journal
// entry, receiving the entry's object name, version ID and tier name.
type walkFn func(ctx context.Context, objName string, rvID string, tierName string) error
 | 
						|
 | 
						|
func (jd *tierDiskJournal) ReadOnlyPath() string {
 | 
						|
	return filepath.Join(jd.diskPath, minioMetaBucket, "ilm", "deletion-journal.ro.bin")
 | 
						|
}
 | 
						|
 | 
						|
func (jd *tierDiskJournal) JournalPath() string {
 | 
						|
	return filepath.Join(jd.diskPath, minioMetaBucket, "ilm", "deletion-journal.bin")
 | 
						|
}
 | 
						|
 | 
						|
func (jd *tierDiskJournal) WalkEntries(ctx context.Context, fn walkFn) {
 | 
						|
	if err := jd.rotate(); err != nil {
 | 
						|
		logger.LogIf(ctx, fmt.Errorf("tier-journal: failed to rotate pending deletes journal %s", err))
 | 
						|
		return
 | 
						|
	}
 | 
						|
 | 
						|
	ro, err := jd.OpenRO()
 | 
						|
	switch {
 | 
						|
	case errors.Is(err, os.ErrNotExist):
 | 
						|
		return // No read-only journal to process; nothing to do.
 | 
						|
	case err != nil:
 | 
						|
		logger.LogIf(ctx, fmt.Errorf("tier-journal: failed open read-only journal for processing %s", err))
 | 
						|
		return
 | 
						|
	}
 | 
						|
	defer ro.Close()
 | 
						|
	mr := msgpNewReader(ro)
 | 
						|
	defer readMsgpReaderPoolPut(mr)
 | 
						|
 | 
						|
	done := false
 | 
						|
	for {
 | 
						|
		var entry jentry
 | 
						|
		err := entry.DecodeMsg(mr)
 | 
						|
		if errors.Is(err, io.EOF) {
 | 
						|
			done = true
 | 
						|
			break
 | 
						|
		}
 | 
						|
		if err != nil {
 | 
						|
			logger.LogIf(ctx, fmt.Errorf("tier-journal: failed to decode journal entry %s", err))
 | 
						|
			break
 | 
						|
		}
 | 
						|
		err = fn(ctx, entry.ObjName, entry.VersionID, entry.TierName)
 | 
						|
		if err != nil && !isErrObjectNotFound(err) {
 | 
						|
			logger.LogIf(ctx, fmt.Errorf("tier-journal: failed to delete transitioned object %s from %s due to %s", entry.ObjName, entry.TierName, err))
 | 
						|
			// We add the entry into the active journal to try again
 | 
						|
			// later.
 | 
						|
			jd.addEntry(entry)
 | 
						|
		}
 | 
						|
	}
 | 
						|
	if done {
 | 
						|
		os.Remove(jd.ReadOnlyPath())
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
func deleteObjectFromRemoteTier(ctx context.Context, objName, rvID, tierName string) error {
 | 
						|
	w, err := globalTierConfigMgr.getDriver(tierName)
 | 
						|
	if err != nil {
 | 
						|
		return err
 | 
						|
	}
 | 
						|
	err = w.Remove(ctx, objName, remoteVersionID(rvID))
 | 
						|
	if err != nil {
 | 
						|
		return err
 | 
						|
	}
 | 
						|
	return nil
 | 
						|
}
 | 
						|
 | 
						|
func (jd *tierDiskJournal) deletePending(ctx context.Context) {
 | 
						|
	ticker := time.NewTicker(30 * time.Minute)
 | 
						|
	defer ticker.Stop()
 | 
						|
	for {
 | 
						|
		select {
 | 
						|
		case <-ticker.C:
 | 
						|
			jd.WalkEntries(ctx, deleteObjectFromRemoteTier)
 | 
						|
 | 
						|
		case <-ctx.Done():
 | 
						|
			jd.Close()
 | 
						|
			return
 | 
						|
		}
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
func (jd *tierDiskJournal) addEntry(je jentry) error {
 | 
						|
	// Open journal if it hasn't been
 | 
						|
	err := jd.Open()
 | 
						|
	if err != nil {
 | 
						|
		return err
 | 
						|
	}
 | 
						|
 | 
						|
	b, err := je.MarshalMsg(nil)
 | 
						|
	if err != nil {
 | 
						|
		return err
 | 
						|
	}
 | 
						|
 | 
						|
	jd.Lock()
 | 
						|
	defer jd.Unlock()
 | 
						|
	_, err = jd.file.Write(b)
 | 
						|
	if err != nil {
 | 
						|
		// Do not leak fd here, close the file properly.
 | 
						|
		Fdatasync(jd.file)
 | 
						|
		_ = jd.file.Close()
 | 
						|
 | 
						|
		jd.file = nil // reset to allow subsequent reopen when file/disk is available.
 | 
						|
	}
 | 
						|
	return err
 | 
						|
}
 | 
						|
 | 
						|
// Close closes the active journal and renames it to read-only for pending
 | 
						|
// deletes processing. Note: calling Close on a closed journal is a no-op.
 | 
						|
func (jd *tierDiskJournal) Close() error {
 | 
						|
	jd.Lock()
 | 
						|
	defer jd.Unlock()
 | 
						|
	if jd.file == nil { // already closed
 | 
						|
		return nil
 | 
						|
	}
 | 
						|
 | 
						|
	var (
 | 
						|
		f   *os.File
 | 
						|
		fi  os.FileInfo
 | 
						|
		err error
 | 
						|
	)
 | 
						|
	// Setting j.file to nil
 | 
						|
	f, jd.file = jd.file, f
 | 
						|
	if fi, err = f.Stat(); err != nil {
 | 
						|
		return err
 | 
						|
	}
 | 
						|
	f.Close() // close before rename()
 | 
						|
 | 
						|
	// Skip renaming active journal if empty.
 | 
						|
	if fi.Size() == tierJournalHdrLen {
 | 
						|
		return os.Remove(jd.JournalPath())
 | 
						|
	}
 | 
						|
 | 
						|
	jPath := jd.JournalPath()
 | 
						|
	jroPath := jd.ReadOnlyPath()
 | 
						|
	// Rotate active journal to perform pending deletes.
 | 
						|
	return os.Rename(jPath, jroPath)
 | 
						|
}
 | 
						|
 | 
						|
// Open opens a new active journal. Note: calling Open on an opened journal is a
 | 
						|
// no-op.
 | 
						|
func (jd *tierDiskJournal) Open() error {
 | 
						|
	jd.Lock()
 | 
						|
	defer jd.Unlock()
 | 
						|
	if jd.file != nil { // already open
 | 
						|
		return nil
 | 
						|
	}
 | 
						|
 | 
						|
	var err error
 | 
						|
	jd.file, err = OpenFile(jd.JournalPath(), os.O_APPEND|os.O_CREATE|os.O_WRONLY|writeMode, 0o666)
 | 
						|
	if err != nil {
 | 
						|
		return err
 | 
						|
	}
 | 
						|
 | 
						|
	// write journal version header if active journal is empty
 | 
						|
	fi, err := jd.file.Stat()
 | 
						|
	if err != nil {
 | 
						|
		return err
 | 
						|
	}
 | 
						|
	if fi.Size() == 0 {
 | 
						|
		var data [tierJournalHdrLen]byte
 | 
						|
		binary.LittleEndian.PutUint16(data[:], tierJournalVersion)
 | 
						|
		_, err = jd.file.Write(data[:])
 | 
						|
		if err != nil {
 | 
						|
			return err
 | 
						|
		}
 | 
						|
	}
 | 
						|
	return nil
 | 
						|
}
 | 
						|
 | 
						|
func (jd *tierDiskJournal) OpenRO() (io.ReadCloser, error) {
 | 
						|
	file, err := Open(jd.ReadOnlyPath())
 | 
						|
	if err != nil {
 | 
						|
		return nil, err
 | 
						|
	}
 | 
						|
 | 
						|
	// read journal version header
 | 
						|
	var data [tierJournalHdrLen]byte
 | 
						|
	if _, err := io.ReadFull(file, data[:]); err != nil {
 | 
						|
		return nil, err
 | 
						|
	}
 | 
						|
 | 
						|
	switch binary.LittleEndian.Uint16(data[:]) {
 | 
						|
	case tierJournalVersion:
 | 
						|
		return file, nil
 | 
						|
	default:
 | 
						|
		return nil, errUnsupportedJournalVersion
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
// jentryV1 is the journal entry layout from before RemoteVersionID was
// added. It is kept for tests that exercise the addition of that struct
// element.
type jentryV1 struct {
	// ObjName is the object name.
	ObjName string `msg:"obj"`
	// TierName is the tier name.
	TierName string `msg:"tier"`
}
 |