From 266d02d7d832ed0aceaa1db7d7e0bdc2f8660c8d Mon Sep 17 00:00:00 2001
From: Evgenii Guguchkin
Date: Thu, 20 Nov 2025 10:08:11 +0300
Subject: [PATCH] feat(fracmanager): add handling for upload queue overflow and
 disk space exhaustion

---
 cmd/seq-db/seq-db.go                  |  7 ++-
 config/config.go                      | 12 +++-
 fracmanager/config.go                 |  9 ++-
 fracmanager/fraction_registry.go      | 72 +++++++++++++++++++++++
 fracmanager/lifecycle_manager.go      | 82 ++++++++++++++++++++++-----
 fracmanager/lifecycle_manager_test.go |  2 +-
 fracmanager/proxy_frac.go             | 19 +++++++
 fracmanager/tasks.go                  | 67 ++++++++++++++++++++++
 8 files changed, 251 insertions(+), 19 deletions(-)
 create mode 100644 fracmanager/tasks.go

diff --git a/cmd/seq-db/seq-db.go b/cmd/seq-db/seq-db.go
index a32d6ff4..549f6107 100644
--- a/cmd/seq-db/seq-db.go
+++ b/cmd/seq-db/seq-db.go
@@ -253,6 +253,7 @@ func startStore(
 			DataDir:         cfg.Storage.DataDir,
 			FracSize:        uint64(cfg.Storage.FracSize),
 			TotalSize:       uint64(cfg.Storage.TotalSize),
+			SealingQueueLen: uint64(cfg.Storage.SealingQueueLen),
 			CacheSize:       uint64(cfg.Resources.CacheSize),
 			SortCacheSize:   uint64(cfg.Resources.SortDocsCacheSize),
 			ReplayWorkers:   cfg.Resources.ReplayWorkers,
@@ -280,8 +281,10 @@ func startStore(
 				SkipSortDocs: !cfg.DocsSorting.Enabled,
 				KeepMetaFile: false,
 			},
-			OffloadingEnabled:   cfg.Offloading.Enabled,
-			OffloadingRetention: cfg.Offloading.Retention,
+			OffloadingEnabled:    cfg.Offloading.Enabled,
+			OffloadingRetention:  cfg.Offloading.Retention,
+			OffloadingRetryDelay: cfg.Offloading.RetryDelay,
+			OffloadingQueueSize:  uint64(float64(cfg.Storage.TotalSize) * cfg.Offloading.QueueSizePercent / 100),
 		},
 		API: storeapi.APIConfig{
 			StoreMode: configMode,
diff --git a/config/config.go b/config/config.go
index 29c285d9..350d26ff 100644
--- a/config/config.go
+++ b/config/config.go
@@ -63,6 +63,11 @@ type Config struct {
 		// TotalSize specifies upper bound of how much disk space can be occupied
 		// by sealed fractions before they get deleted (or offloaded).
 		TotalSize Bytes `config:"total_size" default:"1GiB"`
+		// SealingQueueLen defines the maximum length of the sealing queue.
+		// If the queue length exceeds this limit, writing to the store is paused
+		// and bulk requests start returning errors.
+		// A value of zero disables this limit, allowing writes to proceed unconditionally.
+		SealingQueueLen int `config:"sealing_queue_len" default:"10"`
 	} `config:"storage"`
 
 	Cluster struct {
@@ -234,8 +239,13 @@ type Config struct {
 		// You can learn more about secret keys [here](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html).
 		SecretKey string `config:"secret_key"`
 		// RetryCount sets [RetryMaxAttempts] for S3 client which is applied for all API calls.
-		// Be aware that fraction is suicided when offloading attempts exceeds [RetryCount].
 		RetryCount int `config:"retry_count" default:"5"`
+		// QueueSizePercent specifies the percentage of the total local dataset size allocated to the offloading queue.
+		// Note: when the queue overflows, the oldest fractions are automatically removed.
+		// This automatic removal is disabled when set to zero.
+		QueueSizePercent float64 `config:"queue_size_percent" default:"5"`
+		// RetryDelay sets the delay between consecutive offloading retries.
+		RetryDelay time.Duration `config:"retry_delay" default:"2s"`
 	} `config:"offloading"`
 
 	AsyncSearch struct {
diff --git a/fracmanager/config.go b/fracmanager/config.go
index de96c957..5f3ae386 100644
--- a/fracmanager/config.go
+++ b/fracmanager/config.go
@@ -19,6 +19,9 @@ type Config struct {
 	TotalSize uint64
 	CacheSize uint64
 
+	SuspendThreshold uint64
+	SealingQueueLen  uint64
+
 	ReplayWorkers     int
 	MaintenanceDelay  time.Duration
 	CacheCleanupDelay time.Duration
@@ -28,8 +31,10 @@ type Config struct {
 	Fraction frac.Config
 
 	MinSealFracSize uint64
-	OffloadingEnabled   bool
-	OffloadingRetention time.Duration
+	OffloadingEnabled    bool
+	OffloadingQueueSize  uint64
+	OffloadingRetention  time.Duration
+	OffloadingRetryDelay time.Duration
 }
 
 func FillConfigWithDefault(config *Config) *Config {
diff --git a/fracmanager/fraction_registry.go b/fracmanager/fraction_registry.go
index db39d821..103c42e7 100644
--- a/fracmanager/fraction_registry.go
+++ b/fracmanager/fraction_registry.go
@@ -129,6 +129,8 @@ func (r *fractionRegistry) RotateIfFull(maxSize uint64, newActive func() *active
 	curInfo := old.instance.Info()
 	r.stats.sealing.Add(curInfo)
 
+	r.active.Suspend(old.Suspended())
+
 	wg := sync.WaitGroup{}
 	wg.Add(1)
 	// since old.WaitWriteIdle() can take some time, we don't want to do it under the lock
@@ -151,6 +153,31 @@ func (r *fractionRegistry) RotateIfFull(maxSize uint64, newActive func() *active
 	return old, wg.Wait, nil
 }
 
+func (r *fractionRegistry) SuspendIfOverCapacity(maxQueue, maxSize uint64) bool {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+
+	if maxQueue > 0 && r.stats.sealing.count >= int(maxQueue) {
+		r.active.Suspend(true)
+		return true
+	}
+
+	if maxSize > 0 && r.diskUsage() > maxSize {
+		r.active.Suspend(true)
+		return true
+	}
+
+	r.active.Suspend(false)
+	return false
+}
+
+func (r *fractionRegistry) diskUsage() uint64 {
+	return r.active.instance.Info().FullSize() +
+		r.stats.sealed.totalSizeOnDisk +
+		r.stats.sealing.totalSizeOnDisk +
+		r.stats.offloading.totalSizeOnDisk
+}
+
 // addActive sets a new active fraction and updates the complete fractions list.
 func (r *fractionRegistry) addActive(a *activeProxy) {
 	r.muAll.Lock()
@@ -227,6 +254,10 @@ func (r *fractionRegistry) EvictLocal(shouldOffload bool, sizeLimit uint64) ([]*
 // Fractions older than retention period are permanently deleted.
 // Returns removed fractions or empty slice if nothing to remove.
 func (r *fractionRegistry) EvictRemote(retention time.Duration) []*remoteProxy {
+	if retention == 0 {
+		return nil
+	}
+
 	r.mu.Lock()
 	defer r.mu.Unlock()
 
@@ -248,6 +279,42 @@ func (r *fractionRegistry) EvictRemote(retention time.Duration) []*remoteProxy {
 	return evicted
 }
 
+// EvictOverflowed removes the oldest fractions from the offloading queue when it exceeds the size limit.
+// Fractions that have already been offloaded are kept: their local copies are cleaned up once promotion completes.
+// Used when the offloading queue grows too large due to slow remote storage.
+func (r *fractionRegistry) EvictOverflowed(sizeLimit uint64) []*sealedProxy {
+	if sizeLimit == 0 {
+		return nil
+	}
+
+	r.mu.Lock()
+	defer r.mu.Unlock()
+
+	// Fast path: skip processing if within size limits
+	if r.stats.offloading.totalSizeOnDisk <= sizeLimit {
+		return nil
+	}
+
+	count := 0
+	evicted := []*sealedProxy{}
+	// filter fractions in place, oldest first
+	for _, item := range r.offloading {
+		// keep items that are within limits or already offloaded
+		if r.stats.offloading.totalSizeOnDisk <= sizeLimit || item.remote != nil {
+			r.offloading[count] = item
+			count++
+			continue
+		}
+		evicted = append(evicted, item)
+		r.stats.offloading.Sub(item.instance.Info())
+	}
+
+	r.offloading = r.offloading[:count]
+	r.rebuildAllFractions()
+
+	return evicted
+}
+
 // PromoteToSealed moves fractions from sealing to local queue when sealing completes.
 // Maintains strict ordering - younger fractions wait for older ones to seal first.
 func (r *fractionRegistry) PromoteToSealed(active *activeProxy, sealed *frac.Sealed) {
@@ -322,6 +389,11 @@ func (r *fractionRegistry) removeFromOffloading(sealed *sealedProxy) {
 			count++
 		}
 	}
+
+	if count == len(r.offloading) { // nothing to remove (it may have been removed earlier by EvictOverflowed)
+		return
+	}
+
 	r.offloading = r.offloading[:count]
 
 	r.stats.offloading.Sub(sealed.instance.Info())
diff --git a/fracmanager/lifecycle_manager.go b/fracmanager/lifecycle_manager.go
index 76594f0e..2a0e8615 100644
--- a/fracmanager/lifecycle_manager.go
+++ b/fracmanager/lifecycle_manager.go
@@ -21,6 +21,7 @@ type lifecycleManager struct {
 	provider *fractionProvider // provider for fraction operations
 	flags    *StateManager     // storage state flags
 	registry *fractionRegistry // fraction state registry
+	tasks    *TaskManager      // background offloading tasks
 
 	sealingWg sync.WaitGroup
 }
@@ -36,18 +37,26 @@ func newLifecycleManager(
 		provider: provider,
 		flags:    flags,
 		registry: registry,
+		tasks:    NewTaskManager(),
 	}
 }
 
 // Maintain performs periodic lifecycle management tasks.
 // It coordinates rotation, offloading, cleanup based on configuration.
-func (lc *lifecycleManager) Maintain(ctx context.Context, config *Config, wg *sync.WaitGroup) {
-	lc.rotate(config.FracSize, wg)
-	if config.OffloadingEnabled {
-		lc.offloadLocal(ctx, config.TotalSize, wg)
-		lc.cleanRemote(config.OffloadingRetention, wg)
+func (lc *lifecycleManager) Maintain(ctx context.Context, cfg *Config, wg *sync.WaitGroup) {
+	// suspend threshold = TotalSize + 1% headroom + offloading queue budget
+	suspendThreshold := cfg.TotalSize + cfg.TotalSize/100 + cfg.OffloadingQueueSize
+	lc.registry.SuspendIfOverCapacity(cfg.SealingQueueLen, suspendThreshold)
+
+	lc.rotate(cfg.FracSize, wg)
+	if cfg.OffloadingEnabled {
+		lc.offloadLocal(ctx, cfg.TotalSize, cfg.OffloadingRetryDelay, wg)
+		if cfg.OffloadingQueueSize > 0 {
+			lc.removeOverflowed(cfg.OffloadingQueueSize, wg)
+		}
+		lc.cleanRemote(cfg.OffloadingRetention, wg)
 	} else {
-		lc.cleanLocal(config.TotalSize, wg)
+		lc.cleanLocal(cfg.TotalSize, wg)
 	}
 	lc.updateOldestMetric()
 	lc.SyncInfoCache()
@@ -113,17 +122,18 @@ func (lc *lifecycleManager) rotate(maxSize uint64, wg *sync.WaitGroup) {
 
 // offloadLocal starts offloading of local fractions to remote storage.
 // Selects fractions based on disk space usage and retention policy.
-func (lc *lifecycleManager) offloadLocal(ctx context.Context, sizeLimit uint64, wg *sync.WaitGroup) {
+func (lc *lifecycleManager) offloadLocal(ctx context.Context, sizeLimit uint64, retryDelay time.Duration, wg *sync.WaitGroup) {
 	toOffload, err := lc.registry.EvictLocal(true, sizeLimit)
 	if err != nil {
 		logger.Fatal("error releasing old fractions:", zap.Error(err))
 	}
 	for _, sealed := range toOffload {
 		wg.Add(1)
-		go func() {
+		lc.tasks.Run(ctx, sealed.instance.BaseFileName, func(ctx context.Context) {
 			defer wg.Done()
-			remote, _ := lc.tryOffload(ctx, sealed.instance)
+			remote := lc.offloadWithRetry(ctx, sealed.instance, retryDelay)
+
 			lc.registry.PromoteToRemote(sealed, remote)
 
 			if remote == nil {
@@ -136,7 +146,41 @@ func (lc *lifecycleManager) offloadLocal(ctx context.Context, sizeLimit uint64,
 			// free up local resources
 			sealed.instance.Suicide()
 			maintenanceTruncateTotal.Add(1)
-		}()
+		})
 	}
 }
 
+// offloadWithRetry attempts to offload a fraction, retrying until success or cancellation.
+// Returns the remote fraction instance, or nil if the context was canceled.
+func (lc *lifecycleManager) offloadWithRetry(ctx context.Context, sealed *frac.Sealed, retryDelay time.Duration) *frac.Remote {
+	start := time.Now()
+	for i := 0; ; i++ {
+		remote, err := lc.tryOffload(ctx, sealed)
+		if err == nil {
+			return remote
+		}
+
+		logger.Warn(
+			"failed to offload fraction",
+			zap.String("name", sealed.BaseFileName),
+			zap.Duration("offloading_time", time.Since(start)),
+			zap.Int("attempts", i+1),
+			zap.Error(err),
+		)
+
+		select {
+		case <-ctx.Done():
+			logger.Info(
+				"fraction offloading was stopped",
+				zap.String("name", sealed.BaseFileName),
+				zap.Duration("offloading_time", time.Since(start)),
+				zap.Int("attempts", i+1),
+				zap.Error(ctx.Err()),
+			)
+			return nil
+		case <-time.After(retryDelay):
+			// wait before the next retry attempt
+		}
+	}
+}
+
@@ -163,9 +207,6 @@ func (lc *lifecycleManager) tryOffload(ctx context.Context, sealed *frac.Sealed)
 
 // cleanRemote deletes outdated remote fractions based on retention policy.
 func (lc *lifecycleManager) cleanRemote(retention time.Duration, wg *sync.WaitGroup) {
-	if retention == 0 {
-		return
-	}
 	toDelete := lc.registry.EvictRemote(retention)
 	wg.Add(1)
 	go func() {
@@ -207,3 +248,18 @@ func (lc *lifecycleManager) updateOldestMetric() {
 	oldestFracTime.WithLabelValues("remote").Set((time.Duration(lc.registry.OldestTotal()) * time.Millisecond).Seconds())
 	oldestFracTime.WithLabelValues("local").Set((time.Duration(lc.registry.OldestLocal()) * time.Millisecond).Seconds())
 }
+
+// removeOverflowed removes fractions from the offloading queue that exceed the size limit.
+// Stops their in-flight offloading tasks and cleans up both local and remote resources.
+func (lc *lifecycleManager) removeOverflowed(sizeLimit uint64, wg *sync.WaitGroup) {
+	evicted := lc.registry.EvictOverflowed(sizeLimit)
+	for _, item := range evicted {
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			// Cancel the offloading task; cancellation may take significant time,
+			// hence it runs in a separate goroutine to avoid blocking maintenance.
+			lc.tasks.Cancel(item.instance.BaseFileName)
+		}()
+	}
+}
diff --git a/fracmanager/lifecycle_manager_test.go b/fracmanager/lifecycle_manager_test.go
index b7afb715..abd180e2 100644
--- a/fracmanager/lifecycle_manager_test.go
+++ b/fracmanager/lifecycle_manager_test.go
@@ -150,7 +150,7 @@ func TestOldestMetrics(t *testing.T) {
 	}
 
 	wg := sync.WaitGroup{}
-	lc.offloadLocal(t.Context(), total-halfSize, &wg)
+	lc.offloadLocal(t.Context(), total-halfSize, 0, &wg)
 	wg.Wait()
 
 	// Check state after offloading
diff --git a/fracmanager/proxy_frac.go b/fracmanager/proxy_frac.go
index 949e2412..ffc31854 100644
--- a/fracmanager/proxy_frac.go
+++ b/fracmanager/proxy_frac.go
@@ -23,6 +23,7 @@ var (
 	_ frac.Fraction = (*emptyFraction)(nil)
 
 	ErrFractionNotWritable = errors.New("fraction is not writable")
+	ErrFractionSuspended   = errors.New("write operations temporarily suspended: database capacity exceeded")
 )
 
 // fractionProxy provides thread-safe access to a fraction with atomic replacement
@@ -81,6 +82,7 @@ type activeProxy struct {
 	wg sync.WaitGroup // Tracks pending write operations
 
 	finalized bool // Whether fraction is frozen for writes
+	suspended bool // Whether writes are temporarily suspended
 }
 
 func newActiveProxy(active *frac.Active) *activeProxy {
@@ -97,6 +99,10 @@ func (p *activeProxy) Append(docs, meta []byte) error {
 		p.mu.RUnlock()
 		return ErrFractionNotWritable
 	}
+	if p.suspended {
+		p.mu.RUnlock()
+		return ErrFractionSuspended
+	}
 	p.wg.Add(1) // Important: wg.Add() inside lock to prevent race with WaitWriteIdle()
 	p.mu.RUnlock()
 
@@ -115,6 +121,19 @@ func (p *activeProxy) WaitWriteIdle() {
 		zap.Float64("time_wait_s", waitTime))
 }
 
+func (p *activeProxy) Suspended() bool {
+	p.mu.RLock()
+	defer p.mu.RUnlock()
+
+	return p.suspended
+}
+
+func (p *activeProxy) Suspend(value bool) {
+	p.mu.Lock()
+	p.suspended = value
+	p.mu.Unlock()
+}
+
 // Finalize marks the fraction as read-only and prevents new writes from starting after finalize.
 func (p *activeProxy) Finalize() error {
 	p.mu.Lock()
diff --git a/fracmanager/tasks.go b/fracmanager/tasks.go
new file mode 100644
index 00000000..ac11ea8d
--- /dev/null
+++ b/fracmanager/tasks.go
@@ -0,0 +1,67 @@
+package fracmanager
+
+import (
+	"context"
+	"sync"
+)
+
+// task represents a cancellable background task with synchronization.
+// Used for managing long-running operations like offloading fractions.
+// Lifecycle: created via TaskManager.Run(), cancelled via TaskManager.Cancel(), cleaned up on completion.
+type task struct {
+	wg     sync.WaitGroup     // Synchronizes task completion
+	ctx    context.Context    // Context for cancellation
+	cancel context.CancelFunc // Function to cancel the task
+}
+
+// TaskManager manages a collection of running background tasks.
+// Provides safe concurrent access to task tracking and cancellation.
+type TaskManager struct {
+	mu      sync.Mutex
+	running map[string]*task // Map of task ID to task instance
+}
+
+func NewTaskManager() *TaskManager {
+	return &TaskManager{
+		running: make(map[string]*task),
+	}
+}
+
+// Run starts a new background task with the given context and ID.
+// The task is automatically removed from the manager when it completes.
+func (t *TaskManager) Run(ctx context.Context, id string, action func(ctx context.Context)) *task {
+	tk := &task{}
+	tk.ctx, tk.cancel = context.WithCancel(ctx)
+
+	t.mu.Lock()
+	t.running[id] = tk
+	t.mu.Unlock()
+
+	tk.wg.Add(1)
+	go func() {
+		defer func() {
+			t.mu.Lock()
+			delete(t.running, id)
+			t.mu.Unlock()
+
+			tk.wg.Done()
+		}()
+
+		action(tk.ctx)
+	}()
+
+	return tk
+}
+
+// Cancel cancels a task by ID and waits for it to complete.
+// Returns immediately if a task with the given ID doesn't exist.
+func (t *TaskManager) Cancel(id string) {
+	t.mu.Lock()
+	tk, ok := t.running[id]
+	t.mu.Unlock()
+
+	if ok {
+		tk.cancel()
+		tk.wg.Wait()
+	}
+}
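
With the defaults above (total_size: 1GiB, queue_size_percent: 5), the two formulas this patch introduces work out as follows. The snippet is a standalone illustration of that arithmetic, not code from the patch:

package main

import "fmt"

func main() {
	const totalSize = uint64(1 << 30) // storage.total_size: 1GiB
	const queueSizePercent = 5.0      // offloading.queue_size_percent

	// mirrors cmd/seq-db/seq-db.go: size budget of the offloading queue
	offloadingQueueSize := uint64(float64(totalSize) * queueSizePercent / 100)

	// mirrors lifecycleManager.Maintain: TotalSize + 1% headroom + queue budget
	suspendThreshold := totalSize + totalSize/100 + offloadingQueueSize

	fmt.Println(offloadingQueueSize) // 53687091 bytes (~51.2 MiB)
	fmt.Println(suspendThreshold)    // 1138166333 bytes (~1.06 GiB)
}

So with defaults, writes are suspended once local disk usage crosses roughly 1.06 GiB, or once more than 10 fractions are queued for sealing.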
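
On the ingest path, the new backpressure surfaces as ErrFractionSuspended from activeProxy.Append. Below is a hypothetical caller-side sketch of treating it as a retryable condition; classifyAppendErr and the status mapping are illustrative, and the module path github.com/ozontech/seq-db is assumed:

package fracmanager_test

import (
	"errors"
	"fmt"

	"github.com/ozontech/seq-db/fracmanager" // assumed module path
)

// classifyAppendErr is a hypothetical helper mapping Append errors to an
// HTTP-style status, so a bulk endpoint can signal backpressure to clients.
func classifyAppendErr(err error) int {
	switch {
	case err == nil:
		return 200
	case errors.Is(err, fracmanager.ErrFractionSuspended):
		return 503 // capacity exceeded: the client should back off and retry
	default:
		return 500
	}
}

func Example_classifyAppendErr() {
	fmt.Println(classifyAppendErr(fracmanager.ErrFractionSuspended))
	// Output: 503
}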
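
The Run/Cancel contract of TaskManager (Cancel cancels the task's context and blocks until the action returns; this is how removeOverflowed stops an in-flight offload) can be exercised with a small external test. A minimal sketch; the test name and fraction ID are illustrative, and the module path is assumed as above:

package fracmanager_test

import (
	"context"
	"testing"

	"github.com/ozontech/seq-db/fracmanager" // assumed module path
)

func TestTaskManagerCancel(t *testing.T) {
	tm := fracmanager.NewTaskManager()

	started := make(chan struct{})
	tm.Run(context.Background(), "frac-0001", func(ctx context.Context) {
		close(started)
		<-ctx.Done() // a well-behaved action honors cancellation, as offloadWithRetry does
	})

	<-started              // ensure the action is running
	tm.Cancel("frac-0001") // returns only after the action above has exited
}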