From 27e28db69b9c201879b4a5317dcc1ceb29546e83 Mon Sep 17 00:00:00 2001 From: mirkoCrobu Date: Tue, 2 Dec 2025 08:59:55 +0100 Subject: [PATCH 1/3] add file lock removal --- internal/orchestrator/system_properties/properties.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/internal/orchestrator/system_properties/properties.go b/internal/orchestrator/system_properties/properties.go index 03083d890..4e4037b29 100644 --- a/internal/orchestrator/system_properties/properties.go +++ b/internal/orchestrator/system_properties/properties.go @@ -211,6 +211,9 @@ func getLock(flock *flock.Flock, lockFn lockFunc, errorMsg string) (UnlockFunc, if err := flock.Unlock(); err != nil { return fmt.Errorf("failed to unlock file lock for %s: %w", flock.Path(), err) } + if err := os.Remove(flock.Path()); err != nil && !os.IsNotExist(err) { + slog.Warn("failed to remove lock file", "path", flock.Path(), "error", err) + } return nil }, nil } From 0bdf8d9ecd762897994c732d8d9386faf603954a Mon Sep 17 00:00:00 2001 From: mirkoCrobu Date: Thu, 18 Dec 2025 12:37:44 +0100 Subject: [PATCH 2/3] Revert "add file lock removal" This reverts commit 27e28db69b9c201879b4a5317dcc1ceb29546e83. --- internal/orchestrator/system_properties/properties.go | 3 --- 1 file changed, 3 deletions(-) diff --git a/internal/orchestrator/system_properties/properties.go b/internal/orchestrator/system_properties/properties.go index 4e4037b29..03083d890 100644 --- a/internal/orchestrator/system_properties/properties.go +++ b/internal/orchestrator/system_properties/properties.go @@ -211,9 +211,6 @@ func getLock(flock *flock.Flock, lockFn lockFunc, errorMsg string) (UnlockFunc, if err := flock.Unlock(); err != nil { return fmt.Errorf("failed to unlock file lock for %s: %w", flock.Path(), err) } - if err := os.Remove(flock.Path()); err != nil && !os.IsNotExist(err) { - slog.Warn("failed to remove lock file", "path", flock.Path(), "error", err) - } return nil }, nil } From 4477429fa51d2debdbeb06f0328ebbf7e8c7c14a Mon Sep 17 00:00:00 2001 From: mirkoCrobu Date: Thu, 18 Dec 2025 14:42:06 +0100 Subject: [PATCH 3/3] refactoring get lock file and add documentation --- .../system_properties/properties.go | 28 ++++++++++++------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/internal/orchestrator/system_properties/properties.go b/internal/orchestrator/system_properties/properties.go index 03083d890..fe04940cb 100644 --- a/internal/orchestrator/system_properties/properties.go +++ b/internal/orchestrator/system_properties/properties.go @@ -56,6 +56,10 @@ func ReadPropertyKeys(filePath string) ([]string, error) { return mapKeys, err } +// We use renameio to ensure atomic writes. This prevents data corruption (partial writes) +// in case of a system crash or power loss during the save operation. +// NOTE: This mechanism changes the file's Inode on every write, which is why we cannot +// use the data file itself for file locking (flock). func UpsertProperty(filePath string, key string, value []byte) error { if err := validateKey(key); err != nil { return fmt.Errorf("%w: %w", ErrInvalidKey, err) @@ -184,6 +188,14 @@ func emptyUnlockFunc() error { return nil } +// getLock attempts to acquire a file lock. +// We explicitly do NOT remove the lock file in case of timeout or failure, nor after a successful unlock. +// Removing the lock file would introduce a "TOCTOU" (Time-of-check to Time-of-use) race condition: +// 1. Process A holds the lock on Inode X. +// 2. Process B times out and deletes the file (removing the directory entry for Inode X). +// 3. Process C creates a NEW lock file (Inode Y) and acquires the lock. +// Result: Process A and Process C would both hold valid locks on different Inodes, leading to data corruption. +// Therefore, we leave the file on disk; it acts as a persistent anchor for synchronization. func getLock(flock *flock.Flock, lockFn lockFunc, errorMsg string) (UnlockFunc, error) { ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second) defer cancel() @@ -191,17 +203,9 @@ func getLock(flock *flock.Flock, lockFn lockFunc, errorMsg string) (UnlockFunc, locked, err := lockFn(ctx, 100*time.Millisecond) if err != nil { if errors.Is(err, context.DeadlineExceeded) { - if err := flock.Unlock(); err != nil { - slog.Error("failed to unlock file lock", "path", flock.Path(), "error", err) - } - if err := os.Remove(flock.Path()); err != nil { - slog.Error("failed to delete lock file", "path", flock.Path(), "error", err) - } - locked = false - slog.Warn("lock file removed due to timeout", "path", flock.Path()) - } else { - return emptyUnlockFunc, fmt.Errorf("failed trying to acquire %s for %s: %w", errorMsg, flock.Path(), err) + return emptyUnlockFunc, fmt.Errorf("timeout acquiring lock for %s", flock.Path()) } + return emptyUnlockFunc, fmt.Errorf("failed trying to acquire %s for %s: %w", errorMsg, flock.Path(), err) } if !locked { return emptyUnlockFunc, fmt.Errorf("unable to acquire %s for %s", errorMsg, flock.Path()) @@ -225,6 +229,10 @@ func getReadLock(filePath string) (UnlockFunc, error) { return getLock(fileLock, fileLock.TryRLockContext, "read lock") } +// getLockFilePath returns the path to a sidecar lock file (e.g., "data.json.lock"). +// We must use a separate file for locking because the main data file is written atomically +// (via renameio), which changes its Inode on every save. +// This sidecar file remains stable (same Inode) and acts as a persistent mutex anchor. func getLockFilePath(path string) string { return path + ".lock" }