Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions include/iocore/cache/CacheDefs.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ enum CacheEventType {
CACHE_EVENT_SCAN_OPERATION_BLOCKED = CACHE_EVENT_EVENTS_START + 23,
CACHE_EVENT_SCAN_OPERATION_FAILED = CACHE_EVENT_EVENTS_START + 24,
CACHE_EVENT_SCAN_DONE = CACHE_EVENT_EVENTS_START + 25,
CACHE_EVENT_OPEN_DIR_RETRY = CACHE_EVENT_EVENTS_START + 26,
//////////////////////////
// Internal error codes //
//////////////////////////
Expand Down
61 changes: 39 additions & 22 deletions src/iocore/cache/Cache.cc
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,28 @@ DbgCtl dbg_ctl_cache_init{"cache_init"};
DbgCtl dbg_ctl_cache_hosting{"cache_hosting"};
DbgCtl dbg_ctl_cache_update{"cache_update"};

CacheVC *
new_CacheVC_for_read(Continuation *cont, const CacheKey *key, CacheHTTPHdr *request, const HttpConfigAccessor *params,
StripeSM *stripe)
{
CacheVC *cache_vc = new_CacheVC(cont);

cache_vc->first_key = *key;
cache_vc->key = *key;
cache_vc->earliest_key = *key;
cache_vc->stripe = stripe;
cache_vc->vio.op = VIO::READ;
cache_vc->op_type = static_cast<int>(CacheOpType::Read);
cache_vc->frag_type = CACHE_FRAG_TYPE_HTTP;
cache_vc->params = params;
cache_vc->request.copy_shallow(request);

ts::Metrics::Gauge::increment(cache_rsb.status[cache_vc->op_type].active);
ts::Metrics::Gauge::increment(stripe->cache_vol->vol_rsb.status[cache_vc->op_type].active);

return cache_vc;
}

} // end anonymous namespace

// Global list of the volumes created
Expand Down Expand Up @@ -543,20 +565,24 @@ Cache::open_read(Continuation *cont, const CacheKey *key, CacheHTTPHdr *request,
OpenDirEntry *od = nullptr;
CacheVC *c = nullptr;

// Read-While-Writer
// This OpenDirEntry lookup doesn't need stripe mutex lock because OpenDir has own reader-writer lock
od = stripe->open_read(key);
Comment on lines +568 to +570
Copy link

Copilot AI Jan 12, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Potential lock ordering issue: This code calls stripe->open_read() (which now acquires _shared_mutex as a reader) while holding stripe->mutex. In other places like CacheRead.cc line 212, the same pattern exists. However, in Cache.cc line 570, open_read() is called without holding stripe->mutex. While this change is intentional to reduce contention, ensure that this mixed lock ordering (sometimes stripe->mutex then _shared_mutex, sometimes just _shared_mutex) doesn't introduce potential deadlock scenarios. The Bravo reader-writer lock should handle this safely since multiple readers can coexist, but this should be verified through testing under high concurrency.

Copilot uses AI. Check for mistakes.
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess this is fine, because prior to the change, when we jump to the CacheVC::openReadFromWriter from the Lwriter label, it's out of scope of StripeSM mutex lock. CacheVC::openReadFromWriter has another check of OpenDir.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

od = stripe->open_read(&first_key); // recheck in case the lock failed

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, the reader lock of OpenDir might be need to be hold while this OpenDirEntry is used in the CacheVC::openReadFromWriter. Let me consider in case.

if (od != nullptr) {
c = new_CacheVC_for_read(cont, key, request, params, stripe);
c->od = od;
cont->handleEvent(CACHE_EVENT_OPEN_READ_RWW, nullptr);
SET_CONTINUATION_HANDLER(c, &CacheVC::openReadFromWriter);
if (c->handleEvent(EVENT_IMMEDIATE, nullptr) == EVENT_DONE) {
return ACTION_RESULT_DONE;
}
return &c->_action;
}

{
CACHE_TRY_LOCK(lock, stripe->mutex, mutex->thread_holding);
if (!lock.is_locked() || (od = stripe->open_read(key)) || stripe->directory.probe(key, stripe, &result, &last_collision)) {
c = new_CacheVC(cont);
c->first_key = c->key = c->earliest_key = *key;
c->stripe = stripe;
c->vio.op = VIO::READ;
c->op_type = static_cast<int>(CacheOpType::Read);
ts::Metrics::Gauge::increment(cache_rsb.status[c->op_type].active);
ts::Metrics::Gauge::increment(stripe->cache_vol->vol_rsb.status[c->op_type].active);
c->request.copy_shallow(request);
c->frag_type = CACHE_FRAG_TYPE_HTTP;
c->params = params;
c->od = od;
if (!lock.is_locked() || stripe->directory.probe(key, stripe, &result, &last_collision)) {
c = new_CacheVC_for_read(cont, key, request, params, stripe);
}
if (!lock.is_locked()) {
SET_CONTINUATION_HANDLER(c, &CacheVC::openReadStartHead);
Expand All @@ -566,9 +592,7 @@ Cache::open_read(Continuation *cont, const CacheKey *key, CacheHTTPHdr *request,
if (!c) {
goto Lmiss;
}
if (c->od) {
goto Lwriter;
}

// hit
c->dir = c->first_dir = result;
c->last_collision = last_collision;
Expand All @@ -587,13 +611,6 @@ Cache::open_read(Continuation *cont, const CacheKey *key, CacheHTTPHdr *request,
ts::Metrics::Counter::increment(stripe->cache_vol->vol_rsb.status[static_cast<int>(CacheOpType::Read)].failure);
cont->handleEvent(CACHE_EVENT_OPEN_READ_FAILED, reinterpret_cast<void *>(-ECACHE_NO_DOC));
return ACTION_RESULT_DONE;
Lwriter:
cont->handleEvent(CACHE_EVENT_OPEN_READ_RWW, nullptr);
SET_CONTINUATION_HANDLER(c, &CacheVC::openReadFromWriter);
if (c->handleEvent(EVENT_IMMEDIATE, nullptr) == EVENT_DONE) {
return ACTION_RESULT_DONE;
}
return &c->_action;
Lcallreturn:
if (c->handleEvent(AIO_EVENT_DONE, nullptr) == EVENT_DONE) {
return ACTION_RESULT_DONE;
Expand Down
51 changes: 37 additions & 14 deletions src/iocore/cache/CacheDir.cc
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,12 @@
#include "PreservationTable.h"
#include "Stripe.h"

#include "iocore/eventsystem/Event.h"
#include "tscore/hugepages.h"
#include "tscore/Random.h"
#include "ts/ats_probe.h"
#include "tsutil/Bravo.h"
#include <mutex>

#ifdef LOOP_CHECK_MODE
#define DIR_LOOP_THRESHOLD 1000
Expand All @@ -46,6 +49,7 @@ DbgCtl dbg_ctl_dir_clean{"dir_clean"};
#ifdef DEBUG

DbgCtl dbg_ctl_cache_stats{"cache_stats"};
DbgCtl dbg_ctl_cache_open_dir{"cache_open_dir"};
Copy link

Copilot AI Jan 12, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unused debug control: The debug control variable dbg_ctl_cache_open_dir is defined but never used in the code. If this was added for future debugging purposes, consider adding a comment explaining its intended use. Otherwise, it should be removed to avoid clutter.

Suggested change
DbgCtl dbg_ctl_cache_open_dir{"cache_open_dir"};

Copilot uses AI. Check for mistakes.
DbgCtl dbg_ctl_dir_probe_hit{"dir_probe_hit"};
DbgCtl dbg_ctl_dir_probe_tag{"dir_probe_tag"};
DbgCtl dbg_ctl_dir_probe_miss{"dir_probe_miss"};
Expand Down Expand Up @@ -78,10 +82,11 @@ OpenDir::OpenDir()
int
OpenDir::open_write(CacheVC *cont, int allow_if_writers, int max_writers)
{
ink_assert(cont->stripe->mutex->thread_holding == this_ethread());
std::lock_guard lock(_shared_mutex);

unsigned int h = cont->first_key.slice32(0);
int b = h % OPEN_DIR_BUCKETS;
for (OpenDirEntry *d = bucket[b].head; d; d = d->link.next) {
for (OpenDirEntry *d = _bucket[b].head; d; d = d->link.next) {
if (!(d->writers.head->first_key == cont->first_key)) {
continue;
}
Expand All @@ -107,17 +112,27 @@ OpenDir::open_write(CacheVC *cont, int allow_if_writers, int max_writers)
dir_clear(&od->first_dir);
cont->od = od;
cont->write_vector = &od->vector;
bucket[b].push(od);
_bucket[b].push(od);
return 1;
}

/**
This event handler is called in two cases:

1. Direct call from OpenDir::close_write - writer lock is already acquired
2. Self retry through event system - need to acquire writer lock
*/
int
OpenDir::signal_readers(int /* event ATS_UNUSED */, Event * /* e ATS_UNUSED */)
OpenDir::signal_readers(int event, Event * /* ATS UNUSED*/)
Copy link

Copilot AI Jan 12, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The comment formatting uses "ATS UNUSED" with a space, but elsewhere in the codebase it's typically "ATS_UNUSED" with an underscore. Consider maintaining consistency with the existing pattern.

Suggested change
OpenDir::signal_readers(int event, Event * /* ATS UNUSED*/)
OpenDir::signal_readers(int event, Event * /* ATS_UNUSED */)

Copilot uses AI. Check for mistakes.
{
if (event == CACHE_EVENT_OPEN_DIR_RETRY) {
_shared_mutex.lock();
}

Queue<CacheVC, Link_CacheVC_opendir_link> newly_delayed_readers;
EThread *t = mutex->thread_holding;
CacheVC *c = nullptr;
while ((c = delayed_readers.dequeue())) {
while ((c = _delayed_readers.dequeue())) {
CACHE_TRY_LOCK(lock, c->mutex, t);
if (lock.is_locked()) {
c->f.open_read_timeout = 0;
Expand All @@ -127,28 +142,34 @@ OpenDir::signal_readers(int /* event ATS_UNUSED */, Event * /* e ATS_UNUSED */)
newly_delayed_readers.push(c);
}
if (newly_delayed_readers.head) {
delayed_readers = newly_delayed_readers;
EThread *t1 = newly_delayed_readers.head->mutex->thread_holding;
_delayed_readers = newly_delayed_readers;
EThread *t1 = newly_delayed_readers.head->mutex->thread_holding;
if (!t1) {
t1 = mutex->thread_holding;
}
t1->schedule_in(this, HRTIME_MSECONDS(cache_config_mutex_retry_delay));
t1->schedule_in(this, HRTIME_MSECONDS(cache_config_mutex_retry_delay), CACHE_EVENT_OPEN_DIR_RETRY);
}
return 0;

if (event == CACHE_EVENT_OPEN_DIR_RETRY) {
_shared_mutex.unlock();
}

return EVENT_DONE;
}

int
OpenDir::close_write(CacheVC *cont)
{
ink_assert(cont->stripe->mutex->thread_holding == this_ethread());
std::lock_guard lock(_shared_mutex);

cont->od->writers.remove(cont);
cont->od->num_writers--;
if (!cont->od->writers.head) {
unsigned int h = cont->first_key.slice32(0);
int b = h % OPEN_DIR_BUCKETS;
bucket[b].remove(cont->od);
delayed_readers.append(cont->od->readers);
signal_readers(0, nullptr);
_bucket[b].remove(cont->od);
_delayed_readers.append(cont->od->readers);
signal_readers(EVENT_CALL, nullptr);
cont->od->vector.clear();
THREAD_FREE(cont->od, openDirEntryAllocator, cont->mutex->thread_holding);
}
Expand All @@ -159,9 +180,11 @@ OpenDir::close_write(CacheVC *cont)
OpenDirEntry *
OpenDir::open_read(const CryptoHash *key) const
{
ts::bravo::shared_lock lock(_shared_mutex);

unsigned int h = key->slice32(0);
int b = h % OPEN_DIR_BUCKETS;
for (OpenDirEntry *d = bucket[b].head; d; d = d->link.next) {
for (OpenDirEntry *d = _bucket[b].head; d; d = d->link.next) {
if (d->writers.head->first_key == *key) {
return d;
}
Expand Down
18 changes: 13 additions & 5 deletions src/iocore/cache/P_CacheDir.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include "iocore/eventsystem/Continuation.h"
#include "iocore/aio/AIO.h"
#include "tscore/Version.h"
#include "tsutil/Bravo.h"

#include <cstdint>
#include <ctime>
Expand Down Expand Up @@ -225,16 +226,23 @@ struct OpenDirEntry {
}
};

struct OpenDir : public Continuation {
Queue<CacheVC, Link_CacheVC_opendir_link> delayed_readers;
DLL<OpenDirEntry> bucket[OPEN_DIR_BUCKETS];
class OpenDir : public Continuation
{
public:
OpenDir();

int open_write(CacheVC *c, int allow_if_writers, int max_writers);
int close_write(CacheVC *c);
OpenDirEntry *open_read(const CryptoHash *key) const;
int signal_readers(int event, Event *e);

OpenDir();
// event handler
int signal_readers(int event, Event *e);

private:
mutable ts::bravo::shared_mutex _shared_mutex;

Queue<CacheVC, Link_CacheVC_opendir_link> _delayed_readers;
DLL<OpenDirEntry> _bucket[OPEN_DIR_BUCKETS];
};
Comment on lines +229 to 246
Copy link

Copilot AI Jan 12, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing documentation: The class documentation should be updated to explain the new locking mechanism. Specifically, document that OpenDir now uses its own reader-writer lock (_shared_mutex) instead of relying on the StripeSM mutex, which allows concurrent reads while writes are in progress. This is a significant architectural change that should be clearly documented for maintainability.

Copilot uses AI. Check for mistakes.

struct CacheSync : public Continuation {
Expand Down
15 changes: 8 additions & 7 deletions src/iocore/cache/StripeSM.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,14 +101,15 @@ class StripeSM : public Continuation, public Stripe

int recover_data();

int open_write(CacheVC *cont, int allow_if_writers, int max_writers);
int open_write_lock(CacheVC *cont, int allow_if_writers, int max_writers);
int close_write(CacheVC *cont);
int begin_read(CacheVC *cont) const;
// unused read-write interlock code
// currently http handles a write-lock failure by retrying the read
// OpenDir API
int open_write(CacheVC *cont, int allow_if_writers, int max_writers);
int open_write_lock(CacheVC *cont, int allow_if_writers, int max_writers);
int close_write(CacheVC *cont);
OpenDirEntry *open_read(const CryptoHash *key) const;
int close_read(CacheVC *cont) const;

// PreservationTable API
int begin_read(CacheVC *cont) const;
int close_read(CacheVC *cont) const;
Comment on lines +104 to +112
Copy link

Copilot AI Jan 12, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Inconsistent formatting: The comment "// OpenDir API" and "// PreservationTable API" use double slashes, but there are also methods without such grouping comments. Consider whether this level of API grouping is helpful, and if so, ensure it's applied consistently throughout the class. For example, other method groups could benefit from similar API comments.

Copilot uses AI. Check for mistakes.

int clear_dir_aio();
int clear_dir();
Expand Down