-
Notifications
You must be signed in to change notification settings - Fork 112
Uniformize preemptions/termination checks and implement graceful SIGINT handler #745
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
7032874
659b8e7
59af636
98a7e32
1bac257
1d2bc78
606a5f8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -1,6 +1,6 @@ | ||||||||||||||||||||||||||||||||||||||||
| /* clang-format off */ | ||||||||||||||||||||||||||||||||||||||||
| /* | ||||||||||||||||||||||||||||||||||||||||
| * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. | ||||||||||||||||||||||||||||||||||||||||
| * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. | ||||||||||||||||||||||||||||||||||||||||
| * SPDX-License-Identifier: Apache-2.0 | ||||||||||||||||||||||||||||||||||||||||
| */ | ||||||||||||||||||||||||||||||||||||||||
| /* clang-format on */ | ||||||||||||||||||||||||||||||||||||||||
|
|
@@ -89,5 +89,18 @@ class default_set_solution_callback_t : public set_solution_callback_t { | |||||||||||||||||||||||||||||||||||||||
| PyObject* pyCallbackClass; | ||||||||||||||||||||||||||||||||||||||||
| }; | ||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||
| class default_check_termination_callback_t : public check_termination_callback_t { | ||||||||||||||||||||||||||||||||||||||||
| public: | ||||||||||||||||||||||||||||||||||||||||
| bool check_termination() override | ||||||||||||||||||||||||||||||||||||||||
| { | ||||||||||||||||||||||||||||||||||||||||
| PyObject* res = PyObject_CallMethod(this->pyCallbackClass, "check_termination", nullptr); | ||||||||||||||||||||||||||||||||||||||||
| bool should_terminate = PyObject_IsTrue(res); | ||||||||||||||||||||||||||||||||||||||||
| Py_DECREF(res); | ||||||||||||||||||||||||||||||||||||||||
| return should_terminate; | ||||||||||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||||||||||
|
Comment on lines
+94
to
+100
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Missing NULL check on Python API return value will cause crash on exception.
🔎 Proposed fix bool check_termination() override
{
PyObject* res = PyObject_CallMethod(this->pyCallbackClass, "check_termination", nullptr);
+ if (res == nullptr) {
+ // Python exception occurred - treat as termination request
+ PyErr_Print();
+ return true;
+ }
bool should_terminate = PyObject_IsTrue(res);
Py_DECREF(res);
return should_terminate;
}📝 Committable suggestion
Suggested change
🤖 Prompt for AI Agents |
||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||
| PyObject* pyCallbackClass; | ||||||||||||||||||||||||||||||||||||||||
| }; | ||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||
| } // namespace internals | ||||||||||||||||||||||||||||||||||||||||
| } // namespace cuopt | ||||||||||||||||||||||||||||||||||||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,125 @@ | ||
| /* clang-format off */ | ||
| /* | ||
| * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. | ||
| * SPDX-License-Identifier: Apache-2.0 | ||
| */ | ||
| /* clang-format on */ | ||
|
|
||
| #pragma once | ||
|
|
||
| #include <chrono> | ||
| #include <csignal> | ||
| #include <cstdlib> | ||
| #include <deque> | ||
| #include <functional> | ||
| #include <mutex> | ||
| #include <unordered_map> | ||
|
|
||
| namespace cuopt { | ||
|
|
||
| /** | ||
| * @brief Global singleton that handles SIGINT (Ctrl-C) and invokes registered callbacks. | ||
| * | ||
| * Components that want to respond to user interrupts register a callback via | ||
| * register_callback() and unregister via unregister_callback(). | ||
| * | ||
| * Safety feature: If the user presses Ctrl-C 5 times within 5 seconds, | ||
| * the process is forcefully terminated. | ||
| */ | ||
| class user_interrupt_handler_t { | ||
| public: | ||
| static user_interrupt_handler_t& instance() | ||
| { | ||
| static user_interrupt_handler_t instance; | ||
| return instance; | ||
| } | ||
|
|
||
| bool termination_requested() const { return terminate_signal_received_.load(); } | ||
|
|
||
| /** | ||
| * @brief Register a callback to be invoked on SIGINT. | ||
| * @param callback Function to call when interrupt is received. | ||
| * @return Registration ID for later removal. | ||
| */ | ||
| size_t register_callback(std::function<void()> callback) | ||
| { | ||
| std::lock_guard<std::mutex> lock(mutex_); | ||
| size_t id = next_id_++; | ||
| callbacks_[id] = std::move(callback); | ||
| return id; | ||
| } | ||
|
|
||
| /** | ||
| * @brief Unregister a previously registered callback. | ||
| * @param id Registration ID returned by register_callback(). | ||
| */ | ||
| void unregister_callback(size_t id) | ||
| { | ||
| std::lock_guard<std::mutex> lock(mutex_); | ||
| callbacks_.erase(id); | ||
| } | ||
|
|
||
| // Non-copyable, non-movable | ||
| user_interrupt_handler_t(const user_interrupt_handler_t&) = delete; | ||
| user_interrupt_handler_t& operator=(const user_interrupt_handler_t&) = delete; | ||
| user_interrupt_handler_t(user_interrupt_handler_t&&) = delete; | ||
| user_interrupt_handler_t& operator=(user_interrupt_handler_t&&) = delete; | ||
|
|
||
| private: | ||
| static constexpr int force_quit_threshold = 5; | ||
| static constexpr int force_quit_window_seconds = 5; | ||
|
|
||
| using time_point = std::chrono::steady_clock::time_point; | ||
|
|
||
| user_interrupt_handler_t() { previous_handler_ = std::signal(SIGINT, &handle_signal); } | ||
|
|
||
| ~user_interrupt_handler_t() | ||
| { | ||
| if (previous_handler_ != SIG_ERR) { std::signal(SIGINT, previous_handler_); } | ||
| } | ||
|
|
||
| static void handle_signal(int /*sig*/) | ||
| { | ||
| auto& self = instance(); | ||
| std::lock_guard<std::mutex> lock(self.mutex_); | ||
|
|
||
| self.terminate_signal_received_ = true; | ||
|
|
||
| auto now = std::chrono::steady_clock::now(); | ||
| auto cutoff = now - std::chrono::seconds(force_quit_window_seconds); | ||
|
|
||
| // Remove timestamps older than the window | ||
| while (!self.interrupt_times_.empty() && self.interrupt_times_.front() < cutoff) { | ||
| self.interrupt_times_.pop_front(); | ||
| } | ||
| self.interrupt_times_.push_back(now); | ||
|
|
||
| // Force quit if too many interrupts in the window | ||
| if (static_cast<int>(self.interrupt_times_.size()) >= force_quit_threshold) { | ||
| fprintf(stderr, | ||
| "Force quit: %d interrupts in %d seconds.", | ||
| force_quit_threshold, | ||
| force_quit_window_seconds); | ||
| std::_Exit(128 + SIGINT); | ||
| } | ||
|
|
||
| // Invoke all registered callbacks | ||
| for (const auto& [id, callback] : self.callbacks_) { | ||
| callback(); | ||
| } | ||
|
|
||
| auto remaining = force_quit_threshold - static_cast<int>(self.interrupt_times_.size()); | ||
| fprintf(stderr, | ||
| "Interrupt received. Stopping solver... (press Ctrl-C %d more time(s) to force quit)", | ||
| remaining); | ||
| } | ||
|
Comment on lines
+81
to
+115
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Calling mutex and invoking callbacks in a signal handler is undefined behavior. Signal handlers have strict requirements on what functions can be called safely. Per POSIX, only async-signal-safe functions are permitted in signal handlers. This can cause deadlocks if SIGINT arrives while Consider one of these safer approaches:
🔎 Suggested safer pattern static void handle_signal(int /*sig*/)
{
auto& self = instance();
- std::lock_guard<std::mutex> lock(self.mutex_);
-
- self.terminate_signal_received_ = true;
-
- auto now = std::chrono::steady_clock::now();
- auto cutoff = now - std::chrono::seconds(force_quit_window_seconds);
-
- // Remove timestamps older than the window
- while (!self.interrupt_times_.empty() && self.interrupt_times_.front() < cutoff) {
- self.interrupt_times_.pop_front();
- }
- self.interrupt_times_.push_back(now);
-
- // Force quit if too many interrupts in the window
- if (static_cast<int>(self.interrupt_times_.size()) >= force_quit_threshold) {
- fprintf(stderr,
- "Force quit: %d interrupts in %d seconds.",
- force_quit_threshold,
- force_quit_window_seconds);
- std::_Exit(128 + SIGINT);
- }
-
- // Invoke all registered callbacks
- for (const auto& [id, callback] : self.callbacks_) {
- callback();
- }
-
- auto remaining = force_quit_threshold - static_cast<int>(self.interrupt_times_.size());
- fprintf(stderr,
- "Interrupt received. Stopping solver... (press Ctrl-C %d more time(s) to force quit)",
- remaining);
+ // Only use async-signal-safe operations here
+ self.terminate_signal_received_.store(true, std::memory_order_relaxed);
+
+ // Increment interrupt count atomically for force-quit detection
+ int count = self.interrupt_count_.fetch_add(1, std::memory_order_relaxed) + 1;
+
+ if (count >= force_quit_threshold) {
+ // _Exit is async-signal-safe
+ std::_Exit(128 + SIGINT);
+ }
+
+ // Use write() instead of fprintf - it's async-signal-safe
+ const char msg[] = "Interrupt received. Stopping solver...\n";
+ write(STDERR_FILENO, msg, sizeof(msg) - 1);
}
+
+ // Add atomic counter for simpler force-quit detection
+ std::atomic<int> interrupt_count_{0};
🤖 Prompt for AI Agents |
||
|
|
||
| std::mutex mutex_; | ||
| std::unordered_map<size_t, std::function<void()>> callbacks_; | ||
| std::atomic<bool> terminate_signal_received_{false}; | ||
| size_t next_id_{0}; | ||
| std::deque<time_point> interrupt_times_; | ||
| void (*previous_handler_)(int) = SIG_ERR; | ||
| }; | ||
|
|
||
| } // namespace cuopt | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🛠️ Refactor suggestion | 🟠 Major
Add Doxygen documentation for new public API methods.
The new
set_lp_callbackandget_lp_callbacksmethods lack documentation comments. Public APIs in headers undercpp/include/cuopt/should include Doxygen-style documentation covering:As per coding guidelines for public header files.
📋 Suggested documentation template
🤖 Prompt for AI Agents