diff --git a/CHANGES.rst b/CHANGES.rst index 1940901..fff30be 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -5,7 +5,43 @@ 3.2.6 (unreleased) ================== -- Nothing changed yet. +- Fix multiple crash paths during interpreter shutdown on Python < 3.11 + (observed with uWSGI worker recycling). Three root causes were + identified and fixed: + + 1. ``clear_deleteme_list()`` used a ``PythonAllocator``-backed vector + copy (``PyMem_Malloc``), which could SIGSEGV during early + ``Py_FinalizeEx`` when Python's allocator pools are partially torn + down. Replaced with ``std::swap`` (zero-allocation, + constant-time) and switched the ``deleteme`` vector to + ``std::allocator`` (system ``malloc``). + + 2. ``ThreadState`` objects were allocated via ``PyObject_Malloc``, + placing them in ``pymalloc`` pools that can be disrupted during + finalization. Switched to ``std::malloc`` / ``std::free`` so + ``ThreadState`` memory remains valid throughout ``Py_FinalizeEx``. + + 3. ``_Py_IsFinalizing()`` is only set *after* ``call_py_exitfuncs`` + and ``_PyGC_CollectIfEnabled`` complete inside ``Py_FinalizeEx``, + so code in atexit handlers or ``__del__`` methods could still call + ``greenlet.getcurrent()`` when type objects had already been + invalidated, crashing in ``PyType_IsSubtype``. An atexit handler + is now registered at module init (LIFO = runs first) that sets a + shutdown flag checked by ``getcurrent()``, + ``PyGreenlet_GetCurrent()``, and ``clear_deleteme_list()``. + + Additionally, ``clear_deleteme_list()`` now preserves any pending + Python exception around its cleanup loop, fixing a latent bug where + an unrelated exception (e.g. one set by ``throw()``) could be + swallowed by ``PyErr_WriteUnraisable`` / ``PyErr_Clear`` inside the + loop. + + This is distinct from the dealloc crash fixed in 3.2.5 + (`PR #495 + `_). + Backported from `PR #499 + `_ by Nicolas + Bouvrette. 3.2.5 (2026-02-20) diff --git a/setup.py b/setup.py index c0e3c34..5569a86 100755 --- a/setup.py +++ b/setup.py @@ -225,7 +225,7 @@ def get_greenlet_version(): 'Documentation': 'https://greenlet.readthedocs.io/', 'Changes': 'https://greenlet.readthedocs.io/en/latest/changes.html', }, - license="MIT AND Python-2.0", + license="MIT AND PSF-2.0", license_files=[ 'LICENSE', 'LICENSE.PSF', diff --git a/src/greenlet/CObjects.cpp b/src/greenlet/CObjects.cpp index c135995..4f7a966 100644 --- a/src/greenlet/CObjects.cpp +++ b/src/greenlet/CObjects.cpp @@ -29,6 +29,11 @@ extern "C" { static PyGreenlet* PyGreenlet_GetCurrent(void) { +#if !GREENLET_PY311 + if (g_greenlet_shutting_down || Py_IsFinalizing()) { + return nullptr; + } +#endif return GET_THREAD_STATE().state().get_current().relinquish_ownership(); } diff --git a/src/greenlet/PyGreenlet.cpp b/src/greenlet/PyGreenlet.cpp index 64589de..b292a28 100644 --- a/src/greenlet/PyGreenlet.cpp +++ b/src/greenlet/PyGreenlet.cpp @@ -204,7 +204,7 @@ _green_dealloc_kill_started_non_main_greenlet(BorrowedGreenlet self) // See: https://github.com/python-greenlet/greenlet/issues/411 // https://github.com/python-greenlet/greenlet/issues/351 #if !GREENLET_PY311 - if (_Py_IsFinalizing()) { + if (Py_IsFinalizing()) { self->murder_in_place(); return 1; } diff --git a/src/greenlet/PyModule.cpp b/src/greenlet/PyModule.cpp index 6adcb5c..df2f49b 100644 --- a/src/greenlet/PyModule.cpp +++ b/src/greenlet/PyModule.cpp @@ -17,6 +17,29 @@ using greenlet::ThreadState; # pragma clang diagnostic ignored "-Wunused-variable" #endif +// On Python < 3.11, _Py_IsFinalizing() is only set AFTER +// call_py_exitfuncs and _PyGC_CollectIfEnabled finish inside +// Py_FinalizeEx. Code running in atexit handlers or __del__ +// methods can still call greenlet.getcurrent(), but by that +// time type objects may have been invalidated, causing +// SIGSEGV in PyType_IsSubtype. This flag is set by an atexit +// handler registered at module init (LIFO = runs first). +#if !GREENLET_PY311 +int g_greenlet_shutting_down = 0; + +static PyObject* +_greenlet_atexit_callback(PyObject* UNUSED(self), PyObject* UNUSED(args)) +{ + g_greenlet_shutting_down = 1; + Py_RETURN_NONE; +} + +static PyMethodDef _greenlet_atexit_method = { + "_greenlet_cleanup", _greenlet_atexit_callback, + METH_NOARGS, NULL +}; +#endif + PyDoc_STRVAR(mod_getcurrent_doc, "getcurrent() -> greenlet\n" "\n" @@ -26,6 +49,11 @@ PyDoc_STRVAR(mod_getcurrent_doc, static PyObject* mod_getcurrent(PyObject* UNUSED(module)) { +#if !GREENLET_PY311 + if (g_greenlet_shutting_down || Py_IsFinalizing()) { + Py_RETURN_NONE; + } +#endif return GET_THREAD_STATE().state().get_current().relinquish_ownership_o(); } diff --git a/src/greenlet/TThreadState.hpp b/src/greenlet/TThreadState.hpp index cd97c84..c7cfd7a 100644 --- a/src/greenlet/TThreadState.hpp +++ b/src/greenlet/TThreadState.hpp @@ -1,6 +1,7 @@ #ifndef GREENLET_THREAD_STATE_HPP #define GREENLET_THREAD_STATE_HPP +#include #include #include @@ -22,6 +23,13 @@ using greenlet::refs::CreatedModule; using greenlet::refs::PyErrPieces; using greenlet::refs::NewReference; +// Defined in PyModule.cpp; set by an atexit handler to signal +// that the interpreter is shutting down. Only needed on +// Python < 3.11 where _Py_IsFinalizing() is set too late. +#if !GREENLET_PY311 +extern int g_greenlet_shutting_down; +#endif + namespace greenlet { /** * Thread-local state of greenlets. @@ -104,7 +112,13 @@ class ThreadState { /* Strong reference to the trace function, if any. */ OwnedObject tracefunc; - typedef std::vector > deleteme_t; + // Use std::allocator (malloc/free) instead of PythonAllocator + // (PyMem_Malloc) for the deleteme list. During Py_FinalizeEx on + // Python < 3.11, the PyObject_Malloc pool that holds ThreadState + // can be disrupted, corrupting any PythonAllocator-backed + // containers. Using std::allocator makes this vector independent + // of Python's allocator lifecycle. + typedef std::vector deleteme_t; /* A vector of raw PyGreenlet pointers representing things that need deleted when this thread is running. The vector owns the references, but you need to manually INCREF/DECREF as you use @@ -120,7 +134,6 @@ class ThreadState { static std::clock_t _clocks_used_doing_gc; static ImmortalString get_referrers_name; - static PythonAllocator allocator; G_NO_COPIES_OF_CLS(ThreadState); @@ -146,15 +159,21 @@ class ThreadState { public: - static void* operator new(size_t UNUSED(count)) + // Allocate ThreadState with malloc/free rather than Python's object + // allocator. ThreadState outlives many Python objects and must + // remain valid throughout Py_FinalizeEx. On Python < 3.11, + // PyObject_Malloc pools can be disrupted during early finalization, + // corrupting any C++ objects stored in them. + static void* operator new(size_t count) { - return ThreadState::allocator.allocate(1); + void* p = std::malloc(count); + if (!p) throw std::bad_alloc(); + return p; } static void operator delete(void* ptr) { - return ThreadState::allocator.deallocate(static_cast(ptr), - 1); + std::free(ptr); } static void init() @@ -283,33 +302,50 @@ class ThreadState { inline void clear_deleteme_list(const bool murder=false) { if (!this->deleteme.empty()) { - // It's possible we could add items to this list while - // running Python code if there's a thread switch, so we - // need to defensively copy it before that can happen. - deleteme_t copy = this->deleteme; - this->deleteme.clear(); // in case things come back on the list + // Move the list contents out with swap — a constant-time + // pointer exchange that never allocates. The previous code + // used a copy (deleteme_t copy = this->deleteme) which + // allocated through PythonAllocator / PyMem_Malloc; that + // could SIGSEGV during early Py_FinalizeEx on Python < 3.11 + // when the allocator is partially torn down. + deleteme_t copy; + std::swap(copy, this->deleteme); + + // During Py_FinalizeEx cleanup, the GC or atexit handlers + // may have already collected objects in this list, leaving + // dangling pointers. Attempting Py_DECREF on freed memory + // causes a SIGSEGV. On Python < 3.11, + // g_greenlet_shutting_down covers the early stages + // (before Py_IsFinalizing() is set). +#if !GREENLET_PY311 + if (g_greenlet_shutting_down || Py_IsFinalizing()) { + return; + } +#else + if (Py_IsFinalizing()) { + return; + } +#endif + + // Preserve any pending exception so that cleanup-triggered + // errors don't accidentally swallow an unrelated exception + // (e.g. one set by throw() before a switch). + PyErrPieces incoming_err; + for(deleteme_t::iterator it = copy.begin(), end = copy.end(); it != end; ++it ) { PyGreenlet* to_del = *it; if (murder) { - // Force each greenlet to appear dead; we can't raise an - // exception into it anymore anyway. to_del->pimpl->murder_in_place(); } - - // The only reference to these greenlets should be in - // this list, decreffing them should let them be - // deleted again, triggering calls to green_dealloc() - // in the correct thread (if we're not murdering). - // This may run arbitrary Python code and switch - // threads or greenlets! Py_DECREF(to_del); if (PyErr_Occurred()) { PyErr_WriteUnraisable(nullptr); PyErr_Clear(); } } + incoming_err.PyErrRestore(); } } @@ -371,7 +407,7 @@ class ThreadState { // Python 3.11+ restructured interpreter finalization so that // these APIs remain safe during shutdown. #if !GREENLET_PY311 - if (_Py_IsFinalizing()) { + if (Py_IsFinalizing()) { this->tracefunc.CLEAR(); if (this->current_greenlet) { this->current_greenlet->murder_in_place(); @@ -505,7 +541,6 @@ class ThreadState { }; ImmortalString ThreadState::get_referrers_name(nullptr); -PythonAllocator ThreadState::allocator; std::clock_t ThreadState::_clocks_used_doing_gc(0); diff --git a/src/greenlet/TThreadStateDestroy.cpp b/src/greenlet/TThreadStateDestroy.cpp index 449b788..ea7f6c4 100644 --- a/src/greenlet/TThreadStateDestroy.cpp +++ b/src/greenlet/TThreadStateDestroy.cpp @@ -177,11 +177,7 @@ struct ThreadState_DestroyNoGIL // segfault if we happen to get context switched, and maybe we should // just always implement our own AddPendingCall, but I'd like to see if // this works first -#if GREENLET_PY313 if (Py_IsFinalizing()) { -#else - if (_Py_IsFinalizing()) { -#endif #ifdef GREENLET_DEBUG // No need to log in the general case. Yes, we'll leak, // but we're shutting down so it should be ok. diff --git a/src/greenlet/greenlet.cpp b/src/greenlet/greenlet.cpp index e8d92a0..4e27973 100644 --- a/src/greenlet/greenlet.cpp +++ b/src/greenlet/greenlet.cpp @@ -232,6 +232,45 @@ greenlet_internal_mod_init() noexcept OwnedObject clocks_per_sec = OwnedObject::consuming(PyLong_FromSsize_t(CLOCKS_PER_SEC)); m.PyAddObject("CLOCKS_PER_SEC", clocks_per_sec); +#if !GREENLET_PY311 + // Register an atexit handler that sets g_greenlet_shutting_down. + // Python's atexit is LIFO: registered last = called first. By + // registering here (at import time, after most other libraries), + // our handler runs before their cleanup code, which may try to + // call greenlet.getcurrent() on objects whose type has been + // invalidated. _Py_IsFinalizing() alone is insufficient + // because it is only set AFTER call_py_exitfuncs completes. + { + PyObject* atexit_mod = PyImport_ImportModule("atexit"); + if (atexit_mod) { + PyObject* register_fn = PyObject_GetAttrString(atexit_mod, "register"); + if (register_fn) { + extern PyMethodDef _greenlet_atexit_method; + PyObject* callback = PyCFunction_New(&_greenlet_atexit_method, NULL); + if (callback) { + PyObject* args = PyTuple_Pack(1, callback); + if (args) { + PyObject* result = PyObject_Call(register_fn, args, NULL); + Py_XDECREF(result); + Py_DECREF(args); + } + Py_DECREF(callback); + } + Py_DECREF(register_fn); + } + // Non-fatal: if atexit registration fails, we still have + // the _Py_IsFinalizing() fallback. + if (PyErr_Occurred()) { + PyErr_Clear(); + } + Py_DECREF(atexit_mod); + } + else { + PyErr_Clear(); + } + } +#endif + /* also publish module-level data as attributes of the greentype. */ // XXX: This is weird, and enables a strange pattern of // confusing the class greenlet with the module greenlet; with diff --git a/src/greenlet/greenlet_cpython_compat.hpp b/src/greenlet/greenlet_cpython_compat.hpp index a3b3850..b49af5a 100644 --- a/src/greenlet/greenlet_cpython_compat.hpp +++ b/src/greenlet/greenlet_cpython_compat.hpp @@ -147,4 +147,12 @@ static inline void PyThreadState_LeaveTracing(PyThreadState *tstate) # define Py_C_RECURSION_LIMIT C_RECURSION_LIMIT #endif +// Py_IsFinalizing() became a public API in Python 3.13. +// Map it to the private _Py_IsFinalizing() on older versions so all +// call sites can use the standard name. Remove this once greenlet +// drops support for Python < 3.13. +#if !GREENLET_PY313 +# define Py_IsFinalizing() _Py_IsFinalizing() +#endif + #endif /* GREENLET_CPYTHON_COMPAT_H */ diff --git a/src/greenlet/tests/test_interpreter_shutdown.py b/src/greenlet/tests/test_interpreter_shutdown.py index 37afc52..a39122a 100644 --- a/src/greenlet/tests/test_interpreter_shutdown.py +++ b/src/greenlet/tests/test_interpreter_shutdown.py @@ -315,6 +315,221 @@ def worker(): self.assertEqual(rc, 0, f"Process crashed (rc={rc}):\n{stdout}{stderr}") self.assertIn("OK: greenlet with active exception at shutdown", stdout) + # ----------------------------------------------------------------- + # getcurrent() / greenlet construction / gettrace() / settrace() + # during finalization + # + # clear_deleteme_list() now uses std::swap (zero-allocation) instead + # of copying the vector, and preserves any pending exception around + # its cleanup loop. This prevents crashes during early Py_FinalizeEx + # on Python < 3.11 and avoids swallowing unrelated exceptions. + # These tests verify no crash occurs on any Python version. + # ----------------------------------------------------------------- + + def test_getcurrent_during_atexit_no_crash(self): + """ + Calling greenlet.getcurrent() inside an atexit handler must not + crash on any Python version. + """ + rc, stdout, stderr = self._run_shutdown_script("""\ + import atexit + import greenlet + + def call_getcurrent_at_exit(): + try: + g = greenlet.getcurrent() + print(f"OK: getcurrent returned {g!r}") + except Exception as e: + print(f"OK: getcurrent raised {type(e).__name__}: {e}") + + atexit.register(call_getcurrent_at_exit) + print("OK: atexit registered") + """) + self.assertEqual(rc, 0, f"Process crashed (rc={rc}):\n{stdout}{stderr}") + self.assertIn("OK: atexit registered", stdout) + self.assertIn("OK:", stdout.split('\n')[-2] if stdout.strip() else "") + + def test_gettrace_during_atexit_no_crash(self): + """ + Calling greenlet.gettrace() during atexit must not crash. + """ + rc, stdout, stderr = self._run_shutdown_script("""\ + import atexit + import greenlet + + def check_at_exit(): + try: + result = greenlet.gettrace() + print(f"OK: gettrace returned {result!r}") + except Exception as e: + print(f"OK: gettrace raised {type(e).__name__}: {e}") + + atexit.register(check_at_exit) + print("OK: registered") + """) + self.assertEqual(rc, 0, f"Process crashed (rc={rc}):\n{stdout}{stderr}") + self.assertIn("OK: registered", stdout) + + def test_settrace_during_atexit_no_crash(self): + """ + Calling greenlet.settrace() during atexit must not crash. + """ + rc, stdout, stderr = self._run_shutdown_script("""\ + import atexit + import greenlet + + def check_at_exit(): + try: + greenlet.settrace(lambda *args: None) + print("OK: settrace succeeded") + except Exception as e: + print(f"OK: settrace raised {type(e).__name__}: {e}") + + atexit.register(check_at_exit) + print("OK: registered") + """) + self.assertEqual(rc, 0, f"Process crashed (rc={rc}):\n{stdout}{stderr}") + self.assertIn("OK: registered", stdout) + + def test_getcurrent_with_active_greenlets_during_atexit(self): + """ + Calling getcurrent() during atexit when active greenlets exist. + This is the exact scenario triggered by uWSGI worker recycling. + """ + rc, stdout, stderr = self._run_shutdown_script("""\ + import atexit + import greenlet + + def worker(): + greenlet.getcurrent().parent.switch("ready") + + greenlets = [] + for i in range(5): + g = greenlet.greenlet(worker) + result = g.switch() + greenlets.append(g) + + def check_at_exit(): + try: + g = greenlet.getcurrent() + print(f"OK: getcurrent returned {g!r}") + except Exception as e: + print(f"OK: getcurrent raised {type(e).__name__}: {e}") + + atexit.register(check_at_exit) + print(f"OK: {len(greenlets)} active greenlets, atexit registered") + """) + self.assertEqual(rc, 0, f"Process crashed (rc={rc}):\n{stdout}{stderr}") + self.assertIn("OK: 5 active greenlets, atexit registered", stdout) + + def test_greenlet_construction_during_atexit_no_crash(self): + """ + Constructing a new greenlet during atexit must not crash. + greenlet.__init__ calls borrow_current() which triggers + clear_deleteme_list() — the same path that crashes in + mod_getcurrent on Python < 3.11 during early Py_FinalizeEx. + """ + rc, stdout, stderr = self._run_shutdown_script("""\ + import atexit + import greenlet + + def create_greenlets_at_exit(): + try: + def noop(): + pass + g = greenlet.greenlet(noop) + print(f"OK: created greenlet {g!r}") + except Exception as e: + print(f"OK: construction raised {type(e).__name__}: {e}") + + atexit.register(create_greenlets_at_exit) + print("OK: atexit registered") + """) + self.assertEqual(rc, 0, f"Process crashed (rc={rc}):\n{stdout}{stderr}") + self.assertIn("OK: atexit registered", stdout) + + def test_greenlet_construction_with_active_greenlets_during_atexit(self): + """ + Constructing new greenlets during atexit when other active + greenlets already exist (maximizes the chance of a non-empty + deleteme list). + """ + rc, stdout, stderr = self._run_shutdown_script("""\ + import atexit + import greenlet + + def worker(): + greenlet.getcurrent().parent.switch("ready") + + greenlets = [] + for i in range(10): + g = greenlet.greenlet(worker) + g.switch() + greenlets.append(g) + + def create_at_exit(): + try: + new_greenlets = [] + for i in range(5): + g = greenlet.greenlet(lambda: None) + new_greenlets.append(g) + print(f"OK: created {len(new_greenlets)} greenlets at exit") + except Exception as e: + print(f"OK: raised {type(e).__name__}: {e}") + + atexit.register(create_at_exit) + print(f"OK: {len(greenlets)} active greenlets, atexit registered") + """) + self.assertEqual(rc, 0, f"Process crashed (rc={rc}):\n{stdout}{stderr}") + self.assertIn("OK: 10 active greenlets, atexit registered", stdout) + + def test_greenlet_construction_with_cross_thread_deleteme_during_atexit(self): + """ + Create greenlets in a worker thread, transfer them to the main + thread, then drop them — populating the deleteme list. Then + construct a new greenlet during atexit. On Python < 3.11 + clear_deleteme_list() could previously crash if the + PythonAllocator vector copy failed during early Py_FinalizeEx; + using std::swap eliminates that allocation. + """ + rc, stdout, stderr = self._run_shutdown_script("""\ + import atexit + import greenlet + import threading + + cross_thread_refs = [] + + def thread_worker(): + # Create greenlets in this thread + def gl_body(): + greenlet.getcurrent().parent.switch("ready") + for _ in range(20): + g = greenlet.greenlet(gl_body) + g.switch() + cross_thread_refs.append(g) + + t = threading.Thread(target=thread_worker) + t.start() + t.join() + + # Dropping these references in the main thread + # causes them to be added to the main thread's + # deleteme list (deferred cross-thread dealloc). + cross_thread_refs.clear() + + def create_at_exit(): + try: + g = greenlet.greenlet(lambda: None) + print(f"OK: created greenlet at exit {g!r}") + except Exception as e: + print(f"OK: raised {type(e).__name__}: {e}") + + atexit.register(create_at_exit) + print("OK: cross-thread setup done, atexit registered") + """) + self.assertEqual(rc, 0, f"Process crashed (rc={rc}):\n{stdout}{stderr}") + self.assertIn("OK: cross-thread setup done, atexit registered", stdout) + if __name__ == '__main__': unittest.main() diff --git a/src/greenlet/tests/test_leaks.py b/src/greenlet/tests/test_leaks.py index e09da7d..10887d4 100644 --- a/src/greenlet/tests/test_leaks.py +++ b/src/greenlet/tests/test_leaks.py @@ -15,6 +15,7 @@ import greenlet from . import TestCase from . import PY314 +from . import WIN from . import RUNNING_ON_FREETHREAD_BUILD from .leakcheck import fails_leakcheck from .leakcheck import ignores_leakcheck @@ -439,7 +440,15 @@ def __call__(self): self.wait_for_pending_cleanups() uss_after = self.get_process_uss() - self.assertLessEqual(uss_after, uss_before, "after attempts %d" % (count,)) + # On Windows, USS can fluctuate by tens of KB between + # measurements due to working set trimming, page table + # updates, etc. Allow a small tolerance so OS-level noise + # doesn't cause false failures. Real leaks produce MBs of + # growth (each iteration creates 20k greenlets), so 512 KB + # is well below the detection threshold for genuine issues. + tolerance = 512 * 1024 if WIN else 0 + self.assertLessEqual(uss_after, uss_before + tolerance, + "after attempts %d" % (count,)) @ignores_leakcheck # Because we're just trying to track raw memory, not objects, and running