diff --git a/cuda_bindings/tests/test_interoperability.py b/cuda_bindings/tests/test_interoperability.py
index 3da1877128b..96270e208c3 100644
--- a/cuda_bindings/tests/test_interoperability.py
+++ b/cuda_bindings/tests/test_interoperability.py
@@ -6,6 +6,7 @@
 
 import cuda.bindings.driver as cuda
 import cuda.bindings.runtime as cudart
+from cuda.bindings._test_helpers.mempool import xfail_if_mempool_oom
 
 
 def supportsMemoryPool():
@@ -87,12 +88,14 @@ def test_interop_graphNode():
 def test_interop_memPool():
     # DRV to RT
     err_dr, pool = cuda.cuDeviceGetDefaultMemPool(0)
+    xfail_if_mempool_oom(err_dr, "cuDeviceGetDefaultMemPool", 0)
     assert err_dr == cuda.CUresult.CUDA_SUCCESS
     (err_rt,) = cudart.cudaDeviceSetMemPool(0, pool)
     assert err_rt == cudart.cudaError_t.cudaSuccess
 
     # RT to DRV
     err_rt, pool = cudart.cudaDeviceGetDefaultMemPool(0)
+    xfail_if_mempool_oom(err_rt, "cudaDeviceGetDefaultMemPool", 0)
     assert err_rt == cudart.cudaError_t.cudaSuccess
     (err_dr,) = cuda.cuDeviceSetMemPool(0, pool)
     assert err_dr == cuda.CUresult.CUDA_SUCCESS
diff --git a/cuda_core/cuda/core/_memory/_device_memory_resource.pyx b/cuda_core/cuda/core/_memory/_device_memory_resource.pyx
index b7b8b247a92..fb7689c54bb 100644
--- a/cuda_core/cuda/core/_memory/_device_memory_resource.pyx
+++ b/cuda_core/cuda/core/_memory/_device_memory_resource.pyx
@@ -13,6 +13,7 @@ from cuda.core._memory._ipc cimport IPCAllocationHandle
 from cuda.core._resource_handles cimport (
     as_cu,
     get_device_mempool,
+    get_last_error,
 )
 from cuda.core._utils.cuda_utils cimport (
     check_or_create_options,
@@ -262,6 +263,14 @@ cdef inline _DMR_init(DeviceMemoryResource self, device_id, options):
 
     if opts is None:
         self._h_pool = get_device_mempool(dev_id)
+        if not self._h_pool:
+            HANDLE_RETURN(get_last_error())
+            raise RuntimeError(
+                f"Failed to initialize DeviceMemoryResource for device {dev_id}: "
+                "cuda-core returned an empty memory pool handle without recording a CUDA error. "
+                "This is an internal cuda-core error; please report it with your CUDA driver, "
+                "CUDA Toolkit, and cuda-python versions."
+            )
         self._mempool_owned = False
         MP_raise_release_threshold(self)
     else:
diff --git a/cuda_core/cuda/core/_memory/_graph_memory_resource.pyx b/cuda_core/cuda/core/_memory/_graph_memory_resource.pyx
index 8fdc324dc59..60b056d3f28 100644
--- a/cuda_core/cuda/core/_memory/_graph_memory_resource.pyx
+++ b/cuda_core/cuda/core/_memory/_graph_memory_resource.pyx
@@ -11,6 +11,7 @@ from cuda.core._memory._buffer cimport Buffer, Buffer_from_deviceptr_handle, Mem
 from cuda.core._resource_handles cimport (
     DevicePtrHandle,
     deviceptr_alloc_async,
+    get_last_error,
     as_cu,
 )
 
@@ -194,7 +195,13 @@ cdef inline Buffer GMR_allocate(cyGraphMemoryResource self, size_t size, Stream
     check_capturing(s)
     h_ptr = deviceptr_alloc_async(size, stream._h_stream)
     if not h_ptr:
-        raise RuntimeError("Failed to allocate memory asynchronously")
+        HANDLE_RETURN(get_last_error())
+        raise RuntimeError(
+            f"Failed to allocate {size} bytes from GraphMemoryResource: "
+            "cuda-core returned an empty allocation handle without recording a CUDA error. "
+            "This is an internal cuda-core error; please report it with your CUDA driver, "
+            "CUDA Toolkit, and cuda-python versions."
+        )
     return Buffer_from_deviceptr_handle(h_ptr, size, self, None)
 
 
diff --git a/cuda_core/cuda/core/_memory/_ipc.pyx b/cuda_core/cuda/core/_memory/_ipc.pyx
index 59414fc1b2e..833b24b0e2a 100644
--- a/cuda_core/cuda/core/_memory/_ipc.pyx
+++ b/cuda_core/cuda/core/_memory/_ipc.pyx
@@ -211,7 +211,13 @@ cdef _MemPool MP_from_allocation_handle(cls, alloc_handle):
     cdef int ipc_fd = int(alloc_handle)
     self._h_pool = create_mempool_handle_ipc(ipc_fd, IPC_HANDLE_TYPE)
     if not self._h_pool:
-        raise RuntimeError("Failed to import memory pool from IPC handle")
+        HANDLE_RETURN(get_last_error())
+        raise RuntimeError(
+            f"Failed to import {cls.__name__} from an allocation handle: "
+            "cuda-core returned an empty memory pool handle without recording a CUDA error. "
+            "This is an internal cuda-core error; please report it with your CUDA driver, "
+            "CUDA Toolkit, and cuda-python versions."
+        )
     self._ipc_data = IPCDataForMR(alloc_handle, True)
 
     # Register it.
diff --git a/cuda_core/cuda/core/_memory/_memory_pool.pyx b/cuda_core/cuda/core/_memory/_memory_pool.pyx
index 4da5e26ea92..02857cbb163 100644
--- a/cuda_core/cuda/core/_memory/_memory_pool.pyx
+++ b/cuda_core/cuda/core/_memory/_memory_pool.pyx
@@ -17,6 +17,7 @@ from cuda.core._resource_handles cimport (
     DevicePtrHandle,
     create_mempool_handle,
     deviceptr_alloc_from_pool,
+    get_last_error,
     as_cu,
     as_py,
 )
@@ -228,6 +229,14 @@ cdef int MP_init_create_pool(
 
     self._mempool_owned = True
     self._h_pool = create_mempool_handle(properties)
+    if not self._h_pool:
+        HANDLE_RETURN(get_last_error())
+        raise RuntimeError(
+            f"Failed to initialize {self.__class__.__name__}: "
+            "cuda-core returned an empty memory pool handle without recording a CUDA error. "
+            "This is an internal cuda-core error; please report it with your CUDA driver, "
+            "CUDA Toolkit, and cuda-python versions."
+ ) if ipc_enabled: alloc_handle = _ipc.MP_export_mempool(self) @@ -307,7 +316,13 @@ cdef inline Buffer _MP_allocate(_MemPool self, size_t size, Stream stream): check_not_capturing(s) h_ptr = deviceptr_alloc_from_pool(size, self._h_pool, stream._h_stream) if not h_ptr: - raise RuntimeError("Failed to allocate memory from pool") + HANDLE_RETURN(get_last_error()) + raise RuntimeError( + f"Failed to allocate {size} bytes from {self.__class__.__name__}: " + "cuda-core returned an empty allocation handle without recording a CUDA error. " + "This is an internal cuda-core error; please report it with your CUDA driver, " + "CUDA Toolkit, and cuda-python versions." + ) return Buffer_from_deviceptr_handle(h_ptr, size, self, None) diff --git a/cuda_core/tests/conftest.py b/cuda_core/tests/conftest.py index 9f48686c30c..86c0c0cd7d4 100644 --- a/cuda_core/tests/conftest.py +++ b/cuda_core/tests/conftest.py @@ -5,6 +5,7 @@ import os import pathlib import sys +from contextlib import contextmanager from importlib.metadata import PackageNotFoundError, distribution import pytest @@ -87,6 +88,8 @@ def create_managed_memory_resource_or_skip(*args, xfail_device=None, **kwargs): return ManagedMemoryResource(*args, **kwargs) except CUDAError as e: xfail_if_mempool_oom(e, _device_id_from_resource_options(xfail_device, args, kwargs)) + if "CUDA_ERROR_NOT_SUPPORTED" in str(e): + pytest.skip("ManagedMemoryResource is not supported on this platform/device") raise except RuntimeError as e: if "requires CUDA 13.0" in str(e): @@ -102,6 +105,15 @@ def create_pinned_memory_resource_or_xfail(*args, xfail_device=None, **kwargs): raise +@contextmanager +def xfail_on_graph_mempool_oom(device=0): + try: + yield + except CUDAError as e: + xfail_if_mempool_oom(e, "cuGraphAddMemAllocNode", device) + raise + + def _device_id_from_resource_options(device, args, kwargs): if device is not None: return device diff --git a/cuda_core/tests/graph/test_graph_definition.py 
b/cuda_core/tests/graph/test_graph_definition.py index f9d10c766eb..da78bea577f 100644 --- a/cuda_core/tests/graph/test_graph_definition.py +++ b/cuda_core/tests/graph/test_graph_definition.py @@ -10,6 +10,7 @@ from helpers.graph_kernels import compile_common_kernels from helpers.misc import try_create_condition +from conftest import xfail_on_graph_mempool_oom from cuda.core import Device, LaunchConfig from cuda.core.graph import ( AllocNode, @@ -201,13 +202,15 @@ def _build_disconnected(): def graph_spec(request, init_cuda): if request.param is not _build_empty: _skip_if_no_mempool() - return request.param() + with xfail_on_graph_mempool_oom(): + return request.param() @pytest.fixture(params=_NONEMPTY_BUILDERS) def nonempty_graph_spec(request, init_cuda): _skip_if_no_mempool() - return request.param() + with xfail_on_graph_mempool_oom(): + return request.param() # ============================================================================= @@ -562,7 +565,8 @@ def node_spec(request, init_cuda): if spec.needs_mempool: _skip_if_no_mempool() g = GraphDefinition() - node, expected_attrs = spec.builder(g) + with xfail_on_graph_mempool_oom(): + node, expected_attrs = spec.builder(g) return spec, g, node, expected_attrs @@ -803,18 +807,20 @@ def test_alloc_zero_size_fails(sample_graphdef): def test_free_creates_dependency(sample_graphdef): """Free node depends on its predecessor.""" _skip_if_no_mempool() - alloc = sample_graphdef.allocate(ALLOC_SIZE) - free = alloc.deallocate(alloc.dptr) + with xfail_on_graph_mempool_oom(): + alloc = sample_graphdef.allocate(ALLOC_SIZE) + free = alloc.deallocate(alloc.dptr) assert alloc in free.pred def test_alloc_free_chain(sample_graphdef): """Alloc and free can be chained.""" _skip_if_no_mempool() - a1 = sample_graphdef.allocate(ALLOC_SIZE) - a2 = a1.allocate(ALLOC_SIZE) - f2 = a2.deallocate(a2.dptr) - f1 = f2.deallocate(a1.dptr) + with xfail_on_graph_mempool_oom(): + a1 = sample_graphdef.allocate(ALLOC_SIZE) + a2 = 
a1.allocate(ALLOC_SIZE) + f2 = a2.deallocate(a2.dptr) + f1 = f2.deallocate(a1.dptr) assert a1 in a2.pred assert a2 in f2.pred assert f2 in f1.pred @@ -842,7 +848,8 @@ def test_alloc_device_option(sample_graphdef, device_spec): """Device can be specified as int or Device object.""" _skip_if_no_mempool() device = Device() - node = sample_graphdef.allocate(ALLOC_SIZE, device=device_spec(device)) + with xfail_on_graph_mempool_oom(device): + node = sample_graphdef.allocate(ALLOC_SIZE, device=device_spec(device)) assert node.dptr != 0 @@ -850,7 +857,8 @@ def test_alloc_peer_access(mempool_device_x2): """AllocNode.peer_access reflects requested peers.""" d0, d1 = mempool_device_x2 g = GraphDefinition() - node = g.allocate(ALLOC_SIZE, device=d0.device_id, peer_access=[d1.device_id]) + with xfail_on_graph_mempool_oom(d0): + node = g.allocate(ALLOC_SIZE, device=d0.device_id, peer_access=[d1.device_id]) assert d1.device_id in node.peer_access @@ -863,8 +871,9 @@ def test_alloc_peer_access(mempool_device_x2): def test_join_merges_branches(sample_graphdef, num_branches): """join() with multiple branches creates correct dependencies.""" _skip_if_no_mempool() - branches = [sample_graphdef.allocate(ALLOC_SIZE) for _ in range(num_branches)] - joined = sample_graphdef.join(*branches) + with xfail_on_graph_mempool_oom(): + branches = [sample_graphdef.allocate(ALLOC_SIZE) for _ in range(num_branches)] + joined = sample_graphdef.join(*branches) assert isinstance(joined, EmptyNode) assert set(joined.pred) == set(branches) @@ -956,8 +965,9 @@ def test_instantiate_empty_graph(sample_graphdef, inst_kwargs): def test_instantiate_with_nodes(sample_graphdef, inst_kwargs): """Graph with nodes can be instantiated.""" _skip_if_no_mempool() - sample_graphdef.allocate(ALLOC_SIZE) - sample_graphdef.allocate(ALLOC_SIZE) + with xfail_on_graph_mempool_oom(): + sample_graphdef.allocate(ALLOC_SIZE) + sample_graphdef.allocate(ALLOC_SIZE) graph = _instantiate(sample_graphdef, inst_kwargs) assert graph is 
not None @@ -997,8 +1007,9 @@ def test_instantiate_and_execute_kernel(sample_graphdef, inst_kwargs): def test_instantiate_and_execute_alloc_free(sample_graphdef, inst_kwargs): """Graph with alloc/free can be executed.""" _skip_if_no_mempool() - alloc = sample_graphdef.allocate(ALLOC_SIZE) - alloc.deallocate(alloc.dptr) + with xfail_on_graph_mempool_oom(): + alloc = sample_graphdef.allocate(ALLOC_SIZE) + alloc.deallocate(alloc.dptr) stream = Device().create_stream() graph = _instantiate_and_upload(sample_graphdef, inst_kwargs, stream) @@ -1010,9 +1021,10 @@ def test_instantiate_and_execute_alloc_free(sample_graphdef, inst_kwargs): def test_instantiate_and_execute_memset(sample_graphdef, inst_kwargs): """Graph with alloc/memset/free can be executed.""" _skip_if_no_mempool() - alloc = sample_graphdef.allocate(ALLOC_SIZE) - ms = alloc.memset(alloc.dptr, 0xAB, ALLOC_SIZE) - ms.deallocate(alloc.dptr) + with xfail_on_graph_mempool_oom(): + alloc = sample_graphdef.allocate(ALLOC_SIZE) + ms = alloc.memset(alloc.dptr, 0xAB, ALLOC_SIZE) + ms.deallocate(alloc.dptr) stream = Device().create_stream() graph = _instantiate_and_upload(sample_graphdef, inst_kwargs, stream) @@ -1026,12 +1038,13 @@ def test_instantiate_and_execute_memcpy(sample_graphdef, inst_kwargs): _skip_if_no_mempool() import ctypes - src_alloc = sample_graphdef.allocate(ALLOC_SIZE) - dst_alloc = sample_graphdef.allocate(ALLOC_SIZE) - dep = sample_graphdef.join(src_alloc, dst_alloc) - ms = dep.memset(src_alloc.dptr, 0xAB, ALLOC_SIZE) - cp = ms.memcpy(dst_alloc.dptr, src_alloc.dptr, ALLOC_SIZE) - cp.deallocate(src_alloc.dptr) + with xfail_on_graph_mempool_oom(): + src_alloc = sample_graphdef.allocate(ALLOC_SIZE) + dst_alloc = sample_graphdef.allocate(ALLOC_SIZE) + dep = sample_graphdef.join(src_alloc, dst_alloc) + ms = dep.memset(src_alloc.dptr, 0xAB, ALLOC_SIZE) + cp = ms.memcpy(dst_alloc.dptr, src_alloc.dptr, ALLOC_SIZE) + cp.deallocate(src_alloc.dptr) stream = Device().create_stream() graph = 
_instantiate_and_upload(sample_graphdef, inst_kwargs, stream) @@ -1166,11 +1179,12 @@ def test_instantiate_and_execute_if_then(sample_graphdef): set_handle = mod.get_kernel("set_handle") add_one = mod.get_kernel("add_one") - alloc = sample_graphdef.allocate(ctypes.sizeof(ctypes.c_int)) - ms = alloc.memset(alloc.dptr, 0, ctypes.sizeof(ctypes.c_int)) - setter = ms.launch(LaunchConfig(grid=1, block=1), set_handle, condition, 1) - if_node = setter.if_then(condition) - if_node.then.launch(LaunchConfig(grid=1, block=1), add_one, alloc.dptr) + with xfail_on_graph_mempool_oom(): + alloc = sample_graphdef.allocate(ctypes.sizeof(ctypes.c_int)) + ms = alloc.memset(alloc.dptr, 0, ctypes.sizeof(ctypes.c_int)) + setter = ms.launch(LaunchConfig(grid=1, block=1), set_handle, condition, 1) + if_node = setter.if_then(condition) + if_node.then.launch(LaunchConfig(grid=1, block=1), add_one, alloc.dptr) graph = sample_graphdef.instantiate() stream = Device().create_stream() @@ -1198,13 +1212,14 @@ def test_instantiate_and_execute_if_else(sample_graphdef): set_handle = mod.get_kernel("set_handle") add_one = mod.get_kernel("add_one") - alloc = sample_graphdef.allocate(ctypes.sizeof(ctypes.c_int)) - ms = alloc.memset(alloc.dptr, 0, ctypes.sizeof(ctypes.c_int)) - setter = ms.launch(LaunchConfig(grid=1, block=1), set_handle, condition, 0) - ie_node = setter.if_else(condition) - ie_node.then.launch(LaunchConfig(grid=1, block=1), add_one, alloc.dptr) - n1 = ie_node.else_.launch(LaunchConfig(grid=1, block=1), add_one, alloc.dptr) - n1.launch(LaunchConfig(grid=1, block=1), add_one, alloc.dptr) + with xfail_on_graph_mempool_oom(): + alloc = sample_graphdef.allocate(ctypes.sizeof(ctypes.c_int)) + ms = alloc.memset(alloc.dptr, 0, ctypes.sizeof(ctypes.c_int)) + setter = ms.launch(LaunchConfig(grid=1, block=1), set_handle, condition, 0) + ie_node = setter.if_else(condition) + ie_node.then.launch(LaunchConfig(grid=1, block=1), add_one, alloc.dptr) + n1 = ie_node.else_.launch(LaunchConfig(grid=1, 
block=1), add_one, alloc.dptr) + n1.launch(LaunchConfig(grid=1, block=1), add_one, alloc.dptr) graph = sample_graphdef.instantiate() stream = Device().create_stream() @@ -1232,12 +1247,13 @@ def test_instantiate_and_execute_switch(sample_graphdef): set_handle = mod.get_kernel("set_handle") add_one = mod.get_kernel("add_one") - alloc = sample_graphdef.allocate(ctypes.sizeof(ctypes.c_int)) - ms = alloc.memset(alloc.dptr, 0, ctypes.sizeof(ctypes.c_int)) - setter = ms.launch(LaunchConfig(grid=1, block=1), set_handle, condition, 2) - sw_node = setter.switch(condition, 4) - for branch in sw_node.branches: - branch.launch(LaunchConfig(grid=1, block=1), add_one, alloc.dptr) + with xfail_on_graph_mempool_oom(): + alloc = sample_graphdef.allocate(ctypes.sizeof(ctypes.c_int)) + ms = alloc.memset(alloc.dptr, 0, ctypes.sizeof(ctypes.c_int)) + setter = ms.launch(LaunchConfig(grid=1, block=1), set_handle, condition, 2) + sw_node = setter.switch(condition, 4) + for branch in sw_node.branches: + branch.launch(LaunchConfig(grid=1, block=1), add_one, alloc.dptr) graph = sample_graphdef.instantiate() stream = Device().create_stream() @@ -1272,7 +1288,8 @@ def test_conditional_node_type_preserved_by_nodes(sample_graphdef): def test_debug_dot_print_creates_file(sample_graphdef, dot_file): """debug_dot_print writes a DOT file.""" _skip_if_no_mempool() - sample_graphdef.allocate(ALLOC_SIZE) + with xfail_on_graph_mempool_oom(): + sample_graphdef.allocate(ALLOC_SIZE) sample_graphdef.debug_dot_print(str(dot_file)) assert dot_file.exists() content = dot_file.read_text() @@ -1282,7 +1299,8 @@ def test_debug_dot_print_creates_file(sample_graphdef, dot_file): def test_debug_dot_print_with_options(sample_graphdef, dot_file): """debug_dot_print accepts GraphDebugPrintOptions.""" _skip_if_no_mempool() - sample_graphdef.allocate(ALLOC_SIZE) + with xfail_on_graph_mempool_oom(): + sample_graphdef.allocate(ALLOC_SIZE) options = GraphDebugPrintOptions(verbose=True, handles=True) 
sample_graphdef.debug_dot_print(str(dot_file), options) assert dot_file.exists() @@ -1291,6 +1309,7 @@ def test_debug_dot_print_with_options(sample_graphdef, dot_file): def test_debug_dot_print_invalid_options(sample_graphdef, dot_file): """debug_dot_print rejects invalid options type.""" _skip_if_no_mempool() - sample_graphdef.allocate(ALLOC_SIZE) + with xfail_on_graph_mempool_oom(): + sample_graphdef.allocate(ALLOC_SIZE) with pytest.raises(TypeError, match="options must be a GraphDebugPrintOptions"): sample_graphdef.debug_dot_print(str(dot_file), "invalid") diff --git a/cuda_core/tests/graph/test_graph_definition_errors.py b/cuda_core/tests/graph/test_graph_definition_errors.py index 40f181e5db1..a8a3c9b8f09 100644 --- a/cuda_core/tests/graph/test_graph_definition_errors.py +++ b/cuda_core/tests/graph/test_graph_definition_errors.py @@ -9,6 +9,7 @@ from helpers.graph_kernels import compile_common_kernels from helpers.misc import try_create_condition +from conftest import xfail_on_graph_mempool_oom from cuda.core import Device, LaunchConfig from cuda.core._utils.cuda_utils import CUDAError from cuda.core.graph import ( @@ -69,7 +70,8 @@ def test_memset_invalid_value_size(init_cuda): """memset with 3-byte value (not 1, 2, or 4) raises ValueError.""" _skip_if_no_mempool() g = GraphDefinition() - alloc = g.allocate(1024) + with xfail_on_graph_mempool_oom(): + alloc = g.allocate(1024) with pytest.raises(ValueError): alloc.memset(alloc.dptr, b"\x01\x02\x03", 100) @@ -113,8 +115,9 @@ def test_join_single_predecessor(init_cuda): """node.join() with no extra args creates a single-dep empty node.""" _skip_if_no_mempool() g = GraphDefinition() - a = g.allocate(1024) - joined = a.join() + with xfail_on_graph_mempool_oom(): + a = g.allocate(1024) + joined = a.join() assert isinstance(joined, EmptyNode) assert set(joined.pred) == {a} @@ -136,7 +139,8 @@ def test_unmatched_alloc_succeeds(init_cuda): """Alloc without corresponding free is valid (graph-scoped lifetime).""" 
_skip_if_no_mempool() g = GraphDefinition() - g.allocate(1024) + with xfail_on_graph_mempool_oom(): + g.allocate(1024) graph = g.instantiate() stream = Device().create_stream() graph.launch(stream) @@ -174,10 +178,11 @@ def test_while_loop_zero_iterations(init_cuda): g = GraphDefinition() condition = g.create_condition(default_value=0) - alloc = g.allocate(SIZEOF_INT) - ms = alloc.memset(alloc.dptr, 0, SIZEOF_INT) - loop = ms.while_loop(condition) - loop.body.launch(cfg, add_one, alloc.dptr) + with xfail_on_graph_mempool_oom(): + alloc = g.allocate(SIZEOF_INT) + ms = alloc.memset(alloc.dptr, 0, SIZEOF_INT) + loop = ms.while_loop(condition) + loop.body.launch(cfg, add_one, alloc.dptr) graph = g.instantiate() stream = Device().create_stream() @@ -202,10 +207,11 @@ def test_if_then_false_skips_body(init_cuda): g = GraphDefinition() condition = g.create_condition(default_value=0) - alloc = g.allocate(SIZEOF_INT) - ms = alloc.memset(alloc.dptr, 0, SIZEOF_INT) - if_node = ms.if_then(condition) - if_node.then.launch(cfg, add_one, alloc.dptr) + with xfail_on_graph_mempool_oom(): + alloc = g.allocate(SIZEOF_INT) + ms = alloc.memset(alloc.dptr, 0, SIZEOF_INT) + if_node = ms.if_then(condition) + if_node.then.launch(cfg, add_one, alloc.dptr) graph = g.instantiate() stream = Device().create_stream() @@ -230,11 +236,12 @@ def test_switch_oob_skips_all_branches(init_cuda): g = GraphDefinition() condition = g.create_condition(default_value=99) - alloc = g.allocate(SIZEOF_INT) - ms = alloc.memset(alloc.dptr, 0, SIZEOF_INT) - sw = ms.switch(condition, 3) - for branch in sw.branches: - branch.launch(cfg, add_one, alloc.dptr) + with xfail_on_graph_mempool_oom(): + alloc = g.allocate(SIZEOF_INT) + ms = alloc.memset(alloc.dptr, 0, SIZEOF_INT) + sw = ms.switch(condition, 3) + for branch in sw.branches: + branch.launch(cfg, add_one, alloc.dptr) graph = g.instantiate() stream = Device().create_stream() diff --git a/cuda_core/tests/graph/test_graph_definition_integration.py 
b/cuda_core/tests/graph/test_graph_definition_integration.py index b33b23d8860..12b57bb73a5 100644 --- a/cuda_core/tests/graph/test_graph_definition_integration.py +++ b/cuda_core/tests/graph/test_graph_definition_integration.py @@ -8,6 +8,7 @@ import numpy as np import pytest +from conftest import xfail_on_graph_mempool_oom from cuda.core import Device, EventOptions, LaunchConfig, Program, ProgramOptions from cuda.core._utils.cuda_utils import driver, handle_return from cuda.core.graph import GraphDefinition @@ -204,7 +205,8 @@ def test_heat_diffusion(init_cuda): host_ptr = handle_return(driver.cuMemAllocHost(_HEAT_N * SIZEOF_FLOAT)) try: - _run_heat_graph(dev, k_heat, k_countdown, host_ptr) + with xfail_on_graph_mempool_oom(dev): + _run_heat_graph(dev, k_heat, k_countdown, host_ptr) finally: handle_return(driver.cuMemFreeHost(host_ptr)) @@ -314,7 +316,8 @@ def test_bisection_root(init_cuda): host_ptr = handle_return(driver.cuMemAllocHost(SIZEOF_FLOAT)) try: - _run_bisection_graph(dev, k_eval, k_hi, k_lo, k_cd, k_check, k_newton, host_ptr) + with xfail_on_graph_mempool_oom(dev): + _run_bisection_graph(dev, k_eval, k_hi, k_lo, k_cd, k_check, k_newton, host_ptr) finally: handle_return(driver.cuMemFreeHost(host_ptr)) @@ -416,7 +419,8 @@ def test_switch_dispatch(init_cuda, mode, expected): host_ptr = handle_return(driver.cuMemAllocHost(SIZEOF_INT)) try: - _run_switch_graph(dev, mode, k_negate, k_double, k_square, host_ptr) + with xfail_on_graph_mempool_oom(dev): + _run_switch_graph(dev, mode, k_negate, k_double, k_square, host_ptr) result = ctypes.c_int.from_address(host_ptr).value assert result == expected diff --git a/cuda_core/tests/graph/test_graph_definition_lifetime.py b/cuda_core/tests/graph/test_graph_definition_lifetime.py index c53009a5724..40bc6f3c442 100644 --- a/cuda_core/tests/graph/test_graph_definition_lifetime.py +++ b/cuda_core/tests/graph/test_graph_definition_lifetime.py @@ -12,6 +12,8 @@ from helpers.graph_kernels import compile_common_kernels 
from helpers.misc import try_create_condition +from conftest import xfail_on_graph_mempool_oom + def _wait_until(predicate, timeout=2.0, interval=0.01): """Poll predicate() until True or timeout, driving gc each iteration. @@ -193,7 +195,8 @@ def test_event_record_node_keeps_event_alive(init_cuda): _skip_if_no_mempool() dev = Device() g = GraphDefinition() - alloc = g.allocate(1024) + with xfail_on_graph_mempool_oom(dev): + alloc = g.allocate(1024) event = dev.create_event(EventOptions(timing_enabled=False)) node = alloc.record(event) @@ -210,7 +213,8 @@ def test_event_wait_node_keeps_event_alive(init_cuda): _skip_if_no_mempool() dev = Device() g = GraphDefinition() - alloc = g.allocate(1024) + with xfail_on_graph_mempool_oom(dev): + alloc = g.allocate(1024) event = dev.create_event(EventOptions(timing_enabled=False)) node = alloc.wait(event) diff --git a/cuda_core/tests/graph/test_graph_memory_resource.py b/cuda_core/tests/graph/test_graph_memory_resource.py index cdf694e3230..a231d5d694c 100644 --- a/cuda_core/tests/graph/test_graph_memory_resource.py +++ b/cuda_core/tests/graph/test_graph_memory_resource.py @@ -8,6 +8,7 @@ from helpers import IS_WINDOWS, IS_WSL from helpers.buffers import compare_buffer_to_constant, make_scratch_buffer, set_buffer +from conftest import xfail_on_graph_mempool_oom from cuda.core import ( Device, DeviceMemoryResource, @@ -64,8 +65,9 @@ def reset(self): def alloc(self, num, nbytes): """Allocate num buffers of size nbytes from graph memory.""" gb = self.device.create_graph_builder().begin_building(self.mode) - buffers = [self.gmr.allocate(nbytes, stream=gb) for _ in range(num)] - graph = gb.end_building().complete() + with xfail_on_graph_mempool_oom(self.device): + buffers = [self.gmr.allocate(nbytes, stream=gb) for _ in range(num)] + graph = gb.end_building().complete() graph.upload(self.stream) graph.launch(self.stream) self.stream.sync() @@ -129,8 +131,9 @@ def apply_kernels(mr, stream, out): else: # Capture work, then upload and 
launch. gb = device.create_graph_builder().begin_building(mode) - apply_kernels(mr=gmr, stream=gb, out=out) - graph = gb.end_building().complete() + with xfail_on_graph_mempool_oom(device): + apply_kernels(mr=gmr, stream=gb, out=out) + graph = gb.end_building().complete() # First launch. graph.upload(stream) @@ -166,16 +169,17 @@ def test_graph_alloc_with_output(mempool_device, mode): # buffer allocated within the graph. The auto_free_on_launch option # is required to properly use the output buffer. gb = device.create_graph_builder().begin_building(mode) - out = gmr.allocate(NBYTES, stream=gb) - out.copy_from(in_, stream=gb) - launch(gb, LaunchConfig(grid=1, block=1), add_one, out, NBYTES) - options = GraphCompleteOptions(auto_free_on_launch=True) - try: - graph = gb.end_building().complete(options) - except CUDAError as exc: - if "CUDA_ERROR_INVALID_VALUE" in str(exc): - pytest.skip("auto_free_on_launch not supported on this platform") - raise + with xfail_on_graph_mempool_oom(device): + out = gmr.allocate(NBYTES, stream=gb) + out.copy_from(in_, stream=gb) + launch(gb, LaunchConfig(grid=1, block=1), add_one, out, NBYTES) + options = GraphCompleteOptions(auto_free_on_launch=True) + try: + graph = gb.end_building().complete(options) + except CUDAError as exc: + if "CUDA_ERROR_INVALID_VALUE" in str(exc): + pytest.skip("auto_free_on_launch not supported on this platform") + raise # Launch the graph. The output buffer is allocated and set to one. 
graph.upload(stream) @@ -197,8 +201,9 @@ def test_graph_mem_alloc_zero(mempool_device, mode): gb = device.create_graph_builder().begin_building(mode) stream = device.create_stream() gmr = GraphMemoryResource(device) - buffer = gmr.allocate(0, stream=gb) - graph = gb.end_building().complete() + with xfail_on_graph_mempool_oom(device): + buffer = gmr.allocate(0, stream=gb) + graph = gb.end_building().complete() graph.upload(stream) graph.launch(stream) stream.sync() @@ -280,8 +285,9 @@ def test_gmr_check_capture_state(mempool_device, mode): # Capturing gb = device.create_graph_builder().begin_building(mode=mode) - gmr.allocate(1, stream=gb) # no error - gb.end_building().complete() + with xfail_on_graph_mempool_oom(device): + gmr.allocate(1, stream=gb) # no error + gb.end_building().complete() @pytest.mark.parametrize("mode", ["global", "thread_local", "relaxed"]) diff --git a/cuda_core/tests/test_managed_memory_warning.py b/cuda_core/tests/test_managed_memory_warning.py index 5e6032ebe9e..01dd840e2ef 100644 --- a/cuda_core/tests/test_managed_memory_warning.py +++ b/cuda_core/tests/test_managed_memory_warning.py @@ -13,7 +13,7 @@ import pytest import cuda.bindings -from conftest import xfail_if_mempool_oom +from conftest import create_managed_memory_resource_or_skip, xfail_if_mempool_oom from cuda.core import Device, ManagedMemoryResource, ManagedMemoryResourceOptions from cuda.core._memory._managed_memory_resource import reset_concurrent_access_warning from cuda.core._utils.cuda_utils import CUDAError @@ -28,7 +28,10 @@ def _make_managed_mr(device_id): """Create a ManagedMemoryResource with an explicit device preference.""" - return ManagedMemoryResource(options=ManagedMemoryResourceOptions(preferred_location=device_id)) + return create_managed_memory_resource_or_skip( + options=ManagedMemoryResourceOptions(preferred_location=device_id), + xfail_device=device_id, + ) @pytest.fixture diff --git a/cuda_core/tests/test_object_protocols.py 
b/cuda_core/tests/test_object_protocols.py index 72f7891a711..d1085a952bb 100644 --- a/cuda_core/tests/test_object_protocols.py +++ b/cuda_core/tests/test_object_protocols.py @@ -16,6 +16,7 @@ from helpers.graph_kernels import compile_common_kernels from helpers.misc import try_create_condition +from conftest import xfail_on_graph_mempool_oom from cuda.core import ( Buffer, Device, @@ -278,32 +279,36 @@ def sample_root_node_alt(sample_graphdef_alt): def sample_empty_node(sample_graphdef): """An EmptyNode created by merging two branches.""" _skip_if_no_mempool() - a = sample_graphdef.allocate(ALLOC_SIZE) - b = sample_graphdef.allocate(ALLOC_SIZE) - return sample_graphdef.join(a, b) + with xfail_on_graph_mempool_oom(): + a = sample_graphdef.allocate(ALLOC_SIZE) + b = sample_graphdef.allocate(ALLOC_SIZE) + return sample_graphdef.join(a, b) @pytest.fixture def sample_empty_node_alt(sample_graphdef): """An alternate EmptyNode from same graph.""" _skip_if_no_mempool() - c = sample_graphdef.allocate(ALLOC_SIZE) - d = sample_graphdef.allocate(ALLOC_SIZE) - return sample_graphdef.join(c, d) + with xfail_on_graph_mempool_oom(): + c = sample_graphdef.allocate(ALLOC_SIZE) + d = sample_graphdef.allocate(ALLOC_SIZE) + return sample_graphdef.join(c, d) @pytest.fixture def sample_alloc_node(sample_graphdef): """An AllocNode.""" _skip_if_no_mempool() - return sample_graphdef.allocate(ALLOC_SIZE) + with xfail_on_graph_mempool_oom(): + return sample_graphdef.allocate(ALLOC_SIZE) @pytest.fixture def sample_alloc_node_alt(sample_graphdef): """An alternate AllocNode from same graph.""" _skip_if_no_mempool() - return sample_graphdef.allocate(ALLOC_SIZE) + with xfail_on_graph_mempool_oom(): + return sample_graphdef.allocate(ALLOC_SIZE) @pytest.fixture @@ -328,52 +333,58 @@ def sample_kernel_node_alt(sample_graphdef, init_cuda): def sample_free_node(sample_graphdef): """A FreeNode.""" _skip_if_no_mempool() - alloc = sample_graphdef.allocate(ALLOC_SIZE) - return alloc.deallocate(alloc.dptr) 
+ with xfail_on_graph_mempool_oom(): + alloc = sample_graphdef.allocate(ALLOC_SIZE) + return alloc.deallocate(alloc.dptr) @pytest.fixture def sample_free_node_alt(sample_graphdef): """An alternate FreeNode from same graph.""" _skip_if_no_mempool() - alloc = sample_graphdef.allocate(ALLOC_SIZE) - return alloc.deallocate(alloc.dptr) + with xfail_on_graph_mempool_oom(): + alloc = sample_graphdef.allocate(ALLOC_SIZE) + return alloc.deallocate(alloc.dptr) @pytest.fixture def sample_memset_node(sample_graphdef): """A MemsetNode.""" _skip_if_no_mempool() - alloc = sample_graphdef.allocate(ALLOC_SIZE) - return alloc.memset(alloc.dptr, 0, ALLOC_SIZE) + with xfail_on_graph_mempool_oom(): + alloc = sample_graphdef.allocate(ALLOC_SIZE) + return alloc.memset(alloc.dptr, 0, ALLOC_SIZE) @pytest.fixture def sample_memset_node_alt(sample_graphdef): """An alternate MemsetNode from same graph.""" _skip_if_no_mempool() - alloc = sample_graphdef.allocate(ALLOC_SIZE) - return alloc.memset(alloc.dptr, 0, ALLOC_SIZE) + with xfail_on_graph_mempool_oom(): + alloc = sample_graphdef.allocate(ALLOC_SIZE) + return alloc.memset(alloc.dptr, 0, ALLOC_SIZE) @pytest.fixture def sample_memcpy_node(sample_graphdef): """A MemcpyNode.""" _skip_if_no_mempool() - src = sample_graphdef.allocate(ALLOC_SIZE) - dst = sample_graphdef.allocate(ALLOC_SIZE) - dep = sample_graphdef.join(src, dst) - return dep.memcpy(dst.dptr, src.dptr, ALLOC_SIZE) + with xfail_on_graph_mempool_oom(): + src = sample_graphdef.allocate(ALLOC_SIZE) + dst = sample_graphdef.allocate(ALLOC_SIZE) + dep = sample_graphdef.join(src, dst) + return dep.memcpy(dst.dptr, src.dptr, ALLOC_SIZE) @pytest.fixture def sample_memcpy_node_alt(sample_graphdef): """An alternate MemcpyNode from same graph.""" _skip_if_no_mempool() - src = sample_graphdef.allocate(ALLOC_SIZE) - dst = sample_graphdef.allocate(ALLOC_SIZE) - dep = sample_graphdef.join(src, dst) - return dep.memcpy(dst.dptr, src.dptr, ALLOC_SIZE) + with xfail_on_graph_mempool_oom(): + src = 
sample_graphdef.allocate(ALLOC_SIZE) + dst = sample_graphdef.allocate(ALLOC_SIZE) + dep = sample_graphdef.join(src, dst) + return dep.memcpy(dst.dptr, src.dptr, ALLOC_SIZE) @pytest.fixture