diff --git a/README.md b/README.md
index 6da895bbb9b..0a986bc10b0 100644
--- a/README.md
+++ b/README.md
@@ -5,8 +5,8 @@ CUDA Python is the home for accessing NVIDIA’s CUDA platform from Python. It c
* [cuda.core](https://nvidia.github.io/cuda-python/cuda-core/latest): Pythonic access to CUDA Runtime and other core functionality
* [cuda.bindings](https://nvidia.github.io/cuda-python/cuda-bindings/latest): Low-level Python bindings to CUDA C APIs
* [cuda.pathfinder](https://nvidia.github.io/cuda-python/cuda-pathfinder/latest): Utilities for locating CUDA components installed in the user's Python environment
-* [cuda.coop](https://nvidia.github.io/cccl/python/coop): A Python module providing CCCL's reusable block-wide and warp-wide *device* primitives for use within Numba CUDA kernels
-* [cuda.compute](https://nvidia.github.io/cccl/python/compute): A Python module for easy access to CCCL's highly efficient and customizable parallel algorithms, like `sort`, `scan`, `reduce`, `transform`, etc. that are callable on the *host*
+* [cuda.coop](https://nvidia.github.io/cccl/unstable/python/coop.html): A Python module providing CCCL's reusable block-wide and warp-wide *device* primitives for use within Numba CUDA kernels
+* [cuda.compute](https://nvidia.github.io/cccl/unstable/python/compute/index.html): A Python module for easy access to CCCL's highly efficient and customizable parallel algorithms, like `sort`, `scan`, `reduce`, `transform`, etc. that are callable on the *host*
* [numba.cuda](https://nvidia.github.io/numba-cuda/): A Python DSL that exposes CUDA **SIMT** programming model and compiles a restricted subset of Python code into CUDA kernels and device functions
* [cuda.tile](https://docs.nvidia.com/cuda/cutile-python/): A new Python DSL that exposes CUDA **Tile** programming model and allows users to write NumPy-like code in CUDA kernels
* [nvmath-python](https://docs.nvidia.com/cuda/nvmath-python/latest): Pythonic access to NVIDIA CPU & GPU Math Libraries, with [*host*](https://docs.nvidia.com/cuda/nvmath-python/latest/overview.html#host-apis), [*device*](https://docs.nvidia.com/cuda/nvmath-python/latest/overview.html#device-apis), and [*distributed*](https://docs.nvidia.com/cuda/nvmath-python/latest/distributed-apis/index.html) APIs. It also provides low-level Python bindings to host C APIs ([nvmath.bindings](https://docs.nvidia.com/cuda/nvmath-python/latest/bindings/index.html)).
@@ -44,4 +44,6 @@ The list of available interfaces is:
* NVRTC
* nvJitLink
* NVVM
+* nvFatbin
* cuFile
+* NVML
diff --git a/cuda_core/cuda/core/experimental/__init__.pxd b/cuda_core/cuda/core/experimental/__init__.pxd
deleted file mode 100644
index d8b3a2dc32c..00000000000
--- a/cuda_core/cuda/core/experimental/__init__.pxd
+++ /dev/null
@@ -1,3 +0,0 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
diff --git a/cuda_core/cuda/core/experimental/__init__.py b/cuda_core/cuda/core/experimental/__init__.py
deleted file mode 100644
index 08c3e33ce18..00000000000
--- a/cuda_core/cuda/core/experimental/__init__.py
+++ /dev/null
@@ -1,69 +0,0 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-"""
-Backward compatibility stubs for cuda.core.experimental namespace.
-
-This module provides forwarding stubs that import from the new cuda.core.*
-locations and emit deprecation warnings. Users should migrate to importing
-directly from cuda.core instead of cuda.core.experimental.
-
-The experimental namespace will be removed in v1.0.0.
-
-"""
-
-
-def _warn_deprecated():
- """Emit a deprecation warning for using the experimental namespace.
-
- Note: This warning is only when the experimental module is first imported.
- Subsequent accesses to attributes (like utils, Device, etc.) do not trigger
- additional warnings since they are already set in the module namespace.
- """
- import warnings
-
- warnings.warn(
- "The cuda.core.experimental namespace is deprecated. "
- "Please import directly from cuda.core instead. "
- "For example, use 'from cuda.core import Device' instead of "
- "'from cuda.core.experimental import Device'. "
- "The experimental namespace will be removed in v1.0.0.",
- DeprecationWarning,
- stacklevel=3,
- )
-
-
-# Import from new locations and re-export
-_warn_deprecated()
-
-
-from cuda.core import system, utils
-
-# Make utils accessible as a submodule for backward compatibility
-__import__("sys").modules[__spec__.name + ".utils"] = utils
-
-
-from cuda.core._device import Device
-from cuda.core._event import Event, EventOptions
-from cuda.core._launch_config import LaunchConfig
-from cuda.core._launcher import launch
-from cuda.core._layout import _StridedLayout
-from cuda.core._linker import Linker, LinkerOptions
-from cuda.core._memory import (
- Buffer,
- DeviceMemoryResource,
- DeviceMemoryResourceOptions,
- GraphMemoryResource,
- LegacyPinnedMemoryResource,
- ManagedMemoryResource,
- ManagedMemoryResourceOptions,
- MemoryResource,
- PinnedMemoryResource,
- PinnedMemoryResourceOptions,
- VirtualMemoryResource,
- VirtualMemoryResourceOptions,
-)
-from cuda.core._module import Kernel, ObjectCode
-from cuda.core._program import Program, ProgramOptions
-from cuda.core._stream import Stream, StreamOptions
diff --git a/cuda_core/cuda/core/utils/_program_cache/_keys.py b/cuda_core/cuda/core/utils/_program_cache/_keys.py
index dda07039e32..fbb5ef3f890 100644
--- a/cuda_core/cuda/core/utils/_program_cache/_keys.py
+++ b/cuda_core/cuda/core/utils/_program_cache/_keys.py
@@ -670,7 +670,7 @@ def make_program_cache_key(
Returns
-------
bytes
- A 32-byte blake2b digest suitable for use as a cache key.
+ An opaque bytes digest suitable for use as a cache key.
Raises
------
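The "opaque bytes digest" contract in the docstring above can be sketched in plain Python. This is an illustrative sketch only, not the actual `cuda.core` implementation; the function name and field handling are assumptions:

```python
# Illustrative sketch: build an opaque cache key by hashing the inputs that
# determine a compilation's output. Callers should treat the result as opaque
# bytes -- only equality matters, and the algorithm/length may change.
import hashlib


def make_cache_key_sketch(source: str, options: tuple, target: str) -> bytes:
    h = hashlib.blake2b(digest_size=32)
    h.update(source.encode("utf-8"))
    for opt in options:
        # Length-prefix each field so ("ab",) hashes differently from ("a", "b").
        h.update(len(opt).to_bytes(4, "little"))
        h.update(opt.encode("utf-8"))
    h.update(target.encode("utf-8"))
    return h.digest()
```

Two calls with identical source, options, and target produce the same key; changing any field produces a different one.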
diff --git a/cuda_core/docs/nv-versions.json b/cuda_core/docs/nv-versions.json
index d55ec26f53f..0d0aa6276d9 100644
--- a/cuda_core/docs/nv-versions.json
+++ b/cuda_core/docs/nv-versions.json
@@ -3,6 +3,10 @@
"version": "latest",
"url": "https://nvidia.github.io/cuda-python/cuda-core/latest/"
},
+ {
+ "version": "1.0.0",
+ "url": "https://nvidia.github.io/cuda-python/cuda-core/1.0.0/"
+ },
{
"version": "0.7.0",
"url": "https://nvidia.github.io/cuda-python/cuda-core/0.7.0/"
diff --git a/cuda_core/docs/source/api.rst b/cuda_core/docs/source/api.rst
index 6c0019279cf..0a88a5bd4b6 100644
--- a/cuda_core/docs/source/api.rst
+++ b/cuda_core/docs/source/api.rst
@@ -6,11 +6,10 @@
``cuda.core`` API Reference
===========================
-This is the main API reference for ``cuda.core``. The package has not yet
-reached version 1.0.0, and APIs may change between minor versions, possibly
-without deprecation warnings. Once version 1.0.0 is released, APIs will
-be considered stable and will follow semantic versioning with appropriate
-deprecation periods for breaking changes.
+This is the main API reference for ``cuda.core``. As of version 1.0.0, all
+APIs are considered stable and follow `Semantic Versioning <https://semver.org>`_
+with appropriate deprecation periods for breaking changes. See the
+:doc:`support policy <support>` for details.
Devices and execution
@@ -261,46 +260,6 @@ execution.
checkpoint.Process
-CUDA system information and NVIDIA Management Library (NVML)
-------------------------------------------------------------
-
-.. note::
- ``cuda.core.system`` support requires ``cuda_bindings`` 12.9.6 or later, or 13.2.0 or later.
-
-Basic functions
-```````````````
-
-.. autosummary::
- :toctree: generated/
-
- system.get_user_mode_driver_version
- system.get_kernel_mode_driver_version
- system.get_driver_branch
- system.get_num_devices
- system.get_nvml_version
- system.get_process_name
- system.get_topology_common_ancestor
- system.get_p2p_status
-
-Events
-``````
-
-.. autosummary::
- :toctree: generated/
-
- system.register_events
-
-Types
-`````
-
-.. autosummary::
- :toctree: generated/
-
- :template: autosummary/cyclass.rst
-
- system.Device
- system.NvlinkInfo
-
Utility functions
-----------------
diff --git a/cuda_core/docs/source/api_nvml.rst b/cuda_core/docs/source/api_nvml.rst
new file mode 100644
index 00000000000..078f8ac6d67
--- /dev/null
+++ b/cuda_core/docs/source/api_nvml.rst
@@ -0,0 +1,44 @@
+.. SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+.. SPDX-License-Identifier: Apache-2.0
+
+.. module:: cuda.core.system
+
+CUDA system information and NVIDIA Management Library (NVML)
+============================================================
+
+.. note::
+ ``cuda.core.system`` support requires ``cuda_bindings`` 12.9.6 or later, or 13.2.0 or later.
+
+Basic functions
+---------------
+
+.. autosummary::
+ :toctree: generated/
+
+ get_user_mode_driver_version
+ get_kernel_mode_driver_version
+ get_driver_branch
+ get_num_devices
+ get_nvml_version
+ get_process_name
+ get_topology_common_ancestor
+ get_p2p_status
+
+Events
+------
+
+.. autosummary::
+ :toctree: generated/
+
+ register_events
+
+Types
+-----
+
+.. autosummary::
+ :toctree: generated/
+
+ :template: autosummary/cyclass.rst
+
+ Device
+ NvlinkInfo
diff --git a/cuda_core/docs/source/index.rst b/cuda_core/docs/source/index.rst
index 3bf962d7251..9a266e20949 100644
--- a/cuda_core/docs/source/index.rst
+++ b/cuda_core/docs/source/index.rst
@@ -15,12 +15,14 @@ Welcome to the documentation for ``cuda.core``.
install
interoperability
api
+ api_nvml
environment_variables
contribute
.. toctree::
:maxdepth: 1
+ support
conduct
license
diff --git a/cuda_core/docs/source/install.rst b/cuda_core/docs/source/install.rst
index 90e2a1b5b17..33a46a8c84e 100644
--- a/cuda_core/docs/source/install.rst
+++ b/cuda_core/docs/source/install.rst
@@ -32,7 +32,7 @@ dependencies are as follows:
Free-threading Build Support
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-As of cuda-core 0.4.0, **experimental** packages for the `free-threaded interpreter`_ are shipped.
+Starting with ``cuda-core`` 0.4.0, **experimental** packages for the `free-threaded interpreter`_ are shipped.
1. Support for these builds is best effort, due to heavy use of `built-in
modules that are known to be thread-unsafe`_, such as ``ctypes``.
diff --git a/cuda_core/docs/source/release/1.0.0-notes.rst b/cuda_core/docs/source/release/1.0.0-notes.rst
index 1a9a67c8614..714dc48ff62 100644
--- a/cuda_core/docs/source/release/1.0.0-notes.rst
+++ b/cuda_core/docs/source/release/1.0.0-notes.rst
@@ -10,21 +10,96 @@
Highlights
----------
-- TBD
+- First stable release of ``cuda.core``! As of version 1.0.0, all
+ APIs are considered stable and follow Semantic Versioning (SemVer)
+ with appropriate deprecation periods for breaking changes. See the
+  :doc:`support policy <../support>` for details.
+- Added green context support (CUDA 12.4+). New types :class:`Context`,
+ :class:`ContextOptions`, :class:`SMResource`, :class:`SMResourceOptions`,
+ :class:`WorkqueueResource`, and :class:`WorkqueueResourceOptions` enable GPU
+ SM and workqueue resource partitioning. Create green contexts via
+ :meth:`Device.create_context`, then use :meth:`Context.create_stream` and
+ :attr:`Context.resources` to work within the partitioned resources.
+  (`#1976 <https://github.com/NVIDIA/cuda-python/issues/1976>`__)
+- Added the :mod:`cuda.core.checkpoint` module for CUDA process checkpointing,
+ including string process state queries, lock/checkpoint/restore/unlock
+ operations, and GPU UUID remapping support for restore.
+  (`#1343 <https://github.com/NVIDIA/cuda-python/issues/1343>`__)
New features
------------
-- Added the :mod:`cuda.core.checkpoint` module for CUDA process checkpointing,
- including string process state queries, lock/checkpoint/restore/unlock
- operations, and GPU UUID remapping support for restore.
- (`#1343 `__)
+- :meth:`Program.compile` now accepts an optional ``cache=`` keyword argument
+ for avoiding recompilation of identical source + options + target. Two
+ concrete implementations of the :class:`~utils.ProgramCacheResource` ABC are
+ provided: :class:`~utils.InMemoryProgramCache` (thread-safe, single-process
+ LRU) and :class:`~utils.FileStreamProgramCache` (disk-backed, cross-process
+ safe, LRU-evicting). A standalone :func:`~utils.make_program_cache_key`
+ function is exposed for callers who need to incorporate additional content
+ (e.g. headers or PCH files) into the cache key.
+  (`#1912 <https://github.com/NVIDIA/cuda-python/issues/1912>`__)
+- Additions to the :mod:`cuda.core.system` module for NVIDIA Management Library (NVML)
+ access:
+
+ - :attr:`system.Device.mig` for querying and setting MIG mode, enumerating
+ MIG device instances, and navigating parent/child relationships.
+    (`#1916 <https://github.com/NVIDIA/cuda-python/issues/1916>`__)
+ - :attr:`system.Device.compute_running_processes` for querying running compute
+ processes on a device, returning :class:`~system.ProcessInfo` objects with
+ PID, GPU memory usage, and MIG instance IDs.
+    (`#1917 <https://github.com/NVIDIA/cuda-python/issues/1917>`__)
+ - :meth:`system.Device.get_nvlink` for querying NVLink version and state per
+ link, and :attr:`system.Device.utilization` returning current GPU and memory
+ utilization rates.
+    (`#1918 <https://github.com/NVIDIA/cuda-python/issues/1918>`__)
+
+- Enums are now available in places where a small number of string values are
+ accepted or returned. You may continue to use the string values, or use
+ enumerations for better linting and type-checking.
+  (`#2016 <https://github.com/NVIDIA/cuda-python/issues/2016>`__)
+ The new enums are:
+
+ - :class:`cuda.core.typing.CompilerBackendType`
+ - :class:`cuda.core.typing.GraphConditionalType`
+ - :class:`cuda.core.typing.GraphMemoryType`
+ - :class:`cuda.core.typing.ManagedMemoryLocationType`
+ - :class:`cuda.core.typing.ObjectCodeFormatType`
+ - :class:`cuda.core.typing.PCHStatusType`
+ - :class:`cuda.core.typing.SourceCodeType`
+ - :class:`cuda.core.typing.VirtualMemoryAccessType`
+ - :class:`cuda.core.typing.VirtualMemoryAllocationType`
+ - :class:`cuda.core.typing.VirtualMemoryGranularityType`
+ - :class:`cuda.core.typing.VirtualMemoryHandleType`
+ - :class:`cuda.core.typing.VirtualMemoryLocationType`
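The enum-or-string duality described above can be sketched as follows. The class and member names here are illustrative, not the actual `cuda.core.typing` definitions; the point is that a `str`-based `Enum` compares equal to its plain string value, so both spellings remain accepted:

```python
# Hedged sketch of the enum-or-string pattern (names are hypothetical).
from enum import Enum


class SourceCodeTypeSketch(str, Enum):
    CPP = "c++"
    PTX = "ptx"


def normalize(code_type):
    # Accept either an enum member or its plain string value.
    return SourceCodeTypeSketch(code_type)
```

Existing call sites passing `"c++"` keep working, while new code gains lint- and type-checkable members.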
Breaking changes
----------------
+- :class:`~utils.StridedMemoryView` now provides a fast path for ``torch.Tensor``
+ objects via PyTorch's AOT Inductor (AOTI) stable C ABI. When a ``torch.Tensor``
+ is passed to any ``from_*`` classmethod (``from_dlpack``,
+ ``from_cuda_array_interface``, ``from_array_interface``, or
+ ``from_any_interface``), tensor metadata is read directly from the underlying
+ C struct, bypassing the DLPack and CUDA Array Interface protocol overhead.
+ This yields ~7–20x faster ``StridedMemoryView`` construction for PyTorch
+ tensors (depending on whether stream ordering is required). Proper CUDA stream
+ ordering is established between PyTorch's current stream and the consumer
+ stream, matching the DLPack synchronization contract.
+ Requires PyTorch >= 2.3.
+
+ This is a *behavioral* breaking change: because the AOTI tensor bridge reads
+ raw metadata without re-enacting PyTorch's export guardrails, tensors that
+ PyTorch would reject at the DLPack boundary (notably ``requires_grad``,
+ conjugated, non-strided/sparse, and wrong-current-device CUDA tensors) are
+ now accepted. This is intentional — ``StridedMemoryView`` is designed for
+ low-level interop where those checks are not needed.
+  (`#749 <https://github.com/NVIDIA/cuda-python/issues/749>`__)
+- Removed the deprecated ``cuda.core.experimental`` namespace. All public APIs
+ have been available under ``cuda.core`` since v0.5.0. Code that imports from
+ ``cuda.core.experimental`` must be updated to import from ``cuda.core``
+ instead.
- Graph types are no longer re-exported from the top-level ``cuda.core``
namespace; they must be imported from :mod:`cuda.core.graph`. The affected
symbols are :class:`~graph.Graph`, :class:`~graph.GraphBuilder`,
@@ -32,8 +107,7 @@ Breaking changes
:class:`~graph.GraphDebugPrintOptions`, and :class:`~graph.GraphDefinition`.
Update ``from cuda.core import GraphBuilder`` to
``from cuda.core.graph import GraphBuilder`` (and similarly for the other
- symbols). The same symbols are also no longer forwarded through the
- deprecated ``cuda.core.experimental`` namespace.
+ symbols).
- Removed the ``GraphAllocOptions`` dataclass and the
``AllocNode.options`` property. Its fields are now keyword-only
parameters on :meth:`graph.GraphDefinition.allocate` and
@@ -171,8 +245,69 @@ Breaking changes
- :obj:`cuda.core.typing.DevicePointerT` -> :obj:`cuda.core.typing.DevicePointerType`
- :obj:`cuda.core.typing.IsStreamT` -> :obj:`cuda.core.typing.IsStreamType`
-- :func:`args_viewable_as_strided_memory` and :class:`StridedMemoryView` are now
- longer at the top-level in :mod:`cuda.core`. They are available publicly from the
+- Renamed and converted multiple :class:`~system.Device` properties and methods
+ for naming consistency
+  (`#1946 <https://github.com/NVIDIA/cuda-python/issues/1946>`__):
+
+ On :class:`~system.Device`:
+
+ - ``is_c2c_mode_enabled`` -> ``is_c2c_enabled``
+ - ``persistence_mode_enabled`` -> ``is_persistence_mode_enabled``
+ - ``clock(clock_type)`` -> ``get_clock(clock_type)``
+ - ``get_auto_boosted_clocks_enabled()`` -> ``is_auto_boosted_clocks_enabled``
+ (method -> property)
+ - ``get_current_clock_event_reasons()`` -> ``current_clock_event_reasons``
+ (method -> property)
+ - ``get_supported_clock_event_reasons()`` -> ``supported_clock_event_reasons``
+ (method -> property)
+ - ``display_mode`` -> ``is_display_connected``
+ - ``display_active`` -> ``is_display_active``
+ - ``fan(fan=0)`` -> ``get_fan(fan=0)``
+ - ``get_supported_pstates()`` -> ``supported_pstates``
+ (method -> property)
+
+ On ``PciInfo``:
+
+ - ``get_max_pcie_link_generation()`` -> ``link_generation`` (method -> property)
+ - ``get_gpu_max_pcie_link_generation()`` -> ``max_link_generation``
+ (method -> property)
+ - ``get_max_pcie_link_width()`` -> ``max_link_width`` (method -> property)
+ - ``get_current_pcie_link_generation()`` -> ``current_link_generation``
+ (method -> property)
+ - ``get_current_pcie_link_width()`` -> ``current_link_width``
+ (method -> property)
+ - ``get_pcie_throughput(counter)`` -> ``get_throughput(counter)``
+ - ``get_pcie_replay_counter()`` -> ``replay_counter`` (method -> property)
+
+ On ``Temperature``:
+
+ - ``sensor(sensor=...)`` -> ``get_sensor(sensor=...)``
+ - ``threshold(threshold_type)`` -> ``get_threshold(threshold_type)``
+ - ``thermal_settings(sensor_index)`` -> ``get_thermal_settings(sensor_index)``
+
+ On ``FanInfo``:
+
+ - ``set_default_fan_speed()`` -> ``set_default_speed()``
+
+- Re-wrapped NVML enums as human-readable ``StrEnum`` subclasses instead of raw
+ integer re-exports from ``cuda.bindings.nvml``. These are available in
+ ``cuda.core.system.typing``.
+  (`#2014 <https://github.com/NVIDIA/cuda-python/issues/2014>`__)
+- Removed 18 helper/data-container classes from ``cuda.core.system.__all__``:
+ ``BAR1MemoryInfo``, ``ClockInfo``, ``ClockOffsets``, ``CoolerInfo``,
+ ``DeviceAttributes``, ``DeviceEvents``, ``EventData``, ``FanInfo``,
+ ``FieldValue``, ``FieldValues``, ``GpuDynamicPstatesInfo``,
+ ``GpuDynamicPstatesUtilization``, ``InforomInfo``, ``PciInfo``,
+ ``RepairStatus``, ``Temperature``, ``ThermalSensor``, ``ThermalSettings``.
+ These classes are still returned by :class:`~system.Device` properties and
+ methods but should not be directly instantiated by users.
+  (`#1942 <https://github.com/NVIDIA/cuda-python/issues/1942>`__)
+- :attr:`system.Device.uuid` now returns the full NVML UUID with prefix
+ (e.g. ``GPU-...``). Use :attr:`system.Device.uuid_without_prefix` for
+ the previous behavior.
+  (`#1916 <https://github.com/NVIDIA/cuda-python/issues/1916>`__)
+- :func:`args_viewable_as_strided_memory` and :class:`StridedMemoryView` were accidentally
+  exposed at the top level of :mod:`cuda.core`. They remain publicly available from the
:mod:`cuda.core.utils` module.
(`#2028 <https://github.com/NVIDIA/cuda-python/issues/2028>`__)
@@ -182,36 +317,43 @@ Breaking changes
NVML) and :func:`system.get_kernel_mode_driver_version` (requires
NVML). Each returns a ``tuple[int, ...]``.
+
Fixes and enhancements
-----------------------
-- :class:`~utils.StridedMemoryView` now provides a fast path for ``torch.Tensor``
- objects via PyTorch's AOT Inductor (AOTI) stable C ABI. When a ``torch.Tensor``
- is passed to any ``from_*`` classmethod (``from_dlpack``,
- ``from_cuda_array_interface``, ``from_array_interface``, or
- ``from_any_interface``), tensor metadata is read directly from the underlying
- C struct, bypassing the DLPack and CUDA Array Interface protocol overhead.
- This yields ~7-20x faster ``StridedMemoryView`` construction for PyTorch
- tensors (depending on whether stream ordering is required). Proper CUDA stream ordering is established between PyTorch's current
- stream and the consumer stream, matching the DLPack synchronization contract.
- Requires PyTorch >= 2.3.
- (`#749 `__)
-
-- Enums are not available in places where a small number of string values are
- accepted or returned. You may continue to use the string values, or use
- enumerations for better linting and type-checking.
- (`#2016 `__)
- The new enums are:
-
- - :class:`cuda.core.typing.CompilerBackendType`
- - :class:`cuda.core.typing.GraphConditionalType`
- - :class:`cuda.core.typing.GraphMemoryType`
- - :class:`cuda.core.typing.ManagedMemoryLocationType`
- - :class:`cuda.core.typing.ObjectCodeFormatType`
- - :class:`cuda.core.typing.PCHStatusType`
- - :class:`cuda.core.typing.SourceCodeType`
- - :class:`cuda.core.typing.VirtualMemoryAccessType`
- - :class:`cuda.core.typing.VirtualMemoryAllocationType`
- - :class:`cuda.core.typing.VirtualMemoryGranularityType`
- - :class:`cuda.core.typing.VirtualMemoryHandleType`
- - :class:`cuda.core.typing.VirtualMemoryLocationType`
+- Fixed :attr:`Buffer.is_managed` returning ``False`` for pool-allocated managed
+ memory (:class:`ManagedMemoryResource`), which caused DLPack interop to
+ misclassify managed buffers as ``kDLCUDAHost``. The fix queries both the
+ driver pointer attribute and the memory resource.
+  (`#1924 <https://github.com/NVIDIA/cuda-python/issues/1924>`__)
+- :attr:`system.Device.arch` now returns ``UNKNOWN`` instead of raising
+ ``ValueError`` when NVML reports an architecture not yet in the enum.
+  (`#1937 <https://github.com/NVIDIA/cuda-python/issues/1937>`__)
+- :meth:`system.Device.get_field_values` and
+ :meth:`system.Device.clear_field_values` with an empty list no longer raise
+ ``InvalidArgumentError``.
+  (`#1982 <https://github.com/NVIDIA/cuda-python/issues/1982>`__)
+- :class:`Linker` error and info log retrieval now properly checks return codes
+ from nvJitLink, raising exceptions on failure instead of silently ignoring
+ errors.
+  (`#1993 <https://github.com/NVIDIA/cuda-python/issues/1993>`__)
+- Fixed a potential crash when NVML event set creation failed on Windows, due to
+ ``__dealloc__`` freeing an uninitialized handle.
+  (`#1992 <https://github.com/NVIDIA/cuda-python/issues/1992>`__)
+- CUDA Runtime error messages are now more reliable, especially on Windows
+ where the runtime DLL name table could disagree with the installed bindings.
+  (`#2003 <https://github.com/NVIDIA/cuda-python/issues/2003>`__)
+- Graph kernel nodes now prevent Python kernel-argument objects from being
+ garbage-collected before the graph executes. Previously, objects passed as
+ kernel arguments (e.g. a :class:`Buffer`) could be freed if the only Python
+ reference was through the launch call, causing the graph to operate on stale
+ device pointers.
+  (`#2041 <https://github.com/NVIDIA/cuda-python/issues/2041>`__)
+- Fixed a potential crash in ``DeviceEvents.__dealloc__`` when ``__init__``
+ raised before the NVML event set was created, due to freeing an uninitialized
+ handle.
+  (`#2047 <https://github.com/NVIDIA/cuda-python/issues/2047>`__)
+- Linux release wheels are now stripped of debug symbols, significantly reducing
+ package size. Debug builds are now supported via
+ ``--config-settings=debug=true``.
+  (`#1890 <https://github.com/NVIDIA/cuda-python/issues/1890>`__)
diff --git a/cuda_core/docs/source/support.rst b/cuda_core/docs/source/support.rst
new file mode 100644
index 00000000000..3a6548ce204
--- /dev/null
+++ b/cuda_core/docs/source/support.rst
@@ -0,0 +1,87 @@
+.. SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+.. SPDX-License-Identifier: Apache-2.0
+
+.. _cuda-core-support:
+
+``cuda.core`` Support Policy
+============================
+
+Versioning Scheme
+-----------------
+
+``cuda.core`` follows `Semantic Versioning (SemVer) <https://semver.org>`_ with the version
+format ``major.minor.patch``:
+
+- **Major**: Bumped when a new CUDA major release is out and support for the oldest CUDA major
+ version is dropped. Breaking API changes only happen at major-version boundaries.
+- **Minor**: Bumped when new, backward-compatible features are added, or when a new Python feature
+ release is out and the oldest supported Python version reaches EOL.
+- **Patch**: Bumped for bug fixes and backward-compatible maintenance updates.
+
+Unlike ``cuda.bindings``, the ``cuda.core`` version is *not* aligned with the CUDA Toolkit version.
+Consult the table below or the :doc:`release notes ` to determine which CUDA versions are
+supported by a given ``cuda.core`` release.
+
+Project Lifecycle & Release Cadence
+***********************************
+
+- ``cuda.core`` follows its own release cadence, independent of CUDA Toolkit releases, as long as
+ SemVer guarantees are maintained.
+
+ - We currently aim for bimonthly releases, though this is subject to change.
+
+- Major version releases are aligned to CUDA major version releases.
+- New features may be delivered in minor releases at any time — not gated by the CUDA Toolkit
+ release schedule.
+- Patch releases can be made on an as-needed basis, subject to urgency and the team's bandwidth.
+- We currently do not plan to maintain multiple releases, nor do we have a backport policy for new features or bug fixes.
+- Deprecation notices will be issued for at least one (1) minor release before the actual
+  removal happens.
+
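The bump rules in the Versioning Scheme section can be sketched as a small helper. This is illustrative only, not an official tool:

```python
# Encode the SemVer rules above: breaking API changes only happen at
# major-version boundaries.
def parse_version(v: str) -> tuple:
    """Split 'major.minor.patch' into a tuple of ints."""
    major, minor, patch = (int(part) for part in v.split("."))
    return (major, minor, patch)


def may_break_api(old: str, new: str) -> bool:
    """True only when the upgrade crosses a major-version boundary."""
    return parse_version(new)[0] > parse_version(old)[0]
```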
+CUDA Version Support
+--------------------
+
+``cuda.core`` is actively maintained to support the two (2) most recent CUDA major versions. For
+example, ``cuda.core`` 1.x supports CUDA 12 and 13.
+
+In particular, this means that all CUDA minor versions within the two supported major releases
+(12.x, 13.x) are supported by the same ``cuda-core`` package.
+
+When a new CUDA major version is released and support for the oldest major version is dropped,
+``cuda.core`` will release a new major version (e.g., 1.x → 2.0.0).
+
+.. list-table:: CUDA Version Support Matrix
+ :header-rows: 1
+
+ * - ``cuda.core`` version
+ - Supported CUDA versions
+ * - 1.x
+ - 12, 13
+
+As with any CUDA library, certain features may impose additional requirements on the minimum
+``cuda-bindings``, CUDA library, or CUDA driver versions. Refer to the individual module
+documentation for details.
+
+Python Version Support
+----------------------
+
+``cuda.core`` supports all Python versions following the `CPython EOL schedule
+<https://devguide.python.org/versions/>`_. As of this writing, Python 3.10 – 3.14 are supported.
+
+When a new Python feature version is released and the oldest supported version reaches EOL,
+``cuda.core`` will bump its minor version accordingly.
+
+Free-threading Build Support
+----------------------------
+
+Starting with ``cuda-core`` 0.4.0, packages for the `free-threaded interpreter
+<https://docs.python.org/3/howto/free-threading-python.html>`_ are shipped to PyPI and conda-forge.
+This support is currently *experimental*.
+
+For now, you are responsible for making sure that calls into the underlying CUDA libraries
+are thread-safe. This is subject to change.
+
+----
+
+The NVIDIA CUDA Python team reserves the right to amend the above support policy. Any major changes,
+however, will be announced to users in advance.
diff --git a/cuda_core/tests/test_experimental_backward_compat.py b/cuda_core/tests/test_experimental_backward_compat.py
deleted file mode 100644
index 98af4a9557a..00000000000
--- a/cuda_core/tests/test_experimental_backward_compat.py
+++ /dev/null
@@ -1,124 +0,0 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-"""
-Tests for backward compatibility of cuda.core.experimental namespace.
-
-These tests verify that the experimental namespace forwarding stubs work
-correctly and emit appropriate deprecation warnings.
-
-Note: This test function is assumed to be the only function importing
-cuda.core.experimental in the test suite to avoid race conditions when
-tests run in parallel.
-"""
-
-import sys
-
-import pytest
-
-
-def test_experimental_backward_compatibility():
- """Test backward compatibility of cuda.core.experimental namespace.
-
- This single test function combines all experimental namespace tests to
- avoid race conditions when tests run in parallel. All tests that need to
- verify deprecation warnings or module state should be in this function.
- """
- # Defensive: ensure module is not cached (handles case where it might
- # already be imported by other tests or conftest)
- if "cuda.core.experimental" in sys.modules:
- del sys.modules["cuda.core.experimental"]
-
- # Test 1: Main module import - should emit deprecation warning
- with pytest.deprecated_call():
- import cuda.core.experimental
-
- # Test that symbols are accessible
- assert hasattr(cuda.core.experimental, "Device")
- assert hasattr(cuda.core.experimental, "Stream")
- assert hasattr(cuda.core.experimental, "Buffer")
- assert hasattr(cuda.core.experimental, "system")
-
- # Test 2: Direct imports - should emit deprecation warning
- # Clear cached module again to ensure warning is emitted
- del sys.modules["cuda.core.experimental"]
-
- with pytest.deprecated_call():
- from cuda.core.experimental import (
- Buffer,
- Device,
- Stream,
- )
-
- # Verify objects are usable
- assert Device is not None
- assert Stream is not None
- assert Buffer is not None
-
- # Test 3: Symbols are the same objects as core
- import cuda.core
-
- # Compare classes/types
- assert cuda.core.experimental.Device is cuda.core.Device
- assert cuda.core.experimental.Stream is cuda.core.Stream
- assert cuda.core.experimental.Buffer is cuda.core.Buffer
- assert cuda.core.experimental.MemoryResource is cuda.core.MemoryResource
- assert cuda.core.experimental.Program is cuda.core.Program
- assert cuda.core.experimental.Kernel is cuda.core.Kernel
- assert cuda.core.experimental.ObjectCode is cuda.core.ObjectCode
- assert cuda.core.experimental.Event is cuda.core.Event
- assert cuda.core.experimental.Linker is cuda.core.Linker
-
- # Compare singletons
- assert cuda.core.experimental.system is cuda.core.system
-
- # Test 4: Utils module works
- # Note: The deprecation warning is only emitted once at import time when
- # cuda.core.experimental is first imported. Accessing utils or importing
- # from utils does not trigger additional warnings since utils is already
- # set as an attribute in the module namespace.
- assert hasattr(cuda.core.experimental, "utils")
- assert cuda.core.experimental.utils is not None
-
- # Should have expected utilities (no warning on import from utils submodule)
- from cuda.core.experimental.utils import StridedMemoryView, args_viewable_as_strided_memory
-
- assert StridedMemoryView is not None
- assert args_viewable_as_strided_memory is not None
-
- # Test 5: Options classes are accessible
- assert hasattr(cuda.core.experimental, "EventOptions")
- assert hasattr(cuda.core.experimental, "StreamOptions")
- assert hasattr(cuda.core.experimental, "LaunchConfig")
- assert hasattr(cuda.core.experimental, "ProgramOptions")
- assert hasattr(cuda.core.experimental, "LinkerOptions")
- assert hasattr(cuda.core.experimental, "DeviceMemoryResourceOptions")
- assert hasattr(cuda.core.experimental, "VirtualMemoryResourceOptions")
-
- # Verify they're the same objects
- assert cuda.core.experimental.EventOptions is cuda.core.EventOptions
- assert cuda.core.experimental.StreamOptions is cuda.core.StreamOptions
- assert cuda.core.experimental.LaunchConfig is cuda.core.LaunchConfig
-
- # Test 6: Memory-related classes are accessible
- assert hasattr(cuda.core.experimental, "MemoryResource")
- assert hasattr(cuda.core.experimental, "DeviceMemoryResource")
- assert hasattr(cuda.core.experimental, "LegacyPinnedMemoryResource")
- assert hasattr(cuda.core.experimental, "VirtualMemoryResource")
- assert hasattr(cuda.core.experimental, "GraphMemoryResource")
-
- # Verify they're the same objects
- assert cuda.core.experimental.MemoryResource is cuda.core.MemoryResource
- assert cuda.core.experimental.DeviceMemoryResource is cuda.core.DeviceMemoryResource
-
- # Test 7: Objects can be instantiated through experimental namespace
- # (No deprecation warning expected since module is already imported)
- device = cuda.core.experimental.Device()
-
- assert device is not None
-
- # Verify it's the same type
- from cuda.core import Device as CoreDevice
-
- assert isinstance(device, CoreDevice)
diff --git a/cuda_python/DESCRIPTION.rst b/cuda_python/DESCRIPTION.rst
index 6120a568023..90bf5c127a4 100644
--- a/cuda_python/DESCRIPTION.rst
+++ b/cuda_python/DESCRIPTION.rst
@@ -10,8 +10,8 @@ CUDA Python is the home for accessing NVIDIA's CUDA platform from Python. It con
* `cuda.core <https://nvidia.github.io/cuda-python/cuda-core/latest>`_: Pythonic access to CUDA Runtime and other core functionality
* `cuda.bindings <https://nvidia.github.io/cuda-python/cuda-bindings/latest>`_: Low-level Python bindings to CUDA C APIs
* `cuda.pathfinder <https://nvidia.github.io/cuda-python/cuda-pathfinder/latest>`_: Utilities for locating CUDA components installed in the user's Python environment
-* `cuda.coop <https://nvidia.github.io/cccl/python/coop>`_: A Python module providing CCCL's reusable block-wide and warp-wide *device* primitives for use within Numba CUDA kernels
-* `cuda.compute <https://nvidia.github.io/cccl/python/compute>`_: A Python module for easy access to CCCL's highly efficient and customizable parallel algorithms, like ``sort``, ``scan``, ``reduce``, ``transform``, etc. that are callable on the *host*
+* `cuda.coop <https://nvidia.github.io/cccl/unstable/python/coop.html>`_: A Python module providing CCCL's reusable block-wide and warp-wide *device* primitives for use within Numba CUDA kernels
+* `cuda.compute <https://nvidia.github.io/cccl/unstable/python/compute/index.html>`_: A Python module for easy access to CCCL's highly efficient and customizable parallel algorithms, like ``sort``, ``scan``, ``reduce``, ``transform``, etc. that are callable on the *host*
* `numba.cuda <https://nvidia.github.io/numba-cuda/>`_: A Python DSL that exposes CUDA **SIMT** programming model and compiles a restricted subset of Python code into CUDA kernels and device functions
* `cuda.tile <https://docs.nvidia.com/cuda/cutile-python/>`_: A new Python DSL that exposes CUDA **Tile** programming model and allows users to write NumPy-like code in CUDA kernels
* `nvmath-python <https://docs.nvidia.com/cuda/nvmath-python/latest>`_: Pythonic access to NVIDIA CPU & GPU Math Libraries, with `host <https://docs.nvidia.com/cuda/nvmath-python/latest/overview.html#host-apis>`_, `device <https://docs.nvidia.com/cuda/nvmath-python/latest/overview.html#device-apis>`_, and `distributed <https://docs.nvidia.com/cuda/nvmath-python/latest/distributed-apis/index.html>`_ APIs. It also provides low-level Python bindings to host C APIs (`nvmath.bindings <https://docs.nvidia.com/cuda/nvmath-python/latest/bindings/index.html>`_).
@@ -52,4 +52,6 @@ The list of available interfaces is:
* NVRTC
* nvJitLink
* NVVM
+* nvFatbin
* cuFile
+* NVML
diff --git a/cuda_python/docs/source/index.rst b/cuda_python/docs/source/index.rst
index 7aad94ef9c4..458a7a03229 100644
--- a/cuda_python/docs/source/index.rst
+++ b/cuda_python/docs/source/index.rst
@@ -20,8 +20,8 @@ multiple components:
- `CUPTI Python`_: Python APIs for creation of profiling tools that target CUDA Python applications via the CUDA Profiling Tools Interface (CUPTI)
- `Accelerated Computing Hub`_: Open-source learning materials related to GPU computing. You will find user guides, tutorials, and other works freely available for all learners interested in GPU computing.
-.. _cuda.coop: https://nvidia.github.io/cccl/python/coop
-.. _cuda.compute: https://nvidia.github.io/cccl/python/compute
+.. _cuda.coop: https://nvidia.github.io/cccl/unstable/python/coop.html
+.. _cuda.compute: https://nvidia.github.io/cccl/unstable/python/compute/index.html
.. _numba.cuda: https://nvidia.github.io/numba-cuda/
.. _cuda.tile: https://docs.nvidia.com/cuda/cutile-python/
.. _nvmath-python: https://docs.nvidia.com/cuda/nvmath-python/latest
@@ -50,8 +50,8 @@ be available, please refer to the `cuda.bindings`_ documentation for installatio
cuda.core
cuda.bindings
cuda.pathfinder
-   cuda.coop <https://nvidia.github.io/cccl/python/coop>
-   cuda.compute <https://nvidia.github.io/cccl/python/compute>
+   cuda.coop <https://nvidia.github.io/cccl/unstable/python/coop.html>
+   cuda.compute <https://nvidia.github.io/cccl/unstable/python/compute/index.html>
numba.cuda
cuda.tile
nvmath-python