diff --git a/kwave/kspaceFirstOrder.py b/kwave/kspaceFirstOrder.py
index 3a71cccd..351de1b8 100644
--- a/kwave/kspaceFirstOrder.py
+++ b/kwave/kspaceFirstOrder.py
@@ -82,6 +82,40 @@ def _strip_pml(result, pml_size, ndim, suffixes=_FULL_GRID_SUFFIXES):
     }
 
 
+def _resolve_dtype(value):
+    """Normalize a dtype-like input to ``np.float32`` or ``np.float64``.
+
+    Accepts numpy dtypes/types (``np.float32``, ``np.float64``), strings
+    (``"float32"`` etc., plus MATLAB aliases ``"single"`` / ``"double"``),
+    Python ``float``, ``None`` (default → float64), and the legacy MATLAB
+    ``"off"`` alias for float64. Anything that resolves to a non-float32 /
+    non-float64 dtype raises ``ValueError`` — the solver isn't validated
+    for ``float16`` / complex dtypes.
+
+    Cupy dtypes (``cp.float32``, ``cp.float64``) work for free because cupy
+    re-exports numpy's scalar types. Torch / JAX dtypes are not accepted —
+    they live in different ecosystems and don't translate via ``np.dtype()``;
+    the error message points the user at the equivalent numpy dtype.
+    """
+    if value is None or value == "off":
+        return np.float64
+    try:
+        resolved = np.dtype(value).type
+    except TypeError as e:
+        framework = getattr(type(value), "__module__", "").split(".")[0]
+        hint = ""
+        if framework in ("torch", "jax", "jaxlib", "tensorflow"):
+            hint = f" {framework}.dtype objects aren't supported; pass the equivalent numpy dtype (np.float32 / np.float64)."
+        raise ValueError(
+            f"dtype must be a numpy dtype, type, or string (e.g. 'float32', 'single'), got {value!r}.{hint}"
+        ) from e
+    if resolved is np.float32:
+        return np.float32
+    if resolved is np.float64:
+        return np.float64
+    raise ValueError(f"dtype must resolve to float32 or float64; got {resolved.__name__} from {value!r}")
+
+
 def kspaceFirstOrder(
     kgrid: kWaveGrid,
     medium: kWaveMedium,
@@ -96,6 +130,7 @@ def kspaceFirstOrder(
     smooth_p0: bool = True,
     backend: str = "python",
     device: str = "cpu",
+    dtype=None,
     save_only: bool = False,
     data_path: Optional[str] = None,
     quiet: bool = False,
@@ -142,6 +177,24 @@ def kspaceFirstOrder(
         device: ``"cpu"`` or ``"gpu"``. For ``backend="python"`` this selects
             NumPy (cpu) vs CuPy (gpu). For ``backend="cpp"`` it selects the
             OMP vs CUDA binary. Default ``"cpu"``.
+        dtype: Numerical precision for state arrays in the Python backend.
+            Accepts dtype-like input — a numpy dtype (``np.float32``,
+            ``np.float64``; cupy aliases like ``cp.float32`` work since cupy
+            re-exports numpy's scalar types), a string (``"float32"``,
+            ``"float64"``, ``"single"``, ``"double"``), a Python type
+            (``float``), or ``None`` for the default (float64). The
+            MATLAB-style alias ``"off"`` is accepted as a synonym for
+            float64 to ease migration from the legacy
+            ``SimulationOptions.data_cast``. Torch / JAX dtypes are not
+            accepted; pass the numpy equivalent (e.g. ``np.float32`` for
+            ``torch.float32``).
+            ``np.float32`` uses roughly half the memory and is faster on
+            most hardware, at the cost of reduced numerical accuracy.
+            Only ``float32`` and ``float64`` are supported; other dtypes
+            raise ``ValueError``. Has no effect on ``backend="cpp"`` (the
+            C++ binary uses fixed internal precision regardless); a warning
+            is emitted if ``dtype`` resolves to anything other than float64
+            with the C++ backend. Default ``None`` (float64).
         save_only: When ``True`` (``backend="cpp"`` only), write the HDF5
             input file and return without running the binary. Useful for
             cluster submission. Default ``False``.
@@ -167,6 +220,7 @@ def kspaceFirstOrder(
         raise ValueError(f"device must be 'cpu' or 'gpu', got {device!r}")
     if backend not in ("python", "cpp"):
         raise ValueError(f"Unknown backend: {backend!r}. Use 'python' or 'cpp'.")
+    dtype = _resolve_dtype(dtype)
 
     if isinstance(pml_size, str) and pml_size.lower() == "auto":
         pml_size = tuple(int(x) for x in get_optimal_pml_size(kgrid))
@@ -206,6 +260,7 @@ def kspaceFirstOrder(
             pml_size=pml_size,
             pml_alpha=pml_alpha,
             quiet=quiet,
+            dtype=dtype,
         ).run()
 
     elif backend == "cpp":
@@ -215,6 +270,14 @@ def kspaceFirstOrder(
         check_alpha_mode_cpp_compatible(medium)
         warn_alpha_power_near_unity_cpp(medium)
 
+        if dtype is not np.float64:
+            warnings.warn(
+                f"dtype={np.dtype(dtype).name!r} has no effect with backend='cpp'; the C++ binary "
+                "uses fixed internal precision regardless. Use backend='python' to control "
+                "computational precision.",
+                stacklevel=2,
+            )
+
         if not use_kspace:
             warnings.warn(
                 "use_kspace=False has no effect with backend='cpp'; the C++ binary always applies k-space correction.",
diff --git a/kwave/solvers/kspace_solver.py b/kwave/solvers/kspace_solver.py
index 2ce239d0..0ec70ce4 100644
--- a/kwave/solvers/kspace_solver.py
+++ b/kwave/solvers/kspace_solver.py
@@ -32,40 +32,55 @@ def _to_cpu(x):
     return x.get() if hasattr(x, "get") else x
 
 
-def _expand_to_grid(val, grid_shape, xp, name="parameter"):
+def _array_sum(arrays):
+    """Sum arrays, preserving dtype.
+
+    ``sum(arrays)`` starts from Python ``int 0``; under numpy < 2 (NEP 50)
+    that promotes float32 inputs to float64. Starting from ``arrays[0]``
+    keeps the result's dtype equal to the elements'.
+ """ + out = arrays[0] + for a in arrays[1:]: + out = out + a + return out + + +def _expand_to_grid(val, grid_shape, xp, name="parameter", dtype=float): if val is None: raise ValueError(f"Missing required parameter: {name}") - arr = xp.array(val, dtype=float).ravel() + arr = xp.array(val, dtype=dtype).ravel() grid_size = int(np.prod(grid_shape)) if arr.size == 1: - return xp.full(grid_shape, float(arr[0]), dtype=float) + return xp.full(grid_shape, arr[0], dtype=dtype) if arr.size == grid_size: return arr.reshape(grid_shape) raise ValueError(f"{name} size {arr.size} incompatible with grid size {grid_size}") -def _build_source_op(mask_raw, signal_raw, mode, scale, *, xp, grid_shape, grid_size, source_kappa, diff_fn): +def _build_source_op(mask_raw, signal_raw, mode, scale, *, xp, grid_shape, grid_size, source_kappa, diff_fn, dtype=float): """Build a source injection operator for one field variable. Returns a callable (t, field) → field that injects scaled source values. + ``dtype`` controls the precision of the source signal buffer (matches the + field precision set by ``data_cast``). 
""" mask = xp.array(mask_raw, dtype=bool).ravel() if mask.size == 1: mask = xp.full(grid_shape, bool(mask[0]), dtype=bool).ravel() n_src = int(xp.sum(mask)) - signal_arr = xp.array(signal_raw, dtype=float) + signal_arr = xp.array(signal_raw, dtype=dtype) if signal_arr.ndim == 1: signal = signal_arr.reshape(1, -1) else: signal = signal_arr.reshape(-1, signal_arr.shape[-1]) if signal_arr.ndim > 2 else signal_arr - scaled = signal * xp.atleast_1d(xp.asarray(scale))[:, None] + scaled = signal * xp.atleast_1d(xp.asarray(scale, dtype=dtype))[:, None] signal_len = scaled.shape[1] def get_val(t): if scaled.shape[0] == 1: - return xp.full(n_src, float(scaled[0, t])) + return xp.full(n_src, scaled[0, t], dtype=dtype) return scaled[:, t] def dirichlet(t, field): @@ -76,7 +91,7 @@ def dirichlet(t, field): return flat.reshape(grid_shape) # Pre-allocate buffer to avoid per-step allocation - _src_buf = xp.zeros(grid_size, dtype=float) + _src_buf = xp.zeros(grid_size, dtype=dtype) def additive_kspace(t, field): if t >= signal_len: @@ -131,6 +146,7 @@ def __init__( pml_size=None, pml_alpha=None, quiet=False, + dtype=None, ): self.kgrid = kgrid self.medium = medium @@ -142,6 +158,25 @@ def __init__( self.quiet = quiet self._pml_size_override = pml_size self._pml_alpha_override = pml_alpha + # Compute precision for state arrays. ``None`` defaults to float64 (matches + # MATLAB k-Wave). Only float32 / float64 are validated by the solver. + if dtype is None: + self._dtype = np.float64 + elif dtype in (np.float32, np.float64): + self._dtype = dtype + else: + try: + resolved = np.dtype(dtype).type + except TypeError as e: + raise ValueError(f"dtype must be np.float32 or np.float64 (or string equivalent), got {dtype!r}") from e + if resolved not in (np.float32, np.float64): + raise ValueError(f"dtype must resolve to float32 or float64, got {resolved.__name__}") + self._dtype = resolved + # Companion complex dtype for FFT outputs. 
numpy<2 (np.fft) always upcasts + # to complex128 regardless of input precision; we cast back so the rest of + # the pipeline stays in self._dtype. Harmless on numpy 2+ and on cupy + # (which already respects input precision). + self._complex_dtype = np.complex64 if self._dtype is np.float32 else np.complex128 # kWaveGrid doesn't have pml_size_x attrs; warn if PML will silently be disabled if pml_size is None: from kwave.kgrid import kWaveGrid as _KWG @@ -190,9 +225,9 @@ def setup(self): self.Nt = int(self.kgrid.Nt) self.dt = float(self.kgrid.dt) - self.c0 = _expand_to_grid(self.medium.sound_speed, self.grid_shape, xp, "sound_speed") + self.c0 = _expand_to_grid(self.medium.sound_speed, self.grid_shape, xp, "sound_speed", dtype=self._dtype) density = getattr(self.medium, "density", None) - self.rho0 = _expand_to_grid(density if density is not None else 1000.0, self.grid_shape, xp, "density") + self.rho0 = _expand_to_grid(density if density is not None else 1000.0, self.grid_shape, xp, "density", dtype=self._dtype) self.c_ref = float(xp.max(self.c0)) self._setup_sensor_mask() @@ -356,26 +391,32 @@ def _setup_pml(self): if pml_size == 0 or pml_alpha == 0: shape = [1] * self.ndim shape[axis] = N - self.pml_list.append(xp.ones(shape, dtype=float)) - self.pml_sg_list.append(xp.ones(shape, dtype=float)) + self.pml_list.append(xp.ones(shape, dtype=self._dtype)) + self.pml_sg_list.append(xp.ones(shape, dtype=self._dtype)) else: - # dimension=2 gives shape (1, N) which we reshape for broadcasting + # dimension=2 gives shape (1, N) which we reshape for broadcasting. + # get_pml returns float64; cast so the per-step PML multiply doesn't + # upcast self.p / self.u (would silently break dtype='single'). 
                 pml = get_pml(N, dx, self.dt, self.c_ref, pml_size, pml_alpha, staggered=False, dimension=2, xp=xp)
                 pml_sg = get_pml(N, dx, self.dt, self.c_ref, pml_size, pml_alpha, staggered=True, dimension=2, xp=xp)
                 shape = [1] * self.ndim
                 shape[axis] = N
-                self.pml_list.append(pml.flatten().reshape(shape))
-                self.pml_sg_list.append(pml_sg.flatten().reshape(shape))
+                self.pml_list.append(pml.flatten().reshape(shape).astype(self._dtype))
+                self.pml_sg_list.append(pml_sg.flatten().reshape(shape).astype(self._dtype))
 
     def _setup_kspace_operators(self):
         """Build k-space gradient/divergence operators for each dimension."""
         xp = self.xp
         self.k_list = []
-        # First pass: build k-vectors for each dimension
+        # First pass: build k-vectors for each dimension.
+        # ``fftfreq`` returns float64 by default; cast so the k-space operators
+        # (kappa, op_grad_list, op_div_list, _k_mag) match self._dtype. Without
+        # this cast, _diff's FFT round-trip with a float64 op upcasts the
+        # float32 field back to float64 -- silently breaking dtype='single'.
         for axis, (N, dx) in enumerate(zip(self.grid_shape, self.spacing)):
-            k = 2 * np.pi * xp.fft.fftfreq(N, d=dx)
+            k = (2 * np.pi * xp.fft.fftfreq(N, d=dx)).astype(self._dtype)
             shape = [1] * self.ndim
             shape[axis] = N
             self.k_list.append(k.reshape(shape))
@@ -424,7 +465,7 @@ def _alpha_neper_and_power(self):
         if not _is_enabled(getattr(self.medium, "alpha_coeff", 0)):
             return None, None
         alpha_power = float(self.xp.array(getattr(self.medium, "alpha_power", 1.5)).flatten()[0])
-        alpha_coeff = _expand_to_grid(self.medium.alpha_coeff, self.grid_shape, self.xp, "alpha_coeff")
+        alpha_coeff = _expand_to_grid(self.medium.alpha_coeff, self.grid_shape, self.xp, "alpha_coeff", dtype=self._dtype)
         return db2neper(alpha_coeff, alpha_power), alpha_power
 
     def _init_absorption(self, alpha_np, alpha_power):
@@ -504,9 +545,9 @@ def _init_nonlinearity(self):
             self._nonlinearity = lambda rho: 0
             self._nl_factor = lambda rho_split: 1.0
         else:
-            self.BonA = _expand_to_grid(BonA_raw, self.grid_shape, self.xp, "BonA")
+            self.BonA = _expand_to_grid(BonA_raw, self.grid_shape, self.xp, "BonA", dtype=self._dtype)
             self._nonlinearity = lambda rho: self.BonA * rho**2 / (2 * self.rho0)
-            self._nl_factor = lambda rho_split: (2 * sum(rho_split) + self.rho0) / self.rho0
+            self._nl_factor = lambda rho_split: (2 * _array_sum(rho_split) + self.rho0) / self.rho0
 
     def _setup_source_operators(self):
         """Build time-varying source injection operators.
@@ -560,6 +601,7 @@ def build_op(mask_raw, signal_raw, mode, scale):
                 grid_size=grid_size,
                 source_kappa=self.source_kappa,
                 diff_fn=self._diff,
+                dtype=self._dtype,
             )
 
         # --- Pressure source (per-axis spacing for non-isotropic grids) ---
@@ -606,10 +648,10 @@ def _setup_fields(self):
         """Initialize pressure, velocity, and density fields."""
         xp = self.xp
 
-        self.p = xp.zeros(self.grid_shape, dtype=float)
-        self.u = [xp.zeros(self.grid_shape, dtype=float) for _ in range(self.ndim)]
+        self.p = xp.zeros(self.grid_shape, dtype=self._dtype)
+        self.u = [xp.zeros(self.grid_shape, dtype=self._dtype) for _ in range(self.ndim)]
         # Split density per dimension enables independent PML absorption in each direction
-        self.rho_split = [xp.zeros(self.grid_shape, dtype=float) for _ in range(self.ndim)]
+        self.rho_split = [xp.zeros(self.grid_shape, dtype=self._dtype) for _ in range(self.ndim)]
 
         if self.use_sg:
             self.rho0_staggered = [self._stagger(self.rho0, axis) for axis in range(self.ndim)]
@@ -623,12 +665,12 @@ def _setup_fields(self):
         # Sensor data storage (sized based on record_start_index)
         self.sensor_data = {}
         if "p" in self.record:
-            self.sensor_data["p"] = xp.zeros((self.n_sensor_points, self.num_recorded_time_points), dtype=float)
+            self.sensor_data["p"] = xp.zeros((self.n_sensor_points, self.num_recorded_time_points), dtype=self._dtype)
         for a in "xyz"[: self.ndim]:
             for suffix in ("", "_staggered"):
                 v = f"u{a}{suffix}"
                 if v in self.record:
-                    self.sensor_data[v] = xp.zeros((self.n_sensor_points, self.num_recorded_time_points), dtype=float)
+                    self.sensor_data[v] = xp.zeros((self.n_sensor_points, self.num_recorded_time_points), dtype=self._dtype)
 
         # Spectral shift: move velocity from staggered (mid-cell) to collocated (pressure) grid
         self.unstagger_ops = [xp.exp(-1j * self.k_list[ax] * self.spacing[ax] / 2) for ax in range(self.ndim)]
@@ -636,12 +678,12 @@ def _setup_fields(self):
         # Initial pressure source (p0)
         p0_raw = getattr(self.source, "p0", 0)
         if _is_enabled(p0_raw):
-            p0 = _expand_to_grid(p0_raw, self.grid_shape, xp, "p0")
+            p0 = _expand_to_grid(p0_raw, self.grid_shape, xp, "p0", dtype=self._dtype)
             if self.smooth_p0 and self.ndim >= 2:
                 from kwave.utils.filters import smooth
 
                 # smooth() is order-agnostic (uses FFT on shape)
-                p0 = xp.asarray(smooth(_to_cpu(p0), restore_max=True))
+                p0 = xp.asarray(smooth(_to_cpu(p0), restore_max=True), dtype=self._dtype)
             self._p0_initial = p0
         else:
             self._p0_initial = None
@@ -657,16 +699,16 @@ def step(self):
 
         # Momentum equation: du_i/dt = -grad_i(p)/rho, with PML
         # Share forward FFT of p across all gradient axes
-        P = xp.fft.fftn(self.p)
+        P = xp.fft.fftn(self.p).astype(self._complex_dtype, copy=False)
         for i in range(self.ndim):
             pml_sg = self.pml_sg_list[i]
-            grad_p_i = xp.real(xp.fft.ifftn(self.op_grad_list[i] * P))
+            grad_p_i = xp.real(xp.fft.ifftn(self.op_grad_list[i] * P)).astype(self._dtype, copy=False)
             self.u[i] = pml_sg * (pml_sg * self.u[i] - self.dt_over_rho0[i] * grad_p_i)
             self.u[i] = self._source_u_ops[i](self.t, self.u[i])
 
         # Mass conservation: drho_i/dt = -rho0 * div_i(u_i) * nl_factor, with PML
         nl_factor = self._nl_factor(self.rho_split)
-        div_u_total = xp.zeros(self.grid_shape, dtype=float)
+        div_u_total = xp.zeros(self.grid_shape, dtype=self._dtype)
         for i in range(self.ndim):
             pml = self.pml_list[i]
             div_u_i = self._diff(self.u[i], self.op_div_list[i])
@@ -675,7 +717,7 @@ def step(self):
             self.rho_split[i] = self._source_p_ops[i](self.t, self.rho_split[i])
 
         # Equation of state: p = c0^2 * (rho + absorption - dispersion + nonlinearity)
-        rho_total = sum(self.rho_split)
+        rho_total = _array_sum(self.rho_split)
         self.p = self.c0_sq * (
             rho_total + self._absorption(div_u_total) - self._dispersion(rho_total) + self._nonlinearity(rho_total)
         )
@@ -697,7 +739,7 @@ def step(self):
             self.sensor_data["p"][:, file_index] = self._extract(self.p)
         for i, a in enumerate("xyz"[: self.ndim]):
             if f"u{a}" in self.sensor_data:  # non-staggered (collocated with pressure)
-                shifted = xp.real(xp.fft.ifftn(self.unstagger_ops[i] * xp.fft.fftn(self.u[i])))
+                shifted = xp.real(xp.fft.ifftn(self.unstagger_ops[i] * xp.fft.fftn(self.u[i]))).astype(self._dtype, copy=False)
                 self.sensor_data[f"u{a}"][:, file_index] = self._extract(shifted)
             if f"u{a}_staggered" in self.sensor_data:  # raw staggered grid
                 self.sensor_data[f"u{a}_staggered"][:, file_index] = self._extract(self.u[i])
@@ -751,7 +793,7 @@ def _diff(self, f, op):
         if op is None:
             return f
         xp = self.xp
-        return xp.real(xp.fft.ifftn(op * xp.fft.fftn(f)))
+        return xp.real(xp.fft.ifftn(op * xp.fft.fftn(f))).astype(self._dtype, copy=False)
 
     def _stagger(self, arr, axis):
         """Compute staggered grid values (average neighbors along axis)."""
diff --git a/tests/test_data_cast.py b/tests/test_data_cast.py
new file mode 100644
index 00000000..5eda1705
--- /dev/null
+++ b/tests/test_data_cast.py
@@ -0,0 +1,299 @@
+"""Tests for the ``dtype`` parameter on the modern ``kspaceFirstOrder()`` API.
+
+Issue #695: expose precision control on the modern API.
+
+The Python backend honors ``dtype`` (numpy-style dtype-like input):
+  - ``None`` / ``np.float64`` / ``"float64"`` / ``"double"`` / ``float`` /
+    ``"off"`` (legacy MATLAB alias) → float64 (default)
+  - ``np.float32`` / ``"float32"`` / ``"single"`` → float32
+
+The C++ backend uses fixed internal precision regardless; setting ``dtype``
+to anything other than float64 warns.
+""" +from copy import deepcopy + +import numpy as np +import pytest + +from kwave.data import Vector +from kwave.kgrid import kWaveGrid +from kwave.kmedium import kWaveMedium +from kwave.ksensor import kSensor +from kwave.ksource import kSource +from kwave.kspaceFirstOrder import kspaceFirstOrder + + +def _build_minimal(): + N = Vector([48, 48]) + dx = Vector([0.1e-3, 0.1e-3]) + kgrid = kWaveGrid(N, dx) + kgrid.makeTime(1500) + medium = kWaveMedium(sound_speed=1500 * np.ones(tuple(N)), density=1000 * np.ones(tuple(N))) + source = kSource() + source.p_mask = np.zeros(tuple(N), dtype=bool) + source.p_mask[N.x // 2, N.y // 2] = True + t = kgrid.t_array.squeeze() + source.p = (np.sin(2 * np.pi * 1e6 * t) * np.exp(-((t - 5e-7) ** 2) / (2e-7) ** 2))[np.newaxis, :] + sensor = kSensor(mask=np.ones(tuple(N), dtype=bool)) + return kgrid, medium, source, sensor + + +# Cover every input form the resolver should accept. +_FLOAT64_INPUTS = [None, np.float64, "float64", "double", float, "off", np.dtype("f8")] +_FLOAT32_INPUTS = [np.float32, "float32", "single", np.dtype("f4")] + + +@pytest.mark.parametrize("dtype_arg", _FLOAT64_INPUTS, ids=lambda x: repr(x)) +def test_python_backend_float64_inputs(dtype_arg): + """All float64-equivalent inputs must produce float64 output across every recorded field.""" + kgrid, medium, source, sensor = _build_minimal() + sensor.record = ("p", "p_final", "p_max", "p_min", "p_rms") + result = kspaceFirstOrder( + kgrid=kgrid, + medium=medium, + source=deepcopy(source), + sensor=sensor, + backend="python", + device="cpu", + dtype=dtype_arg, + pml_inside=False, + smooth_p0=False, + quiet=True, + ) + for field in ("p", "p_final", "p_max", "p_min", "p_rms"): + arr = np.asarray(result[field]) + assert arr.dtype == np.float64, f"dtype={dtype_arg!r}: {field} produced {arr.dtype}, expected float64" + assert not np.any(np.isnan(result["p"])) + assert np.nanmax(np.abs(result["p"])) > 0 + + +@pytest.mark.parametrize("dtype_arg", _FLOAT32_INPUTS, ids=lambda x: 
repr(x)) +def test_python_backend_float32_inputs(dtype_arg): + """All float32-equivalent inputs must produce float32 output across every recorded field. + + Includes p_final + aggregates (p_max/min/rms) — these caught a real dtype-drift bug + where k-space operators (kappa, op_grad/div_list) and the PML arrays were float64 + by default and silently upcast self.p back to float64 mid-step(). + """ + kgrid, medium, source, sensor = _build_minimal() + sensor.record = ("p", "p_final", "p_max", "p_min", "p_rms") + result = kspaceFirstOrder( + kgrid=kgrid, + medium=medium, + source=deepcopy(source), + sensor=sensor, + backend="python", + device="cpu", + dtype=dtype_arg, + pml_inside=False, + smooth_p0=False, + quiet=True, + ) + for field in ("p", "p_final", "p_max", "p_min", "p_rms"): + arr = np.asarray(result[field]) + assert arr.dtype == np.float32, f"dtype={dtype_arg!r}: {field} produced {arr.dtype}, expected float32" + assert not np.any(np.isnan(result["p"])) + assert np.nanmax(np.abs(result["p"])) > 0 + + +def test_default_dtype_is_float64(): + """Calling without dtype must produce float64 (back-compat with pre-#716 behavior).""" + kgrid, medium, source, sensor = _build_minimal() + result = kspaceFirstOrder( + kgrid=kgrid, + medium=medium, + source=deepcopy(source), + sensor=sensor, + backend="python", + device="cpu", + pml_inside=False, + smooth_p0=False, + quiet=True, + ) + assert np.asarray(result["p"]).dtype == np.float64 + + +@pytest.mark.parametrize("bad", [np.float16, np.complex64, "float16", "complex64", "quad", 42, "garbage"]) +def test_invalid_dtype_raises(bad): + """Non-float64/float32 dtypes (and unparseable values) must raise ValueError.""" + kgrid, medium, source, sensor = _build_minimal() + with pytest.raises(ValueError, match="dtype"): + kspaceFirstOrder( + kgrid=kgrid, + medium=medium, + source=deepcopy(source), + sensor=sensor, + backend="python", + device="cpu", + dtype=bad, + ) + + +def test_python_backend_dtype_preserved_for_staggered_velocity(): 
+ """Recorded staggered velocity (``ux_staggered`` etc.) must preserve dtype. + + The staggered-grid extraction goes through ``unstagger_ops * fftn(u)`` -> + ``ifftn`` -> ``.real``. In numpy < 2 ``np.fft.fftn`` returns complex128 + regardless of input precision, so without an explicit cast the staggered + output would silently come back as float64 even when ``dtype=np.float32``. + """ + kgrid, medium, source, sensor = _build_minimal() + sensor.record = ("p", "ux_staggered", "uy_staggered") + result = kspaceFirstOrder( + kgrid=kgrid, + medium=medium, + source=deepcopy(source), + sensor=sensor, + backend="python", + device="cpu", + dtype=np.float32, + pml_inside=False, + smooth_p0=False, + quiet=True, + ) + for field in ("p", "ux_staggered", "uy_staggered"): + arr = np.asarray(result[field]) + assert arr.dtype == np.float32, f"{field} produced {arr.dtype}, expected float32" + + +def test_python_backend_dtype_preserved_with_nonlinearity(): + """BonA path must preserve float32 dtype. + + Regression guard for the third dtype-drift bug: ``sum(rho_split)`` in + ``_nl_factor`` and the equation-of-state line starts with Python ``int 0``, + which under numpy < 2 (NEP 50) promotes float32 to float64. Fixed by + using ``_array_sum`` which starts from the first element. + """ + kgrid, medium, source, sensor = _build_minimal() + medium.BonA = 6.0 # water-like nonlinearity + sensor.record = ("p", "p_final", "p_max") + result = kspaceFirstOrder( + kgrid=kgrid, + medium=medium, + source=deepcopy(source), + sensor=sensor, + backend="python", + device="cpu", + dtype=np.float32, + pml_inside=False, + smooth_p0=False, + quiet=True, + ) + for field in ("p", "p_final", "p_max"): + arr = np.asarray(result[field]) + assert arr.dtype == np.float32, f"With nonlinearity, {field} produced {arr.dtype}, expected float32" + + +def test_torch_like_dtype_gets_helpful_error(): + """Non-numpy framework dtypes get a hint pointing to numpy equivalents. 
+
+    Uses a fake stand-in (`__module__ = "torch"`) to avoid importing torch.
+    """
+    kgrid, medium, source, sensor = _build_minimal()
+
+    class FakeTorchDtype:
+        pass
+
+    FakeTorchDtype.__module__ = "torch"
+    with pytest.raises(ValueError, match=r"torch.*pass the equivalent numpy dtype"):
+        kspaceFirstOrder(
+            kgrid=kgrid,
+            medium=medium,
+            source=deepcopy(source),
+            sensor=sensor,
+            backend="python",
+            device="cpu",
+            dtype=FakeTorchDtype(),
+        )
+
+
+def test_python_single_vs_double_numerical_agreement():
+    """Single and double precision runs must agree to within float32 tolerance.
+
+    Sanity check that float32 runs aren't producing garbage. Float32 has
+    ~7 decimal digits, so a relative tolerance of ~1e-4 is reasonable for
+    a short propagation in a uniform medium.
+    """
+    kgrid, medium, source, sensor = _build_minimal()
+    p_double = np.asarray(
+        kspaceFirstOrder(
+            kgrid=kgrid,
+            medium=medium,
+            source=deepcopy(source),
+            sensor=sensor,
+            backend="python",
+            device="cpu",
+            dtype=np.float64,
+            pml_inside=False,
+            smooth_p0=False,
+            quiet=True,
+        )["p"]
+    )
+    p_single = np.asarray(
+        kspaceFirstOrder(
+            kgrid=kgrid,
+            medium=medium,
+            source=deepcopy(source),
+            sensor=sensor,
+            backend="python",
+            device="cpu",
+            dtype=np.float32,
+            pml_inside=False,
+            smooth_p0=False,
+            quiet=True,
+        )["p"]
+    )
+    scale = np.max(np.abs(p_double))
+    abs_err = np.max(np.abs(p_double.astype(np.float64) - p_single.astype(np.float64)))
+    assert abs_err / scale < 1e-4, f"single-vs-double max relative error {abs_err / scale:.2e} > 1e-4"
+
+
+def test_cpp_backend_warns_on_non_float64_dtype(tmp_path):
+    """Setting dtype=np.float32 with backend='cpp' must warn (binary uses fixed precision)."""
+    kgrid, medium, source, sensor = _build_minimal()
+    import subprocess
+
+    with pytest.warns(UserWarning, match="dtype.*has no effect.*cpp"):
+        try:
+            kspaceFirstOrder(
+                kgrid=kgrid,
+                medium=medium,
+                source=deepcopy(source),
+                sensor=sensor,
+                backend="cpp",
+                device="cpu",
+                dtype=np.float32,
+                pml_inside=False,
+                smooth_p0=False,
+                pml_alpha=10,
+                data_path=str(tmp_path),
+                quiet=True,
+            )
+        except (subprocess.CalledProcessError, FileNotFoundError):
+            # Binary not available on this box; warning still must have fired
+            pass
+
+
+def test_cpp_backend_silent_on_default_dtype(tmp_path, recwarn):
+    """Default dtype (None → float64) must not warn on the C++ backend."""
+    kgrid, medium, source, sensor = _build_minimal()
+    import subprocess
+
+    try:
+        kspaceFirstOrder(
+            kgrid=kgrid,
+            medium=medium,
+            source=deepcopy(source),
+            sensor=sensor,
+            backend="cpp",
+            device="cpu",
+            # dtype left at default
+            pml_inside=False,
+            smooth_p0=False,
+            pml_alpha=10,
+            data_path=str(tmp_path),
+            quiet=True,
+        )
+    except (subprocess.CalledProcessError, FileNotFoundError):
+        pass
+    assert not any("dtype" in str(w.message) and "has no effect" in str(w.message) for w in recwarn.list)