diff --git a/CHANGELOG.md b/CHANGELOG.md index e1a9173..82eedfa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,18 @@ _Disclaimer: this changelog is updated using generative AI, but is still verified manually._ +## v0.2.0 + +### Added +- A native **Metal compute backend** (`metal` feature, macOS only). It translates SPIR-V to MSL with `naga` at function-load time and drives Apple's Metal API directly via the `metal` crate, rather than going through `wgpu`. Includes buffer management, indirect dispatch, push constants, and GPU timestamp queries. Compiled MSL libraries are cached by SPIR-V content hash. (#3) +- `GpuPass::memory_barrier` (and an `Encoder::memory_barrier` trait method): inserts a buffer-scope memory barrier between dispatches within a compute pass. Required on Metal — which uses `MTLDispatchType::Concurrent` and does not auto-synchronize consecutive dispatches — and a no-op on backends that already insert implicit barriers (WebGPU, CUDA, CPU). (#3) +- `khal_std::build_script::setup_shader_crate_build()`: a `build.rs` helper for shader crates that emits the `manifest_dir` metadata used by `KhalBuilder::from_dependency`, and declares/sets the `target_arch_is_gpu` cfg (set for SPIR-V/NVPTX targets, unset on host CPU builds). (#3) +- `GpuBackend::is_metal` and `Backend::as_metal` accessors. (#3) + +### Changed +- The WebGPU backend now compiles shader modules with `force_loop_bounding: true` (instead of fully unchecked) to work around an apparent miscompilation of loops on some platforms (Windows + Nvidia). (#3) +- Bumped `glamx` from `0.2` to `0.3` in `khal-std`, enabling its `u32`, `i32`, and `f64` features. (#3) + ## v0.1.1 ### Added diff --git a/Cargo.toml b/Cargo.toml index 4166365..d944837 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ members = [ resolver = "2" [workspace.package] -version = "0.1.1" +version = "0.2.0" [workspace.dependencies] bytemuck = { version = "1", features = ["derive", "extern_crate_std"] } diff --git a/crates/khal-example-shaders/Cargo.toml b/crates/khal-example-shaders/Cargo.toml index 0cf0aeb..6de9692 100644 --- a/crates/khal-example-shaders/Cargo.toml +++ b/crates/khal-example-shaders/Cargo.toml @@ -24,4 +24,4 @@ khal-std = { path = "../khal-std" } workspace = true [target.'cfg(not(any(target_arch = "spirv", target_arch = "nvptx64")))'.dependencies] -khal = { version = "0.1.0", path = "../khal", features = ["derive"] } \ No newline at end of file +khal = { version = "0.2.0", path = "../khal", features = ["derive"] } \ No newline at end of file diff --git a/crates/khal-example/Cargo.toml b/crates/khal-example/Cargo.toml index 010caf4..373b666 100644 --- a/crates/khal-example/Cargo.toml +++ b/crates/khal-example/Cargo.toml @@ -12,13 +12,13 @@ cpu-parallel = ["cpu", "khal-example-shaders/cpu-parallel"] cuda = ["khal/cuda", "khal-builder/cuda", "khal-example-shaders/cuda"] [dependencies] -khal = { version = "0.1.0", path = "../khal", features = ["derive"] } +khal = { version = "0.2.0", path = "../khal", features = ["derive"] } include_dir = "0.7" async-std = { version = "1", features = ["attributes"] } khal-example-shaders = { path = "../khal-example-shaders" } [build-dependencies] -khal-builder = { version = "0.1.0", path = "../khal-builder" } +khal-builder = { version = "0.2.0", path = "../khal-builder" } # Listed as a build-dep (in addition to the regular `[dependencies]` entry # above) so cargo runs the shader crate's `build.rs` before ours and # forwards its `DEP_KHAL_EXAMPLE_SHADERS_MANIFEST_DIR` env var to our diff --git a/crates/khal-std/Cargo.toml b/crates/khal-std/Cargo.toml index 0a4e29d..a947d0d 100644 --- a/crates/khal-std/Cargo.toml +++ b/crates/khal-std/Cargo.toml @@ -29,7 +29,7 @@ glamx = { version = "0.3", default-features = false, features = ["nostd-libm", " rayon = { version = "1", optional = true } corosensei = { version = "0.3", optional = true } spirv-std-macros = "0.10.0-alpha.1" -khal-derive = { version = "0.1.0", path = "../khal-derive" } +khal-derive = { version = "0.2.0", path = "../khal-derive" } [lints] workspace = true diff --git a/crates/khal/Cargo.toml b/crates/khal/Cargo.toml index b23d4b5..3e3472d 100644 --- a/crates/khal/Cargo.toml +++ b/crates/khal/Cargo.toml @@ -26,7 +26,7 @@ thiserror = { workspace = true } smallvec = "1" include_dir = "0.7" -khal-derive = { version = "0.1", path = "../khal-derive", optional = true } +khal-derive = { version = "0.2", path = "../khal-derive", optional = true } cudarc = { workspace = true, optional = true } [target.'cfg(not(target_arch = "nvptx64"))'.dependencies] @@ -39,4 +39,10 @@ paste = "1" [target.'cfg(target_os = "macos")'.dependencies] metal = { version = "0.32", optional = true } -naga = { version = "29", optional = true, features = ["spv-in", "msl-out"] } \ No newline at end of file +naga = { version = "29", optional = true, features = ["spv-in", "msl-out"] } + +[lints.rust] +# The `objc` crate's `msg_send!` macro (used by the Metal backend) expands to +# a `cfg(feature = "cargo-clippy")` check, which trips `unexpected_cfgs` under +# clippy. Declare it as an expected cfg so it doesn't warn. +unexpected_cfgs = { level = "warn", check-cfg = ['cfg(feature, values("cargo-clippy"))'] } \ No newline at end of file diff --git a/crates/khal/src/backend/metal.rs b/crates/khal/src/backend/metal.rs index 126cff9..713d3f0 100644 --- a/crates/khal/src/backend/metal.rs +++ b/crates/khal/src/backend/metal.rs @@ -229,7 +229,8 @@ pub struct MetalFunction { /// order). At dispatch we call `setThreadgroupMemoryLength:atIndex:` /// for each entry. pub(crate) threadgroup_sizes: Arc>, - /// Workgroup size declared in the shader. Used for indirect dispatch. + /// Workgroup size declared in the shader. Reserved for indirect dispatch. + #[allow(dead_code)] pub(crate) workgroup_size: [u32; 3], } @@ -575,10 +576,10 @@ impl Backend for Metal { // 0, 1, 2... in declaration order — matching this Vec's indices. let mut threadgroup_sizes: Vec = Vec::new(); for (_, var) in module.naga.global_variables.iter() { - if needs_array_length(var.ty, &module.naga.types) { - if let Some(b) = &var.binding { - sizes_bindings.push((b.group, b.binding)); - } + if needs_array_length(var.ty, &module.naga.types) + && let Some(b) = &var.binding + { + sizes_bindings.push((b.group, b.binding)); } if matches!(var.space, naga::AddressSpace::WorkGroup) { let layout = module.layouter[var.ty]; @@ -636,7 +637,7 @@ impl Backend for Metal { .zip(module.naga.entry_points.iter()) .find_map(|(name_result, ep)| { if ep.name == entry_point { - name_result.as_ref().ok().map(|n| n.clone()) + name_result.as_ref().ok().cloned() } else { None } @@ -1120,14 +1121,13 @@ fn resource_options(usage: BufferUsages) -> MTLResourceOptions { fn needs_array_length(ty: naga::Handle, types: &naga::UniqueArena) -> bool { match types[ty].inner { naga::TypeInner::Struct { ref members, .. } => { - if let Some(member) = members.last() { - if let naga::TypeInner::Array { + if let Some(member) = members.last() + && let naga::TypeInner::Array { size: naga::ArraySize::Dynamic, .. } = types[member.ty].inner - { - return true; - } + { + return true; } false }