diff --git a/README.md b/README.md index 55eb7ec..1ccffab 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,7 @@ At the moment, the following transformations are supported: | --------------------------------- | ----------- | ------ | ------ | | Combined Image Samplers | ✅ | ✅ | ✅ | | Immediates (Push Constants) | ✅ | ✅\* | ✅ | +| Binding Arrays | ✅ | ✅ | ✅ | | Mixed Depth / Comparison | ✅ | ⚠️\* | ❌ | | isnan / isinf Patching | ✅ | ✅ | ✅ | | Storage Cube Patching | ✅ | ✅ | ✅ | @@ -89,7 +90,6 @@ layout(std140, set = N+1, binding = 0) uniform PushBlock { } pc; // where N is the max set in the shader. - ``` ### Additional Notes @@ -110,6 +110,69 @@ layout(std140, set = N+1, binding = 0) uniform PushBlock { > \* naga's SPIR-V front-end rejects `MatrixStride 16` for `mat2x2`, this should be fixed soon (?). +## Binding Arrays + +Binding arrays are a feature commonly used in shaders and supported by WGSL compilers, just not on the web (yet?). +This patch takes fixed size arrays of size N containing opaque or concrete types and splits them into N individual bindings. +In `wgpu`, this covers the following features: + +- `TEXTURE_BINDING_ARRAY` +- `BUFFER_BINDING_ARRAY` +- `STORAGE_RESOURCE_BINDING_ARRAY` +- `SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING` +- `STORAGE_TEXTURE_ARRAY_NON_UNIFORM_INDEXING` + +```glsl +struct Thing { + float a; +}; + +#define MAX_RESOURCES 2 +layout(set = 0, binding = 0) uniform sampler u_samplers[MAX_RESOURCES]; +layout(set = 0, binding = 1) uniform texture2D u_textures[MAX_RESOURCES]; +layout(set = 0, binding = 2) uniform texture2DArray u_texture_arrays[MAX_RESOURCES]; +layout(set = 0, binding = 3) uniform image2D u_images[MAX_RESOURCES]; +layout(set = 0, binding = 4) uniform Thing u_things[MAX_RESOURCES]; +layout(set = 0, binding = 5, std140) buffer Thing u_buf_things[MAX_RESOURCES]; + +// is converted into... 
+ +struct Thing { + float a; +}; + +layout(set = 0, binding = 0) uniform sampler u_samplers_0; +layout(set = 0, binding = 1) uniform sampler u_samplers_1; +layout(set = 0, binding = 2) uniform texture2D u_textures_0; +layout(set = 0, binding = 3) uniform texture2D u_textures_1; +layout(set = 0, binding = 4) uniform texture2DArray u_texture_arrays_0; +layout(set = 0, binding = 5) uniform texture2DArray u_texture_arrays_1; +layout(set = 0, binding = 6) uniform image2D u_images_0; +layout(set = 0, binding = 7) uniform image2D u_images_1; +layout(set = 0, binding = 8) uniform Thing u_things_0; +layout(set = 0, binding = 9) uniform Thing u_things_1; +layout(set = 0, binding = 10, std140) buffer Thing u_buf_things_0; +layout(set = 0, binding = 11, std140) buffer Thing u_buf_things_1; +``` + +### Additional Notes + +- Combined image samplers are not supported; please run the combined image sampler pass first. +- Nested resources (`texture2D u[I][J][K]`) are not supported. +- Usage of additional SPIR-V capabilities such as `SparseResidency` or `ImageQuery` is not supported. + +### Tests + +| Test | `spirv-val` | Naga | Tint | +| ------------------------------------- | ----------- | ------ | ---- | +| `buffer_binding_array.frag` | ✅ | ✅ | ✅ | +| `storage_binding_array.frag` | ✅ | ✅ | ✅ | +| `texture_binding_array.frag` | ✅ | ✅ | ✅ | +| `nested_texture_binding_array.frag` | ✅ | ✅ | ✅ | +| `sampler_binding_array.frag` | ✅ | ✅ | ✅ | +| `sampler_stub.frag` | ✅ | ✅ | ✅ | +| `texture_array_binding_array.frag` | ✅ | ✅ | ✅ | + ## Mixed Depth / Comparison The WGSL spec differentiates between `sampler` and `sampler_comparison` as well as `texture2d` and `texture_depth_2d`. diff --git a/ffi/bin/README.txt b/ffi/bin/README.txt new file mode 100644 index 0000000..fe9e20d --- /dev/null +++ b/ffi/bin/README.txt @@ -0,0 +1,2 @@ +Make sure to update this guy when a new patch is included in the API. +The main purpose of this file is not to test functionality but linking! 
diff --git a/ffi/bin/spv_webgpu_transform.c b/ffi/bin/spv_webgpu_transform.c index 148563c..5a3e807 100644 --- a/ffi/bin/spv_webgpu_transform.c +++ b/ffi/bin/spv_webgpu_transform.c @@ -40,6 +40,14 @@ int main() { uint32_t pruneunuseddref_out_count; spirv_webgpu_transform_pruneunuseddref_alloc(storagecube_out_spv, storagecube_out_count, &pruneunuseddref_out_spv, &pruneunuseddref_out_count); + uint32_t *immediates_out_spv; + uint32_t immediates_out_count; + spirv_webgpu_transform_immediatespatch_alloc(pruneunuseddref_out_spv, pruneunuseddref_out_count, &immediates_out_spv, &immediates_out_count); + + uint32_t *splitbindingarray_out_spv; + uint32_t splitbindingarray_out_count; + spirv_webgpu_transform_splitbindingarray_alloc(immediates_out_spv, immediates_out_count, &splitbindingarray_out_spv, &splitbindingarray_out_count, &correction_map); + // 3. Observe the patched variables print_set_binding(correction_map, 0, 0); print_set_binding(correction_map, 0, 1); @@ -51,6 +59,8 @@ int main() { print_set_binding(correction_map, 3, 0); // 4. 
Free memory + spirv_webgpu_transform_splitbindingarray_free(splitbindingarray_out_spv); + spirv_webgpu_transform_immediatespatch_free(immediates_out_spv); spirv_webgpu_transform_pruneunuseddref_free(pruneunuseddref_out_spv); spirv_webgpu_transform_storagecubepatch_free(storagecube_out_spv); spirv_webgpu_transform_isnanisinfpatch_free(isnanisinf_out_spv); @@ -88,6 +98,9 @@ void print_set_binding(TransformCorrectionMap map, uint32_t set, uint32_t bindin case SPIRV_WEBGPU_TRANSFORM_CORRECTION_TYPE_CONVERT_STORAGE_CUBE: printf("CONVERT_STORAGE_CUBE "); break; + case SPIRV_WEBGPU_TRANSFORM_CORRECTION_TYPE_SPLIT_BINDING_ARRAY: + printf("SPLIT_BINDING_ARRAY "); + break; } } printf("\n"); diff --git a/ffi/spirv_webgpu_transform.h b/ffi/spirv_webgpu_transform.h index d28041a..3fa6545 100644 --- a/ffi/spirv_webgpu_transform.h +++ b/ffi/spirv_webgpu_transform.h @@ -23,6 +23,8 @@ void spirv_webgpu_transform_storagecubepatch_alloc(uint32_t *in_spv, uint32_t in void spirv_webgpu_transform_storagecubepatch_free(uint32_t *out_spv); void spirv_webgpu_transform_pruneunuseddref_alloc(uint32_t *int_spv, uint32_t in_count, uint32_t **out_spv, uint32_t *out_count); void spirv_webgpu_transform_pruneunuseddref_free(uint32_t *out_spv); +void spirv_webgpu_transform_splitbindingarray_alloc(uint32_t *in_spv, uint32_t in_count, uint32_t **out_spv, uint32_t *out_count, TransformCorrectionMap *correction_map); +void spirv_webgpu_transform_splitbindingarray_free(uint32_t *out_spv); void spirv_webgpu_transform_mirrorpatch_alloc( uint32_t *in_left_spv, uint32_t in_left_count, TransformCorrectionMap *left_corrections, @@ -41,6 +43,7 @@ typedef enum { SPIRV_WEBGPU_TRANSFORM_CORRECTION_TYPE_SPLIT_DREF_REGULAR = 1, SPIRV_WEBGPU_TRANSFORM_CORRECTION_TYPE_SPLIT_DREF_COMPARISON = 2, SPIRV_WEBGPU_TRANSFORM_CORRECTION_TYPE_CONVERT_STORAGE_CUBE = 3, + SPIRV_WEBGPU_TRANSFORM_CORRECTION_TYPE_SPLIT_BINDING_ARRAY = 4, } TransformCorrectionType; // SAFETY: `corrections` invalidates when `correction_map` is 
written to. diff --git a/ffi/src/lib.rs b/ffi/src/lib.rs index 8de424d..ff57a83 100644 --- a/ffi/src/lib.rs +++ b/ffi/src/lib.rs @@ -3,7 +3,7 @@ use core::{ffi, ptr, slice}; use spirv_webgpu_transform::{ CorrectionMap, combimgsampsplitter, drefsplitter, immediatespatch, isnanisinfpatch, - mirrorpatch, pruneunuseddref, storagecubepatch, + mirrorpatch, pruneunuseddref, splitbindingarray, storagecubepatch, }; type TransformCorrectionMap = *mut ffi::c_void; @@ -199,6 +199,35 @@ pub unsafe extern "C" fn spirv_webgpu_transform_pruneunuseddref_free(out_spv: *m unsafe { drop(Box::from_raw(out_spv)) } } +#[unsafe(no_mangle)] +pub unsafe extern "C" fn spirv_webgpu_transform_splitbindingarray_alloc( + in_spv: *const u32, + in_count: u32, + out_spv: *mut *const u32, + out_count: *mut u32, + correction_map: *mut TransformCorrectionMap, +) { + let correction_map = unsafe { alloc_or_pass_correction_map(correction_map) }; + + let in_spv = unsafe { slice::from_raw_parts(in_spv, in_count as usize) }; + match splitbindingarray(in_spv, correction_map) { + Ok(spv) => unsafe { + *out_count = spv.len() as u32; + let leaked = Box::leak(spv.into_boxed_slice()); + *out_spv = leaked.as_ptr(); + }, + Err(_) => unsafe { + *out_spv = ptr::null(); + *out_count = 0; + }, + } +} + +#[unsafe(no_mangle)] +pub unsafe extern "C" fn spirv_webgpu_transform_splitbindingarray_free(out_spv: *mut u32) { + unsafe { drop(Box::from_raw(out_spv)) } +} + #[unsafe(no_mangle)] pub unsafe extern "C" fn spirv_webgpu_transform_mirrorpatch_alloc( in_left_spv: *const u32, @@ -267,6 +296,8 @@ pub enum TransformCorrectionType { SpirvWebgpuTransformCorrectionTypeSplitCombined = 0, SpirvWebgpuTransformCorrectionTypeSplitDrefRegular = 1, SpirvWebgpuTransformCorrectionTypeSplitDrefComparison = 2, + SpirvWebgpuTransformCorrectionTypeConvertStorageCube = 3, + SpirvWebgpuTransformCorrectionTypeSplitBindingArray = 4, } // TransformCorrectionStatus spirv_webgpu_transform_correction_map_index(uint32_t set, uint32_t binding, 
TransformCorrectionType** corrections_ptr, uint32_t* correction_count); diff --git a/src/bin/spv_webgpu_transform.rs b/src/bin/spv_webgpu_transform.rs index 6183d1d..0f31ba6 100644 --- a/src/bin/spv_webgpu_transform.rs +++ b/src/bin/spv_webgpu_transform.rs @@ -5,7 +5,7 @@ fn main() { if args.len() != 4 { eprintln!( - "Usage: spv_webgpu_transform " + "Usage: spv_webgpu_transform " ); process::exit(1); } @@ -32,6 +32,9 @@ fn main() { } "pruneunuseddref" => spirv_webgpu_transform::pruneunuseddref(&spv).unwrap(), "immediates" => spirv_webgpu_transform::immediatespatch(&spv).unwrap(), + "bindingarray" => { + spirv_webgpu_transform::splitbindingarray(&spv, &mut out_correction_map).unwrap() + } mode => { eprintln!("unknown mode {:?}", mode); process::exit(1) diff --git a/src/correction.rs b/src/correction.rs index fb0723d..ec3175c 100644 --- a/src/correction.rs +++ b/src/correction.rs @@ -3,18 +3,20 @@ use super::*; // Q: Hey what happens when you stack corrections? // A: I don't want to think about it... I will start thinking after a refactor... -#[repr(u16)] #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum CorrectionType { /// A combined image sampler has been split, a new `sampler` object should be inserted. - SplitCombined = 0, + SplitCombined, /// A mixed depth texture / sampler has been duplicated, insert the same object again with a `Regular` bind type. - SplitDrefRegular = 1, + SplitDrefRegular, /// A mixed depth texture / sampler has been duplicated, insert the same object again with a /// `Comparison` bind type. - SplitDrefComparison = 2, + SplitDrefComparison, /// A storage cube texture has been converted into a storage texture 2D array, change the dimension. - ConvertStorageCube = 3, + ConvertStorageCube, + /// A binding array has been split into new variables. Insert the same resource again. + /// For an `N` sized array, expect `N-1` entries. 
+ SplitBindingArray, } #[derive(Debug, Clone, Default, PartialEq, Eq)] diff --git a/src/lib.rs b/src/lib.rs index 2bf78a8..59c1da4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -10,6 +10,7 @@ //! | --------------------------------- | ----------- | ------ | ------ | //! | Combined Image Samplers | ✅ | ✅ | ✅ | //! | Immediates (Push Constants) | ✅ | ✅\* | ✅ | +//! | Binding Arrays | ✅ | ✅ | ✅ | //! | Mixed Depth / Comparison | ✅ | ⚠️\* | ❌ | //! | isnan / isinf Patching | ✅ | ✅ | ✅ | //! | Storage Cube Patching | ✅ | ✅ | ✅ | @@ -35,6 +36,7 @@ mod immediatespatch; mod isnanisinfpatch; mod mirrorpatch; mod pruneunuseddref; +mod splitbindingarray; mod splitcombined; mod splitdref; mod spv; @@ -52,6 +54,7 @@ pub use immediatespatch::*; pub use isnanisinfpatch::*; pub use mirrorpatch::*; pub use pruneunuseddref::*; +pub use splitbindingarray::*; pub use splitcombined::*; pub use splitdref::*; pub use storagecubepatch::*; diff --git a/src/mirrorpatch.rs b/src/mirrorpatch.rs index ae073e4..3f063c9 100644 --- a/src/mirrorpatch.rs +++ b/src/mirrorpatch.rs @@ -214,7 +214,7 @@ fn patch_spv_decorations( // Convert into affected decoration Ok(AffectedDecoration { original_res_id: original_variable_id, - new_res_id, + new_res_ids: vec![new_res_id], correction_type, }) }) diff --git a/src/pruneunuseddref.rs b/src/pruneunuseddref.rs index 6c2665c..422c53c 100644 --- a/src/pruneunuseddref.rs +++ b/src/pruneunuseddref.rs @@ -186,7 +186,6 @@ pub fn pruneunuseddref(in_spv: &[u32]) -> Result, ()> { new_spv[spv_idx..spv_idx + word_count].fill(encode_word(1, SPV_INSTRUCTION_OP_NOP)); } - prune_noops(&mut new_spv); // 10. Write New Header and New Code diff --git a/src/splitbindingarray.rs b/src/splitbindingarray.rs new file mode 100644 index 0000000..96f9c9f --- /dev/null +++ b/src/splitbindingarray.rs @@ -0,0 +1,775 @@ +use super::*; + +fn inc(ib: &mut u32) -> u32 { + *ib += 1; + *ib - 1 +} + +// For the purposes of this patch, I consider an OpTypeImage and OpTypeSampler to be opaque. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq)] +struct OpaqueArrayType; + +mod rechain_instructions; +mod select_template; + +use rechain_instructions::*; +use select_template::*; + +/// Perform the operation on a `Vec`. +/// Use [u8_slice_to_u32_vec] to convert a `&[u8]` into a `Vec` +/// Either update the existing `corrections` or create a new one. +/// +/// Assumed SPIR-V properties for this patch: +/// +/// TODO: +/// - No nested +/// - No additional capabilities (SparseResidency or ImageQuery) +/// +/// wgpu Properties: +/// +/// - The only opaque types that can be in an array are `OpTypeImage` and `OpTypeSampler` +/// +/// SPIR-V Properties (These should always be true): +/// - No opaque types in structures +/// - All UBOs and SSBO hold a structure and therefore are accessed with `OpAccessChain*` first. +/// +pub fn splitbindingarray( + in_spv: &[u32], + corrections: &mut Option, +) -> Result, ()> { + let spv = in_spv.to_owned(); + + let mut instruction_bound = spv[SPV_HEADER_INSTRUCTION_BOUND_OFFSET]; + let magic_number = spv[SPV_HEADER_MAGIC_NUM_OFFSET]; + + let spv_header = spv[0..SPV_HEADER_LENGTH].to_owned(); + + assert_eq!(magic_number, SPV_HEADER_MAGIC); + + let mut instruction_inserts = vec![]; + let word_inserts = vec![]; + + let spv = spv.into_iter().skip(SPV_HEADER_LENGTH).collect::>(); + let mut new_spv = spv.clone(); + + let mut op_type_int_idxs = vec![]; + let mut op_type_array_idxs = vec![]; + let mut op_type_pointer_idxs = vec![]; + let mut op_type_image_idxs = vec![]; + let mut op_type_sampler_idxs = vec![]; + let mut op_constant_idxs = vec![]; + let mut op_variable_idxs = vec![]; + let mut op_access_chain_idxs = vec![]; + let mut op_in_bounds_access_chain_idxs = vec![]; + let mut op_load_idxs = vec![]; + let mut op_store_idxs = vec![]; + let mut op_copy_memory_idxs = vec![]; + let mut op_type_function_idxs = vec![]; + let mut op_function_parameter_idxs = vec![]; + let mut op_function_call_idxs = vec![]; + let mut op_function_end_idxs = 
vec![]; + let mut op_decorate_idxs = vec![]; + let mut op_name_idxs = vec![]; + let mut op_sampled_image_idxs = vec![]; + + // 1. Find the locations of the instructions we need + let mut spv_idx = 0; + while spv_idx < spv.len() { + let op = spv[spv_idx]; + let word_count = hiword(op); + let instruction = loword(op); + + match instruction { + SPV_INSTRUCTION_OP_TYPE_INT => op_type_int_idxs.push(spv_idx), + SPV_INSTRUCTION_OP_TYPE_ARRAY => op_type_array_idxs.push(spv_idx), + SPV_INSTRUCTION_OP_TYPE_POINTER => op_type_pointer_idxs.push(spv_idx), + SPV_INSTRUCTION_OP_TYPE_IMAGE => op_type_image_idxs.push(spv_idx), + SPV_INSTRUCTION_OP_TYPE_SAMPLER => op_type_sampler_idxs.push(spv_idx), + SPV_INSTRUCTION_OP_CONSTANT => op_constant_idxs.push(spv_idx), + SPV_INSTRUCTION_OP_VARIABLE => op_variable_idxs.push(spv_idx), + SPV_INSTRUCTION_OP_ACCESS_CHAIN => op_access_chain_idxs.push(spv_idx), + SPV_INSTRUCTION_OP_IN_BOUNDS_ACCESS_CHAIN => { + op_in_bounds_access_chain_idxs.push(spv_idx) + } + SPV_INSTRUCTION_OP_LOAD => op_load_idxs.push(spv_idx), + SPV_INSTRUCTION_OP_STORE => op_store_idxs.push(spv_idx), + SPV_INSTRUCTION_OP_COPY_MEMORY => op_copy_memory_idxs.push(spv_idx), + SPV_INSTRUCTION_OP_TYPE_FUNCTION => op_type_function_idxs.push(spv_idx), + SPV_INSTRUCTION_OP_FUNCTION_PARAMETER => op_function_parameter_idxs.push(spv_idx), + SPV_INSTRUCTION_OP_FUNCTION_CALL => op_function_call_idxs.push(spv_idx), + SPV_INSTRUCTION_OP_FUNCTION_END => op_function_end_idxs.push(spv_idx), + SPV_INSTRUCTION_OP_DECORATE => op_decorate_idxs.push(spv_idx), + SPV_INSTRUCTION_OP_NAME => op_name_idxs.push(spv_idx), + SPV_INSTRUCTION_OP_SAMPLED_IMAGE => op_sampled_image_idxs.push(spv_idx), + + _ => {} + } + + spv_idx += word_count as usize; + } + + // TODO: Implement for nested arrays. 
+ for ta_idx in op_type_array_idxs.iter() { + let ta_underlying_id = spv[ta_idx + 2]; + for ta_jdx in op_type_array_idxs.iter() { + if spv[ta_jdx + 2] == ta_underlying_id && ta_idx != ta_jdx { + unimplemented!("How dare you use nested arrays! (Unimplemented)"); + } + } + } + + // 2. OpTypeArray -> OpTypePointer + // -> OpVariable + // -> OpFunctionParameter + let array_tp_ta_idxs = op_type_pointer_idxs + .iter() + .filter_map(|&tp_idx| { + let tp_storage_class = spv[tp_idx + 2]; + let tp_underlying_id = spv[tp_idx + 3]; + + if tp_storage_class != SPV_STORAGE_CLASS_UNIFORM_CONSTANT + && tp_storage_class != SPV_STORAGE_CLASS_UNIFORM + { + return None; + } + + op_type_array_idxs + .iter() + .find(|&ta_idx| { + let ta_res_id = spv[ta_idx + 1]; + + ta_res_id == tp_underlying_id + }) + .map(|&ta_idx| { + let array_type = op_type_image_idxs + .iter() + .chain(op_type_sampler_idxs.iter()) + .any(|&t_idx| spv[t_idx + 1] == spv[ta_idx + 2]) + .then_some(OpaqueArrayType); + + (tp_idx, ta_idx, array_type) + }) + }) + .collect::>(); + + // Contains ((OpVariable or OpFunctionParameter), OpTypePointer, Option) + // OpVariable is a subtype of OpFunctionParameter over the first three words. + let array_vfp_ta_idxs = op_variable_idxs + .iter() + .chain(op_function_parameter_idxs.iter()) + .filter_map(|&vfp_idx| { + let variable_type_id = spv[vfp_idx + 1]; + array_tp_ta_idxs + .iter() + .find(|&(tp_idx, _, _)| { + let tp_res_id = spv[tp_idx + 1]; + tp_res_id == variable_type_id + }) + .map(|&(_, ta_idx, array_type)| (vfp_idx, ta_idx, array_type)) + }) + .collect::>(); + + // 3. Build mapping of lengths + let length_map = array_vfp_ta_idxs + .iter() + .map(|(_, ta_idx, _)| { + let length_id = spv[ta_idx + 3]; + let Some(length) = op_constant_idxs.iter().find_map(|&constant_idx| { + (spv[constant_idx + 2] == length_id).then_some(spv[constant_idx + 3]) + }) else { + panic!("Missing OpConstant") + }; + (ta_idx, length) + }) + .collect::>(); + + // 4. 
Unroll array variables + let types_header_position = last_of_indices!(op_type_int_idxs, op_type_pointer_idxs); + let mut types_header_insert = InstructionInsert { + previous_spv_idx: types_header_position.unwrap(), + instruction: vec![], + }; + let mut new_vfp_map = HashMap::new(); + let mut function_type_changes = HashMap::new(); + let mut affected_decorations = vec![]; + + for &(vfp_idx, ta_idx, array_type) in array_vfp_ta_idxs.iter() { + new_spv[vfp_idx..vfp_idx + hiword(spv[vfp_idx]) as usize] + .fill(encode_word(1, SPV_INSTRUCTION_OP_NOP)); + + let mut new_type_instructions = vec![]; + + let instruction = loword(spv[vfp_idx]); + let underlying_type_id = spv[ta_idx + 2]; + let type_pointer_id = ensure_type_pointer( + &spv, + &op_type_pointer_idxs, + &mut instruction_bound, + &mut new_type_instructions, + match array_type { + Some(OpaqueArrayType) => SPV_STORAGE_CLASS_UNIFORM_CONSTANT, + _ => SPV_STORAGE_CLASS_UNIFORM, + }, + underlying_type_id, + ); + + let length = length_map[&ta_idx]; + + let base_id = instruction_bound; + instruction_bound += length; + + match instruction { + SPV_INSTRUCTION_OP_VARIABLE => { + for i in 0..length { + new_type_instructions.append(&mut vec![ + encode_word(4, SPV_INSTRUCTION_OP_VARIABLE), + type_pointer_id, + base_id + i, + match array_type { + Some(OpaqueArrayType) => SPV_STORAGE_CLASS_UNIFORM_CONSTANT, + _ => SPV_STORAGE_CLASS_UNIFORM, + }, + ]); + } + // Ordering issues with this, let's keep it after all other type pointers. + // + // instruction_inserts.push(InstructionInsert { + // previous_spv_idx: v_idx, + // instruction: new_instruction, + // }); + types_header_insert + .instruction + .append(&mut new_type_instructions); + let old_result_id = spv[vfp_idx + 2]; + + // We manually correct the base variable to reuse the original decorations. + // That way, we can output `N-1` correction bindings. 
+ for &d_idx in op_decorate_idxs.iter() { + if spv[d_idx + 1] == old_result_id { + new_spv[d_idx + 1] = base_id; + } + } + for &n_idx in op_name_idxs.iter() { + if spv[n_idx + 1] == old_result_id { + new_spv[n_idx + 1] = base_id; + } + } + + // We only want `N-1` correction bindings. + let new_ids = (base_id + 1..base_id + length).collect::>(); + affected_decorations.push(AffectedDecoration { + original_res_id: old_result_id, + new_res_ids: new_ids, + correction_type: CorrectionType::SplitBindingArray, + }); + } + SPV_INSTRUCTION_OP_FUNCTION_PARAMETER => { + let mut new_param_instructions = vec![]; + for i in 0..length { + new_param_instructions.append(&mut vec![ + encode_word(3, SPV_INSTRUCTION_OP_FUNCTION_PARAMETER), + type_pointer_id, + base_id + i, + ]); + } + instruction_inserts.push(InstructionInsert { + previous_spv_idx: vfp_idx, + instruction: new_param_instructions, + }); + + let entry = get_function_from_parameter(&spv, vfp_idx); + let function_type_id = spv[entry.function_idx + 4]; + + // `entry.parameter_instruction_idx` is the 0-based ordinal of the parameter + // within the function; step 5 compares it against the loop variable `i`. + function_type_changes + .entry(function_type_id) + .or_insert(vec![]) + .push((entry.parameter_instruction_idx, type_pointer_id, length)); + } + _ => unreachable!("Expected OpVariable or OpFunctionParameter"), + }; + + new_vfp_map.insert(vfp_idx, (base_id, ta_idx)); + } + + // 5. 
Change affected OpTypeFunction + for &tf_idx in op_type_function_idxs.iter() { + let tf_result_id = spv[tf_idx + 1]; + + let Some(changes) = function_type_changes.get(&tf_result_id) else { + continue; + }; + + let tf_wc = hiword(spv[tf_idx]) as usize; + let num_params = tf_wc - 3; + + let mut new_params: Vec = vec![]; + let mut change_i = 0; + for i in 0..num_params { + if change_i < changes.len() && changes[change_i].0 == i { + let (_, type_ptr, length) = changes[change_i]; + for _ in 0..length { + new_params.push(type_ptr); + } + change_i += 1; + } else { + new_params.push(spv[tf_idx + 3 + i]); + } + } + + new_spv[tf_idx..tf_idx + tf_wc].fill(encode_word(1, SPV_INSTRUCTION_OP_NOP)); + + let new_wc = (3 + new_params.len()) as u16; + let mut new_tf = vec![ + encode_word(new_wc, SPV_INSTRUCTION_OP_TYPE_FUNCTION), + tf_result_id, + spv[tf_idx + 2], // return type (unchanged) + ]; + new_tf.extend_from_slice(&new_params); + types_header_insert.instruction.extend_from_slice(&new_tf); + } + + let access_idxs = op_access_chain_idxs + .iter() + .chain(op_in_bounds_access_chain_idxs.iter()) + .filter_map(|&ac_idx| { + let base_id = spv[ac_idx + 3]; + array_vfp_ta_idxs + .iter() + .find(|&(vfp_idx, _, _)| { + let result_id = spv[*vfp_idx + 2]; + result_id == base_id + }) + .map(|(vfp_idx, ta_idx, array_type)| (ac_idx, vfp_idx, ta_idx, array_type)) + }) + .collect::>(); + + // 6. Trace array samplers into a map + // Arrayed samplers turn our neat trace tree into a DAG. + // To keep things simple, we handle samplers separately. + // See `opaque_trace.rs` for details. 
+ let mut arrayed_sampler_map = HashMap::new(); + for &(ac_idx, &vfp_idx, ta_idx, &array_type) in access_idxs.iter() { + let access_result_id = spv[ac_idx + 2]; + if let Some(OpaqueArrayType) = array_type { + for &load_idx in op_load_idxs.iter() { + let result_id = spv[load_idx + 2]; + let pointer_id = spv[load_idx + 3]; + if pointer_id == access_result_id { + for &sampled_image_idx in op_sampled_image_idxs.iter() { + let sampler_id = spv[sampled_image_idx + 4]; + if sampler_id == result_id { + arrayed_sampler_map + .insert(sampled_image_idx, (ac_idx, vfp_idx, ta_idx)); + } + } + } + } + } + } + + // 7. Replace OpAccessChain with selection function + for &(ac_idx, vfp_idx, ta_idx, array_type) in access_idxs.iter() { + let ac_word_count = hiword(spv[ac_idx]) as usize; + new_spv[ac_idx..ac_idx + ac_word_count].fill(encode_word(1, SPV_INSTRUCTION_OP_NOP)); + + let old_result_id = spv[ac_idx + 2]; + let index_0_id = spv[ac_idx + 4]; + + let length = length_map[&ta_idx]; + + let (base_id, _) = new_vfp_map[vfp_idx]; + + if let Some(OpaqueArrayType) = *array_type { + // When both a texture array and a sampler array feed the same OpSampledImage, + // the texture AC's processing already generates the correct nested switch + // (outer = texture index, inner = sampler index via `maybe_sampler_array_data`). + // + // Detect this by checking whether this AC is already stored as the sampler + // dimension in `arrayed_sampler_map`. If so, just NOP its dependent loads + // (they reference the now-undefined AC result) and skip switch generation. 
+ let is_inner_sampler_ac = arrayed_sampler_map + .values() + .any(|&(map_ac_idx, _, _)| map_ac_idx == ac_idx); + if is_inner_sampler_ac { + for &load_idx in op_load_idxs.iter() { + if spv[load_idx + 3] == old_result_id { + let wc = hiword(spv[load_idx]) as usize; + new_spv[load_idx..load_idx + wc] + .fill(encode_word(1, SPV_INSTRUCTION_OP_NOP)); + } + } + continue; + } + + let load_idxs = op_load_idxs + .iter() + .filter(|&idx| { + let pointer = spv[idx + 3]; + pointer == old_result_id + }) + .copied() + .collect::>(); + let dependent_traces = trace_loaded_opaques(&spv, &load_idxs); + for trace in dependent_traces { + let maybe_sampler_array_data = match trace.next { + OpaqueImageOp::Sampled(sampled_image_op) => { + arrayed_sampler_map.get(&sampled_image_op.idx) + } + _ => None, + }; + + let switch_instructions = + reconstruct_opaque_trace_and_overwrite(&spv, &mut new_spv, &trace); + let underlying_type_and_target_id = + get_last_instruction_result_type_and_id(&switch_instructions); + let rotate_image_sampler = matches!( + trace.next, + OpaqueImageOp::Sampled(SampledImageOp { + parent: SampledImageParent::Sampler, + .. + }) + ); + let rechain_instructions = |ib: &mut u32, target_id: u32| { + let (instructions, output) = rechain_instructions_with_target_id( + ib, + &switch_instructions, + target_id, + false, + rotate_image_sampler, + ); + (instructions, output.map(|(_, id)| id)) + }; + // Track inner merge labels per outer case so we can fix the outer phi after select_template_spv runs. + // `select_template_spv` puts the outer case labels in the phi, but with a nested inner switch the actual predecessor + // of the outer merge is the inner merge block, not the outer case block. 
+ let mut inner_merge_labels: Vec = vec![]; + + let builder = |ib: &mut u32, target_id: u32| { + if let Some((sampler_array_ac_idx, sampler_array_v_idx, sampler_array_ta_idx)) = + maybe_sampler_array_data + { + let (sampler_base_id, _) = new_vfp_map[sampler_array_v_idx]; + let sampler_index_0_id = spv[sampler_array_ac_idx + 4]; + let sampler_length = length_map[sampler_array_ta_idx] as usize; + + // Rechain only the image load for this outer case. + // switch_instructions = [image_load, OpSampledImage, ...] + // target_id is the split image variable for this outer case. + let image_load_wc = hiword(switch_instructions[0]) as usize; + let (image_load_instrs, image_out) = rechain_instructions_with_target_id( + ib, + &switch_instructions[..image_load_wc], + target_id, + false, + false, + ); + let (_, new_image_id) = + image_out.expect("image load must produce a result"); + + // Locate OpSampledImage and any instructions that follow it. + let si_wc = hiword(switch_instructions[image_load_wc]) as usize; + let after_si = &switch_instructions[image_load_wc + si_wc..]; + let sampler_type_id = spv[op_type_sampler_idxs[0] + 1]; + + // Inner builder: per sampler variable j, emit sampler load + OpSampledImage + trailing instructions. + // The image load is placed before the inner switch. 
+ let inner_builder = |ib: &mut u32, inner_target_id: u32| { + let mut instrs = vec![]; + + let new_sampler_result = inc(ib); + instrs.extend_from_slice(&[ + encode_word(4, SPV_INSTRUCTION_OP_LOAD), + sampler_type_id, + new_sampler_result, + inner_target_id, + ]); + + let new_si_result = inc(ib); + let mut si_patched = + switch_instructions[image_load_wc..image_load_wc + si_wc].to_vec(); + si_patched[2] = new_si_result; + si_patched[3] = new_image_id; + si_patched[4] = new_sampler_result; + instrs.extend_from_slice(&si_patched); + + if !after_si.is_empty() { + let (chained, output) = rechain_instructions_with_target_id( + ib, + after_si, + new_si_result, + false, + false, + ); + instrs.extend_from_slice(&chained); + return (instrs, output.map(|(_, id)| id)); + } + (instrs, Some(new_si_result)) + }; + + let mut inner_switch = select_template_spv( + ib, + sampler_base_id, + sampler_index_0_id, + sampler_length, + inner_builder, + underlying_type_and_target_id, + ); + + // Find the inner merge label (last OpLabel before the inner phi). + let phi_idx = get_last_instruction_index(&inner_switch); + { + let mut idx = 0; + let mut label = 0u32; + while idx < phi_idx { + if loword(inner_switch[idx]) == SPV_INSTRUCTION_OP_LABEL { + label = inner_switch[idx + 1]; + } + idx += hiword(inner_switch[idx]) as usize; + } + inner_merge_labels.push(label); + } + + // Patch the inner phi's result id to a fresh id for the outer phi. + let output_id = (loword(inner_switch[phi_idx]) == SPV_INSTRUCTION_OP_PHI) + .then(|| { + let new_id = inc(ib); + inner_switch[phi_idx + 2] = new_id; + new_id + }); + + // Emit: image load once for this outer case, then the inner switch. 
+ let mut result = image_load_instrs; + result.extend_from_slice(&inner_switch); + (result, output_id) + } else { + rechain_instructions(ib, target_id) + } + }; + + let mut switch = select_template_spv( + &mut instruction_bound, + base_id, + index_0_id, + length as usize, + builder, + underlying_type_and_target_id, + ); + + // Patch the outer phi's predecessor labels. + // select_template_spv filled them with the outer case labels, + // but each outer case now ends at its inner merge block, not at the outer case label. + // phi layout: [opword, type, result_id, val0, pred0, val1, pred1, ...] + if !inner_merge_labels.is_empty() { + let phi_idx = get_last_instruction_index(&switch); + if loword(switch[phi_idx]) == SPV_INSTRUCTION_OP_PHI { + for (i, &label) in inner_merge_labels.iter().enumerate() { + switch[phi_idx + 4 + 2 * i] = label; + } + } + } + + instruction_inserts.push(InstructionInsert { + previous_spv_idx: trace.last_result_id(), + instruction: switch, + }); + } + } else { + // For concrete types, find all dependent operations afterwards and replace each instruction with an index switch + for &spv_idx in op_load_idxs + .iter() + .chain(op_store_idxs.iter()) + .chain(op_access_chain_idxs.iter()) + .chain(op_in_bounds_access_chain_idxs.iter()) + .chain(op_copy_memory_idxs.iter()) + { + let word_count = hiword(spv[spv_idx]) as usize; + let instruction = loword(spv[spv_idx]); + + let mut flip_store_into = false; + let is_dependent = match instruction { + SPV_INSTRUCTION_OP_STORE | SPV_INSTRUCTION_OP_COPY_MEMORY => { + // We need to handle cases where buffers are stored from and to. 
+ let source_id = spv[spv_idx + 1]; + let dest_id = spv[spv_idx + 2]; + + // OpStore: %result = %a + if dest_id == old_result_id { + flip_store_into = true; + } + + source_id == old_result_id || dest_id == old_result_id + } + SPV_INSTRUCTION_OP_LOAD + | SPV_INSTRUCTION_OP_ACCESS_CHAIN + | SPV_INSTRUCTION_OP_IN_BOUNDS_ACCESS_CHAIN => { + let source_id = spv[spv_idx + 3]; + source_id == old_result_id + } + _ => unreachable!("Unexpected instruction {} while matching", instruction), + }; + + if is_dependent && ac_idx != spv_idx { + if instruction == SPV_INSTRUCTION_OP_ACCESS_CHAIN + || instruction == SPV_INSTRUCTION_OP_IN_BOUNDS_ACCESS_CHAIN + { + unimplemented!( + "Nested OpAccessChain / OpInBoundsAccessChain on binding array (Unimplemented)" + ); + } + + // We don't want to fully overwrite the access chain since UBOs and SSBOs + // accesses will always be followed by these. + let mut new_instructions = [ + &spv[ac_idx..ac_idx + 4], + &spv[ac_idx + 5..ac_idx + ac_word_count], + &spv[spv_idx..spv_idx + word_count], + ] + .concat(); + new_instructions[0] = + encode_word(ac_word_count as u16 - 1, SPV_INSTRUCTION_OP_ACCESS_CHAIN); + + new_spv[spv_idx..spv_idx + word_count] + .fill(encode_word(1, SPV_INSTRUCTION_OP_NOP)); + + let builder = &|ib: &mut u32, target_id: u32| { + let (instructions, output) = rechain_instructions_with_target_id( + ib, + &new_instructions, + target_id, + flip_store_into, + false, + ); + (instructions, output.map(|(_, id)| id)) + }; + + let underlying_type_and_target_id = + get_last_instruction_result_type_and_id(&new_instructions); + let switch = select_template_spv( + &mut instruction_bound, + base_id, + index_0_id, + length as usize, + builder, + underlying_type_and_target_id, + ); + instruction_inserts.push(InstructionInsert { + previous_spv_idx: spv_idx, + instruction: switch, + }); + } + } + } + } + + // 8. 
Replace all OpFunctionCall references of arrayed resources + let new_vfp_id_map = new_vfp_map + .iter() + .map(|(&vfp_idx, &v)| { + let result_id = spv[vfp_idx + 2]; + (result_id, v) + }) + .collect::>(); + for &function_call_idx in op_function_call_idxs.iter() { + const ARGUMENT_OFFSET: usize = 4; + let word_count = hiword(spv[function_call_idx]) as usize; + let mut arguments = vec![]; + for &argument_id in spv + .iter() + .take(function_call_idx + word_count) + .skip(function_call_idx + ARGUMENT_OFFSET) + { + if let Some(&(base_id, ta_idx)) = new_vfp_id_map.get(&argument_id) { + let length = length_map[&ta_idx]; + for i in 0..length { + arguments.push(base_id + i); + } + } else { + arguments.push(argument_id) + } + } + + if arguments.len() != word_count - ARGUMENT_OFFSET { + new_spv[function_call_idx..function_call_idx + word_count] + .fill(encode_word(1, SPV_INSTRUCTION_OP_NOP)); + let new_instruction = [ + &[encode_word( + (arguments.len() + ARGUMENT_OFFSET) as u16, + SPV_INSTRUCTION_OP_FUNCTION_CALL, + )], + &spv[function_call_idx + 1..function_call_idx + ARGUMENT_OFFSET], + arguments.as_slice(), + ] + .concat(); + instruction_inserts.push(InstructionInsert { + previous_spv_idx: function_call_idx, + instruction: new_instruction, + }); + } + } + + // 9. Find OpDecorate / OpName to OpVariable + let unused_decorate_idxs = op_decorate_idxs + .iter() + .filter(|&&idx| { + let target = spv[idx + 1]; + if new_spv[idx + 1] != target { + return false; + } + new_vfp_map.iter().any(|(vfp_idx, _)| { + let result_id = spv[vfp_idx + 2]; + target == result_id + }) + }) + .copied() + .collect::>(); + let unused_name_idxs = op_name_idxs + .iter() + .filter(|&&idx| { + let target = spv[idx + 1]; + if new_spv[idx + 1] != target { + return false; + } + new_vfp_map.iter().any(|(vfp_idx, _)| { + let result_id = spv[vfp_idx + 2]; + target == result_id + }) + }) + .copied() + .collect::>(); + + // 10. Remove Instructions that have been Whited Out. 
+ for &spv_idx in unused_decorate_idxs.iter().chain(unused_name_idxs.iter()) { + let op = spv[spv_idx]; + let word_count = hiword(op) as usize; + + new_spv[spv_idx..spv_idx + word_count].fill(encode_word(1, SPV_INSTRUCTION_OP_NOP)); + } + + // 11. OpDecorate + let DecorateOut { + descriptor_sets_to_correct, + } = util::decorate(DecorateIn { + spv: &spv, + instruction_inserts: &mut instruction_inserts, + first_op_deocrate_idx: op_decorate_idxs.first().copied(), + op_decorate_idxs: &op_decorate_idxs, + affected_decorations: &affected_decorations, + corrections, + }); + + // 12. Insert New Instructions + instruction_inserts.insert(0, types_header_insert); + insert_new_instructions(&spv, &mut new_spv, &word_inserts, &instruction_inserts); + + // 13. Correct OpDecorate Bindings + util::correct_decorate(CorrectDecorateIn { + new_spv: &mut new_spv, + descriptor_sets_to_correct, + }); + prune_noops(&mut new_spv); + + // 14. Write New Header and New Code + Ok(fuse_final(spv_header, new_spv, instruction_bound)) +} diff --git a/src/splitbindingarray/rechain_instructions.rs b/src/splitbindingarray/rechain_instructions.rs new file mode 100644 index 0000000..d1af9db --- /dev/null +++ b/src/splitbindingarray/rechain_instructions.rs @@ -0,0 +1,128 @@ +use super::*; + +// +// Take the any chain of instructions with the following form: +// OpSomething %result_type_id %result_id %input ... +// +// New temp variables are properly chained between instructions. +// +// The final instruction's `[idx+2]` is replaced with `%target_id` +// +// The final instruction can be a write operation. +// Write instructions are specially checked for because they follow a different convention. +// The following are considered valid write instructions: +// OpStore, OpCopyMemory, OpImageWrite +// +// `flip_store_into` specifically changes `%a = %result` to `%result = %a` +// `chain_sampler_over_image` specifically changes OpImageSampled to chain to the sampler. 
+// +pub fn rechain_instructions_with_target_id( + ib: &mut u32, + snippet: &[u32], + target_id: u32, + flip_store_into: bool, + rotate_image_sampler: bool, +) -> (Vec, Option<(u32, u32)>) { + let mut instruction_offsets = vec![]; + let mut idx = 0; + while idx < snippet.len() { + instruction_offsets.push(idx); + idx += hiword(snippet[idx]) as usize; + } + + let last_j = instruction_offsets.len() - 1; + let last_off = instruction_offsets[last_j]; + let returns_result = !matches!( + loword(snippet[last_off]), + SPV_INSTRUCTION_OP_STORE | SPV_INSTRUCTION_OP_COPY_MEMORY | SPV_INSTRUCTION_OP_IMAGE_WRITE + ); + + let mut patched = snippet.to_vec(); + let mut current_source = target_id; + for (j, &off) in instruction_offsets.iter().enumerate() { + if j < last_j || returns_result { + let new_temp = inc(ib); + + if rotate_image_sampler && loword(patched[off]) == SPV_INSTRUCTION_OP_SAMPLED_IMAGE { + patched[off + 4] = current_source; + } else { + patched[off + 3] = current_source; + } + + patched[off + 2] = new_temp; + current_source = new_temp; + } else if flip_store_into { + patched[off + 2] = current_source; + } else { + patched[off + 1] = current_source; + } + } + + let last_offset = instruction_offsets[last_j]; + let underlying_type_and_target = (patched[last_offset + 1], patched[last_offset + 2]); + + ( + patched, + returns_result.then_some(underlying_type_and_target), + ) +} + +// Intended to run alongside the previous function with the same snippet properties. +// If the last instruction is not an expected store operation, return the final result type and id. 
+pub fn get_last_instruction_result_type_and_id(snippet: &[u32]) -> Option<(u32, u32)> { + let last_off = get_last_instruction_index(snippet); + let returns_result = !matches!( + loword(snippet[last_off]), + SPV_INSTRUCTION_OP_STORE | SPV_INSTRUCTION_OP_COPY_MEMORY | SPV_INSTRUCTION_OP_IMAGE_WRITE + ); + + returns_result.then_some((snippet[last_off + 1], snippet[last_off + 2])) +} + +#[test] +fn two_chained_result_instructions() { + #[rustfmt::skip] + let snippet: &[u32] = &[ + encode_word(4, SPV_INSTRUCTION_OP_LOAD), 10, 1, 2, + encode_word(4, SPV_INSTRUCTION_OP_LOAD), 20, 3, 4, + ]; + let mut ib = 500u32; + let (out, result) = rechain_instructions_with_target_id(&mut ib, snippet, 100, false, false); + assert_eq!(out[2], 500); + assert_eq!(out[3], 100); + assert_eq!(out[6], 501); + assert_eq!(out[7], 500); + assert_eq!(result, Some((20, 501))); +} + +#[test] +fn terminal_store_no_flip() { + #[rustfmt::skip] + let snippet: &[u32] = &[ + encode_word(4, SPV_INSTRUCTION_OP_LOAD), 10, 1, 2, + encode_word(3, SPV_INSTRUCTION_OP_STORE), 3, 4, + ]; + let mut ib = 500u32; + let (out, result) = rechain_instructions_with_target_id(&mut ib, snippet, 100, false, false); + assert_eq!(out[2], 500); + assert_eq!(out[3], 100); + assert_eq!(out[5], 500); + assert_eq!(out[6], 4); + assert_eq!(result, None); +} + +#[test] +fn terminal_store_flip() { + #[rustfmt::skip] + let snippet: &[u32] = &[ + encode_word(4, SPV_INSTRUCTION_OP_LOAD), 10, 1, 2, + encode_word(3, SPV_INSTRUCTION_OP_STORE), 3, 4, + ]; + let mut ib = 500u32; + let (out, result) = rechain_instructions_with_target_id(&mut ib, snippet, 100, true, false); + assert_eq!(out[2], 500); + assert_eq!(out[3], 100); + assert_eq!(out[5], 3); + assert_eq!(out[6], 500); + assert_eq!(result, None); +} diff --git a/src/splitbindingarray/select_template.rs b/src/splitbindingarray/select_template.rs new file mode 100644 index 0000000..b0f8cfb --- /dev/null +++ b/src/splitbindingarray/select_template.rs @@ -0,0 +1,92 @@ +use super::*; + 
+pub(super) fn select_template_spv (Vec, Option)>( + ib: &mut u32, + base_id: u32, + index_id: u32, + length: usize, + mut instruction_builder: F, + result_type_and_id: Option<(u32, u32)>, +) -> Vec { + // + // TODO: You can probably decrease the instruction count with OpPhi or OpSelect. + // + // OpSelectionMerge %merge None + // OpSwitch %index_id %default %merge 0 %case_0 1 %case_1 ... N %case_N + // %case_0 = OpLabel + // %temp_0 = {instruction_builder(%base_id+0)} + // OpBranch %merge + // %case_1 = OpLabel + // %temp_1 = {instruction_builder(%base_id+1)} + // OpBranch %merge + // + // ... + // + // %case_N = OpLabel + // %temp_N = {instruction_builder(%base_id+N)} + // OpBranch %merge + // %default = OpLabel + // %temp_def = {instruction_builder(%base_id+0)} + // %merge = OpLabel + // + // ; Only if there will be a result value. + // %target_id = OpPhi %underlying_type_id %temp_0 %case_0 %temp_1 %case_1 ... %temp_N %case_N %temp_def %default + // + + let case_labels = (0..length).map(|_| inc(ib)).collect::>(); + let default_label = inc(ib); + let merge_label = inc(ib); + + let mut spv = vec![]; + spv.extend_from_slice(&[ + encode_word(3, SPV_INSTRUCTION_OP_SELECTION_MERGE), + merge_label, + SPV_SELECTION_CONTROL_NONE, + encode_word(3 + 2 * length as u16, SPV_INSTRUCTION_OP_SWITCH), + index_id, + default_label, + ]); + for (i, &case_label) in case_labels.iter().enumerate() { + spv.push(i as u32); + spv.push(case_label); + } + + let mut output_ids = vec![]; + for (i, &case_label) in case_labels.iter().enumerate() { + spv.extend_from_slice(&[encode_word(2, SPV_INSTRUCTION_OP_LABEL), case_label]); + let (instructions, maybe_output_id) = instruction_builder(ib, base_id + i as u32); + spv.extend_from_slice(&instructions); + if let Some(output_id) = maybe_output_id { + output_ids.push(output_id); + } + spv.extend_from_slice(&[encode_word(2, SPV_INSTRUCTION_OP_BRANCH), merge_label]); + } + spv.extend_from_slice(&[encode_word(2, SPV_INSTRUCTION_OP_LABEL), 
default_label]); + let (instructions, default_output_id) = instruction_builder(ib, base_id); + spv.extend_from_slice(&instructions); + spv.extend_from_slice(&[ + encode_word(2, SPV_INSTRUCTION_OP_BRANCH), + merge_label, + encode_word(2, SPV_INSTRUCTION_OP_LABEL), + merge_label, + ]); + if let Some((result_type_id, target_id)) = result_type_and_id { + assert!(output_ids.len() == length); + spv.push(encode_word( + 3 + 2 * (length as u16 + 1), + SPV_INSTRUCTION_OP_PHI, + )); + spv.push(result_type_id); + spv.push(target_id); + for (i, &case_label) in case_labels.iter().enumerate() { + spv.push(output_ids[i]); + spv.push(case_label); + } + spv.push( + default_output_id.expect("default block must produce output when result is expected"), + ); + spv.push(default_label); + } + + spv +} diff --git a/src/splitcombined.rs b/src/splitcombined.rs index 789be03..a27edd9 100644 --- a/src/splitcombined.rs +++ b/src/splitcombined.rs @@ -70,6 +70,7 @@ pub fn combimgsampsplitter( } SPV_INSTRUCTION_OP_TYPE_SAMPLED_IMAGE => op_type_sampled_image_idxs.push(spv_idx), SPV_INSTRUCTION_OP_TYPE_POINTER => { + // This should probably go elsewhere. 
#[allow(clippy::collapsible_match)] if spv[spv_idx + 2] == SPV_STORAGE_CLASS_UNIFORM_CONSTANT { op_type_pointer_idxs.push(spv_idx); @@ -199,7 +200,7 @@ pub fn combimgsampsplitter( }| { AffectedDecoration { original_res_id: *v_res_id, - new_res_id: *new_sampler_v_res_id, + new_res_ids: vec![*new_sampler_v_res_id], correction_type: CorrectionType::SplitCombined, } }, diff --git a/src/splitdref.rs b/src/splitdref.rs index cdba030..b42fad1 100644 --- a/src/splitdref.rs +++ b/src/splitdref.rs @@ -552,7 +552,7 @@ pub fn drefsplitter( affected_variables.push(AffectedDecoration { original_res_id: spv[variable_idx + 2], - new_res_id: new_variable_id, + new_res_ids: vec![new_variable_id], correction_type: match complement_ty { OperationVariant::Regular => CorrectionType::SplitDrefRegular, OperationVariant::Dref => CorrectionType::SplitDrefComparison, diff --git a/src/spv.rs b/src/spv.rs index cb17b41..70459a0 100644 --- a/src/spv.rs +++ b/src/spv.rs @@ -26,7 +26,9 @@ pub const SPV_INSTRUCTION_OP_FUNCTION: u16 = 54; pub const SPV_INSTRUCTION_OP_VARIABLE: u16 = 59; pub const SPV_INSTRUCTION_OP_LOAD: u16 = 61; pub const SPV_INSTRUCTION_OP_STORE: u16 = 62; +pub const SPV_INSTRUCTION_OP_COPY_MEMORY: u16 = 63; pub const SPV_INSTRUCTION_OP_ACCESS_CHAIN: u16 = 65; +pub const SPV_INSTRUCTION_OP_IN_BOUNDS_ACCESS_CHAIN: u16 = 66; pub const SPV_INSTRUCTION_OP_DECORATE: u16 = 71; pub const SPV_INSTRUCTION_OP_MEMBER_DECORATE: u16 = 72; pub const SPV_INSTRUCTION_OP_COMPOSITE_CONSTRUCT: u16 = 80; @@ -65,8 +67,10 @@ pub const SPV_INSTRUCTION_OP_EXT_INST_IMPORT: u16 = 11; pub const SPV_INSTRUCTION_OP_EXT_INST: u16 = 12; pub const SPV_INSTRUCTION_OP_COMPOSITE_EXTRACT: u16 = 81; pub const SPV_INSTRUCTION_OP_IMAGE_FETCH: u16 = 95; +pub const SPV_INSTRUCTION_OP_IMAGE_TEXEL_POINTER: u16 = 67; pub const SPV_INSTRUCTION_OP_IMAGE_READ: u16 = 98; pub const SPV_INSTRUCTION_OP_IMAGE_WRITE: u16 = 99; +pub const SPV_INSTRUCTION_OP_IMAGE_SPARSE_READ: u16 = 320; pub const SPV_INSTRUCTION_OP_S_NEGATE: u16 = 
126; pub const SPV_INSTRUCTION_OP_SELECT: u16 = 169; pub const SPV_INSTRUCTION_OP_S_GREATER_THAN: u16 = 173; @@ -75,6 +79,9 @@ pub const SPV_INSTRUCTION_OP_PHI: u16 = 245; pub const SPV_INSTRUCTION_OP_SELECTION_MERGE: u16 = 247; pub const SPV_INSTRUCTION_OP_BRANCH: u16 = 249; pub const SPV_INSTRUCTION_OP_BRANCH_CONDITIONAL: u16 = 250; +pub const SPV_INSTRUCTION_OP_SWITCH: u16 = 251; + +pub const SPV_SELECTION_CONTROL_NONE: u32 = 0; pub const SPV_STORAGE_CLASS_UNIFORM_CONSTANT: u32 = 0; pub const SPV_STORAGE_CLASS_UNIFORM: u32 = 2; diff --git a/src/storagecubepatch.rs b/src/storagecubepatch.rs index 99f1608..76c10bf 100644 --- a/src/storagecubepatch.rs +++ b/src/storagecubepatch.rs @@ -346,7 +346,7 @@ pub fn storagecubepatch( .iter() .map(|id| AffectedDecoration { original_res_id: *id, - new_res_id: *id, + new_res_ids: vec![*id], correction_type: CorrectionType::ConvertStorageCube, }) .collect::>(), diff --git a/src/test.rs b/src/test.rs index a38bc15..c0286e8 100644 --- a/src/test.rs +++ b/src/test.rs @@ -1,6 +1,6 @@ use super::{ combimgsampsplitter, drefsplitter, immediatespatch, isnanisinfpatch, mirrorpatch, - pruneunuseddref, storagecubepatch, u8_slice_to_u32_vec, u32_slice_to_u8_vec, + pruneunuseddref, splitbindingarray, storagecubepatch, u8_slice_to_u32_vec, u32_slice_to_u8_vec, }; use naga::{back, front, valid}; @@ -222,6 +222,7 @@ test_with_spv_and_fn_no_correction![ ]; // --- + test_with_spv_and_fn_no_correction![ immediatespatch_immediatespatch_immediates, DO_ALL, @@ -254,3 +255,48 @@ test_with_spv_and_fn_no_correction![ "./test/immediatespatch/row_major.spv", immediatespatch ]; + +// --- + +test_with_spv_and_fn![ + splitbinding_buffer_binding_array, + DO_ALL, + "./test/splitbindingarray/buffer_binding_array.spv", + splitbindingarray +]; +test_with_spv_and_fn![ + splitbinding_storage_binding_array, + DO_ALL, + "./test/splitbindingarray/storage_binding_array.spv", + splitbindingarray +]; +test_with_spv_and_fn![ + splitbinding_texture_binding_array, + 
DO_ALL, + "./test/splitbindingarray/texture_binding_array.spv", + splitbindingarray +]; +test_with_spv_and_fn![ + splitbinding_nested_texture_binding_array, + DO_ALL, + "./test/splitbindingarray/nested_texture_binding_array.spv", + splitbindingarray +]; +test_with_spv_and_fn![ + splitbinding_sampler_binding_array, + DO_ALL, + "./test/splitbindingarray/sampler_binding_array.spv", + splitbindingarray +]; +test_with_spv_and_fn![ + splitbinding_sampler_stub, + DO_ALL, + "./test/splitbindingarray/sampler_stub.spv", + splitbindingarray +]; +test_with_spv_and_fn![ + splitbinding_texture_array_binding_array, + DO_ALL, + "./test/splitbindingarray/texture_array_binding_array.spv", + splitbindingarray +]; diff --git a/src/test/compile.sh b/src/test/compile.sh index 28bca60..f4d057b 100755 --- a/src/test/compile.sh +++ b/src/test/compile.sh @@ -7,3 +7,4 @@ set -e (cd storagecubepatch; ./compile.sh) (cd pruneunuseddref; ./compile.sh) (cd immediatespatch; ./compile.sh) +(cd splitbindingarray; ./compile.sh) diff --git a/src/test/splitbindingarray/buffer_binding_array.frag b/src/test/splitbindingarray/buffer_binding_array.frag new file mode 100644 index 0000000..6599398 --- /dev/null +++ b/src/test/splitbindingarray/buffer_binding_array.frag @@ -0,0 +1,21 @@ +#version 440 + +#define MAX_THINGS 8 + +layout(location = 0) out vec4 o_color; + +layout(set = 0, binding = 0, std140) uniform Thing { + vec4 thing1; + float thing2; +} u_things[MAX_THINGS]; + +void main() { + o_color = vec4(0.0); + for (int i = 0; i < MAX_THINGS; i++) { + o_color += u_things[i].thing1; + o_color.x += u_things[i].thing2; + + o_color = u_things[i].thing1; + } +} + diff --git a/src/test/splitbindingarray/buffer_binding_array.spv b/src/test/splitbindingarray/buffer_binding_array.spv new file mode 100644 index 0000000..54c5942 Binary files /dev/null and b/src/test/splitbindingarray/buffer_binding_array.spv differ diff --git a/src/test/splitbindingarray/compile.sh b/src/test/splitbindingarray/compile.sh new file 
mode 100755 index 0000000..8106c31 --- /dev/null +++ b/src/test/splitbindingarray/compile.sh @@ -0,0 +1,9 @@ +set -e + +glslc -O0 buffer_binding_array.frag -o buffer_binding_array.spv +glslc -O0 storage_binding_array.frag -o storage_binding_array.spv +glslc -O0 texture_binding_array.frag -o texture_binding_array.spv +glslc -O0 sampler_binding_array.frag -o sampler_binding_array.spv +glslc -O0 nested_texture_binding_array.frag -o nested_texture_binding_array.spv +glslc -O0 sampler_stub.frag -o sampler_stub.spv +glslc -O0 texture_array_binding_array.frag -o texture_array_binding_array.spv diff --git a/src/test/splitbindingarray/nested_texture_binding_array.frag b/src/test/splitbindingarray/nested_texture_binding_array.frag new file mode 100644 index 0000000..23929e0 --- /dev/null +++ b/src/test/splitbindingarray/nested_texture_binding_array.frag @@ -0,0 +1,25 @@ +#version 440 + +#define MAX_TEXTURES 8 + +layout(location = 0) out vec4 o_color; + +layout(set = 0, binding = 0) uniform texture2D u_textures[MAX_TEXTURES]; +layout(set = 0, binding = 1) uniform sampler u_sampler; + +void process(texture2D textures[MAX_TEXTURES]) { + for (int i = 0; i < MAX_TEXTURES; i++) { + o_color += texture(sampler2D(textures[i], u_sampler), vec2(0.0, 0.0)); + } +} + +void process2(texture2D textures[MAX_TEXTURES]) { + process(textures); +} + +void main() { + o_color = vec4(0.0); + process(u_textures); + process2(u_textures); +} + diff --git a/src/test/splitbindingarray/nested_texture_binding_array.spv b/src/test/splitbindingarray/nested_texture_binding_array.spv new file mode 100644 index 0000000..82ce1ca Binary files /dev/null and b/src/test/splitbindingarray/nested_texture_binding_array.spv differ diff --git a/src/test/splitbindingarray/sampler_binding_array.frag b/src/test/splitbindingarray/sampler_binding_array.frag new file mode 100644 index 0000000..a910fb5 --- /dev/null +++ b/src/test/splitbindingarray/sampler_binding_array.frag @@ -0,0 +1,17 @@ +#version 440 + +#define 
MAX_TEXTURES 8 + +layout(location = 0) out vec4 o_color; + +layout(set = 0, binding = 0) uniform texture2D u_textures[MAX_TEXTURES]; +layout(set = 0, binding = 1) uniform sampler u_samplers[MAX_TEXTURES + 1]; + +void main() { + o_color = vec4(0.0); + for (int i = 0, j = 1; i < MAX_TEXTURES; i++, j++) { + o_color += texture(sampler2D(u_textures[i], u_samplers[j]), vec2(0.0, 0.0)); + o_color += texture(sampler2D(u_textures[0], u_samplers[i]), vec2(0.0, 0.0)); + } +} + diff --git a/src/test/splitbindingarray/sampler_binding_array.spv b/src/test/splitbindingarray/sampler_binding_array.spv new file mode 100644 index 0000000..daa11bd Binary files /dev/null and b/src/test/splitbindingarray/sampler_binding_array.spv differ diff --git a/src/test/splitbindingarray/sampler_stub.frag b/src/test/splitbindingarray/sampler_stub.frag new file mode 100644 index 0000000..a823682 --- /dev/null +++ b/src/test/splitbindingarray/sampler_stub.frag @@ -0,0 +1,17 @@ +#version 440 + +#define MAX_TEXTURES 8 + +layout(location = 0) out vec4 o_color; + +layout(set = 0, binding = 0) uniform texture2D u_textures[MAX_TEXTURES]; +layout(set = 0, binding = 1) uniform sampler u_samplers[MAX_TEXTURES]; + +void main() { + o_color = vec4(0.0); + for (int i = 0; i < MAX_TEXTURES; i++) { + // We want to ensure that this is valid even if opaque values never resolve. 
+ texture(sampler2D(u_textures[i], u_samplers[i]), vec2(0.0, 0.0)); + } +} + diff --git a/src/test/splitbindingarray/sampler_stub.spv b/src/test/splitbindingarray/sampler_stub.spv new file mode 100644 index 0000000..644d82c Binary files /dev/null and b/src/test/splitbindingarray/sampler_stub.spv differ diff --git a/src/test/splitbindingarray/storage_binding_array.frag b/src/test/splitbindingarray/storage_binding_array.frag new file mode 100644 index 0000000..16ec4cd --- /dev/null +++ b/src/test/splitbindingarray/storage_binding_array.frag @@ -0,0 +1,20 @@ +#version 440 + +#define MAX_THINGS 8 + +layout(location = 0) out vec4 o_color; + +layout(set = 0, binding = 0, std140) buffer Thing { + vec4 thing1; + float thing2; +} u_things[MAX_THINGS]; + +void main() { + o_color = vec4(0.0); + for (int i = 0; i < MAX_THINGS; i++) { + o_color += u_things[i].thing1; + o_color.x += u_things[i].thing2; + u_things[i].thing2 *= 2.0; + } +} + diff --git a/src/test/splitbindingarray/storage_binding_array.spv b/src/test/splitbindingarray/storage_binding_array.spv new file mode 100644 index 0000000..b515c3a Binary files /dev/null and b/src/test/splitbindingarray/storage_binding_array.spv differ diff --git a/src/test/splitbindingarray/texture_array_binding_array.frag b/src/test/splitbindingarray/texture_array_binding_array.frag new file mode 100644 index 0000000..054a0fb --- /dev/null +++ b/src/test/splitbindingarray/texture_array_binding_array.frag @@ -0,0 +1,15 @@ +#version 440 + +#define MAX_TEXTURES 8 + +layout(location = 0) out vec4 o_color; + +layout(set = 0, binding = 0) uniform texture2DArray u_textures[MAX_TEXTURES]; +layout(set = 0, binding = 1) uniform sampler u_sampler; + +void main() { + o_color = vec4(0.0); + for (int i = 0; i < MAX_TEXTURES; i++) { + o_color += texture(sampler2DArray(u_textures[i], u_sampler), vec3(0.0, 0.0, 0.0)); + } +} diff --git a/src/test/splitbindingarray/texture_array_binding_array.spv b/src/test/splitbindingarray/texture_array_binding_array.spv 
new file mode 100644 index 0000000..1eef625 Binary files /dev/null and b/src/test/splitbindingarray/texture_array_binding_array.spv differ diff --git a/src/test/splitbindingarray/texture_binding_array.frag b/src/test/splitbindingarray/texture_binding_array.frag new file mode 100644 index 0000000..bfa7136 --- /dev/null +++ b/src/test/splitbindingarray/texture_binding_array.frag @@ -0,0 +1,16 @@ +#version 440 + +#define MAX_TEXTURES 8 + +layout(location = 0) out vec4 o_color; + +layout(set = 0, binding = 0) uniform texture2D u_textures[MAX_TEXTURES]; +layout(set = 0, binding = 1) uniform sampler u_sampler; + +void main() { + o_color = vec4(0.0); + for (int i = 0; i < MAX_TEXTURES; i++) { + o_color += texture(sampler2D(u_textures[i], u_sampler), vec2(0.0, 0.0)); + } +} + diff --git a/src/test/splitbindingarray/texture_binding_array.spv b/src/test/splitbindingarray/texture_binding_array.spv new file mode 100644 index 0000000..c330ca7 Binary files /dev/null and b/src/test/splitbindingarray/texture_binding_array.spv differ diff --git a/src/test/splitcombined/compile.sh b/src/test/splitcombined/compile.sh index 41829f6..984c003 100755 --- a/src/test/splitcombined/compile.sh +++ b/src/test/splitcombined/compile.sh @@ -1,7 +1,7 @@ set -e -glslc test.frag -o test.spv -glslc test_arrayed.frag -o test_arrayed.spv -glslc test_nested.frag -o test_nested.spv -glslc test_mixed.frag -o test_mixed.spv +glslc -O0 test.frag -o test.spv +glslc -O0 test_arrayed.frag -o test_arrayed.spv +glslc -O0 test_nested.frag -o test_nested.spv +glslc -O0 test_mixed.frag -o test_mixed.spv diff --git a/src/test/splitdref/compile.sh b/src/test/splitdref/compile.sh index 64aa387..f3f1328 100755 --- a/src/test/splitdref/compile.sh +++ b/src/test/splitdref/compile.sh @@ -1,15 +1,15 @@ set -e -glslc test_image.frag -o test_image.spv -glslc test_nested_image.frag -o test_nested_image.spv -glslc test_nested2_image.frag -o test_nested2_image.spv -glslc test_sampler.frag -o test_sampler.spv -glslc 
test_nested_sampler.frag -o test_nested_sampler.spv -glslc test_nested2_sampler.frag -o test_nested2_sampler.spv -glslc test_mixed_dref.frag -o test_mixed_dref.spv -glslc test_hidden_dref.frag -o test_hidden_dref.spv -glslc test_hidden2_dref.frag -o test_hidden2_dref.spv -glslc test_hidden3_dref.frag -o test_hidden3_dref.spv -glslc test_cross_dref.frag -o test_cross_dref.spv +glslc -O0 test_image.frag -o test_image.spv +glslc -O0 test_nested_image.frag -o test_nested_image.spv +glslc -O0 test_nested2_image.frag -o test_nested2_image.spv +glslc -O0 test_sampler.frag -o test_sampler.spv +glslc -O0 test_nested_sampler.frag -o test_nested_sampler.spv +glslc -O0 test_nested2_sampler.frag -o test_nested2_sampler.spv +glslc -O0 test_mixed_dref.frag -o test_mixed_dref.spv +glslc -O0 test_hidden_dref.frag -o test_hidden_dref.spv +glslc -O0 test_hidden2_dref.frag -o test_hidden2_dref.spv +glslc -O0 test_hidden3_dref.frag -o test_hidden3_dref.spv +glslc -O0 test_cross_dref.frag -o test_cross_dref.spv spirv-as test_wrong_type_image.spvasm -o test_wrong_type_image.spv diff --git a/src/test/storagecubepatch/compile.sh b/src/test/storagecubepatch/compile.sh index 941b7c7..96de343 100755 --- a/src/test/storagecubepatch/compile.sh +++ b/src/test/storagecubepatch/compile.sh @@ -1,6 +1,6 @@ set -e -glslc storagecube.frag -o storagecube.spv -glslc storagecube_nested.frag -o storagecube_nested.spv -glslc storagecube_immediate.frag -o storagecube_immediate.spv +glslc -O0 storagecube.frag -o storagecube.spv +glslc -O0 storagecube_nested.frag -o storagecube_nested.spv +glslc -O0 storagecube_immediate.frag -o storagecube_immediate.spv diff --git a/src/util.rs b/src/util.rs index f5e569c..403ad5f 100644 --- a/src/util.rs +++ b/src/util.rs @@ -4,12 +4,16 @@ mod correct_decorate; mod decorate; mod ensure; mod function; +mod instruction; +mod opaque_trace; mod pointer; pub use correct_decorate::*; pub use decorate::*; pub use ensure::*; pub use function::*; +pub use instruction::*; +pub use 
opaque_trace::*; pub use pointer::*; pub fn hiword(value: u32) -> u16 { diff --git a/src/util/decorate.rs b/src/util/decorate.rs index e00aa11..7f2b459 100644 --- a/src/util/decorate.rs +++ b/src/util/decorate.rs @@ -1,9 +1,9 @@ use super::*; -#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq)] pub struct AffectedDecoration { pub original_res_id: u32, - pub new_res_id: u32, + pub new_res_ids: Vec, pub correction_type: CorrectionType, } @@ -62,21 +62,23 @@ pub fn decorate(d_in: DecorateIn) -> DecorateOut { affected_variables.iter().for_each( |AffectedDecoration { original_res_id, - new_res_id, + new_res_ids, correction_type, }| { if *original_res_id == target_id { - if decoration_id == SPV_DECORATION_BINDING { - new_variable_id_to_decorations - .entry((new_res_id, correction_type)) - .or_insert((None, None)) - .0 = Some((d_idx, decoration_value)); - } else if decoration_id == SPV_DECORATION_DESCRIPTOR_SET { - new_variable_id_to_decorations - .entry((new_res_id, correction_type)) - .or_insert((None, None)) - .1 = Some((d_idx, decoration_value)); - descriptor_sets_to_correct.insert(decoration_value); + for new_res_id in new_res_ids { + if decoration_id == SPV_DECORATION_BINDING { + new_variable_id_to_decorations + .entry((new_res_id, correction_type)) + .or_insert((None, None)) + .0 = Some((d_idx, decoration_value)); + } else if decoration_id == SPV_DECORATION_DESCRIPTOR_SET { + new_variable_id_to_decorations + .entry((new_res_id, correction_type)) + .or_insert((None, None)) + .1 = Some((d_idx, decoration_value)); + descriptor_sets_to_correct.insert(decoration_value); + } } } }, diff --git a/src/util/instruction.rs b/src/util/instruction.rs new file mode 100644 index 0000000..7c9afd5 --- /dev/null +++ b/src/util/instruction.rs @@ -0,0 +1,13 @@ +use super::*; + +// TODO: Implement `map_spirv` which iterates through a &[u32] of instructions. 
+ +pub fn get_last_instruction_index(instructions: &[u32]) -> usize { + let mut last_off = 0; + let mut idx = 0; + while idx < instructions.len() { + last_off = idx; + idx += hiword(instructions[idx]) as usize; + } + last_off +} diff --git a/src/util/opaque_trace.rs b/src/util/opaque_trace.rs new file mode 100644 index 0000000..7bd6041 --- /dev/null +++ b/src/util/opaque_trace.rs @@ -0,0 +1,474 @@ +use super::*; + +// Opaque types cannot be operated on in the same way as non-opaque types. +// We need tools to trace the instruction chain up to the point an opaque type becomes a non-opaque. +// +// We care about OpTypeSampler and OpTypeImage, or more specifically, textures, storage textures, +// and samplers. +// +// My notes on the instruction structure: +// +// ``` +// Textures: +// - OpLoad +// - OpImageFetch +// - OpImageGather +// - OpImageDrefGather +// - OpSampledImage (DAG NODE) +// - OpImageSampleImplicitLod +// - OpImageSampleExplicitLod +// - OpImageSampleDrefImplicitLod +// - OpImageSampleDrefExplicitLod +// - OpImageSampleProjImplicitLod +// - OpImageSampleProjExplicitLod +// - OpImageSampleProjDrefImplicitLod +// - OpImageSampleProjDrefExplicitLod +// - (SparseResidency Capability) +// - OpImageSparseSample* +// OpImageGather +// OpImageDrefGather +// - (ImageQuery Capability) +// - OpImageQuerySizeLod +// - OpImageQuerySize +// - OpImageQueryLevels +// - OpImageQuerySamples +// - OpImageQueryLod +// - OpImageQueryFormat +// - OpImageQueryOrder +// - (SparseResidency Capability) +// - OpImageSparseFetch +// - OpImageSparseGather +// - OpImageSparseDrefGather +// +// Storage Textures: +// - OpLoad +// - OpImageRead +// - OpImageWrite +// - OpImageSparseRead +// - OpImageTexelPointer +// - (ImageQuery Capability) +// - OpImageQuerySizeLod +// - OpImageQuerySize +// - OpImageQueryLevels +// - OpImageQuerySamples +// - OpImageQueryLod +// - OpImageQueryFormat +// - OpImageQueryOrder +// +// Samplers: +// - OpLoad +// - OpSampledImage (DAG NODE) +// ``` +// 
+// We can build a DAG for the instruction chains, but if we handle sampler's `OpSampledImage` +// separately, we can get away with a tree, or just a `struct` +// + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct OpaqueLoadTrace { + pub load_idx: usize, + pub next: OpaqueImageOp, +} + +impl OpaqueLoadTrace { + pub fn last_result_id(&self) -> usize { + match self.next { + OpaqueImageOp::RawImage(raw_image_op) => raw_image_op.result_idx(), + OpaqueImageOp::RawStorage(storage_texture_op) => storage_texture_op.result_idx(), + OpaqueImageOp::Sampled(sampled_image_op) => sampled_image_op.next.result_idx(), + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum OpaqueImageOp { + RawImage(RawImageOp), + RawStorage(StorageTextureOp), + Sampled(SampledImageOp), +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum RawImageOp { + Fetch(usize), + Gather(usize), + DrefGather(usize), + // TODO: Image Query Capability +} + +impl RawImageOp { + pub fn result_idx(&self) -> usize { + match self { + RawImageOp::Fetch(i) | RawImageOp::Gather(i) | RawImageOp::DrefGather(i) => *i, + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum SampledImageParent { + Image, + Sampler, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct SampledImageOp { + pub idx: usize, + pub parent: SampledImageParent, + pub next: SampledImageVariant, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum SampledImageVariant { + SampleImplicitLod(usize), + SampleExplicitLod(usize), + SampleDrefImplicitLod(usize), + SampleDrefExplicitLod(usize), + SampleProjImplicitLod(usize), + SampleProjExplicitLod(usize), + SampleProjDrefImplicitLod(usize), + SampleProjDrefExplicitLod(usize), + Gather(usize), + DrefGather(usize), + // TODO: Image Query Capability + // TODO: Sparse Residency Capability +} + +impl SampledImageVariant { + pub fn result_idx(&self) -> usize { + match self { + SampledImageVariant::SampleImplicitLod(i) + | 
SampledImageVariant::SampleExplicitLod(i) + | SampledImageVariant::SampleDrefImplicitLod(i) + | SampledImageVariant::SampleDrefExplicitLod(i) + | SampledImageVariant::SampleProjImplicitLod(i) + | SampledImageVariant::SampleProjExplicitLod(i) + | SampledImageVariant::SampleProjDrefImplicitLod(i) + | SampledImageVariant::SampleProjDrefExplicitLod(i) + | SampledImageVariant::Gather(i) + | SampledImageVariant::DrefGather(i) => *i, + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum StorageTextureOp { + Read(usize), + Write(usize), + SparseRead(usize), + TexelPointer(usize), + // TODO: Image Query Capability +} + +impl StorageTextureOp { + // OpImageWrite has no result; all other storage ops do. + pub fn result_idx(&self) -> usize { + match self { + StorageTextureOp::Read(i) + | StorageTextureOp::SparseRead(i) + | StorageTextureOp::TexelPointer(i) + | StorageTextureOp::Write(i) => *i, + } + } +} + +// Generally, spv[idx + 1] => result type, spv[idx + 2] => result, spv[idx + 3] => image / sampled image +pub fn trace_loaded_opaques(spv: &[u32], load_idxs: &[usize]) -> Vec { + // TODO: Memoize, we can do better than this. 
+ let mut op_sampled_image_idxs = vec![]; + let mut raw_image_op_idxs: Vec<(u16, usize)> = vec![]; + let mut sampled_image_op_idxs: Vec<(u16, usize)> = vec![]; + let mut storage_op_idxs: Vec<(u16, usize)> = vec![]; + + let mut spv_idx = 0; + while spv_idx < spv.len() { + let op = spv[spv_idx]; + let word_count = hiword(op) as usize; + let instruction = loword(op); + + match instruction { + SPV_INSTRUCTION_OP_SAMPLED_IMAGE => op_sampled_image_idxs.push(spv_idx), + SPV_INSTRUCTION_OP_IMAGE_FETCH + | SPV_INSTRUCTION_OP_IMAGE_GATHER + | SPV_INSTRUCTION_OP_IMAGE_DREF_GATHER => { + raw_image_op_idxs.push((instruction, spv_idx)) + } + SPV_INSTRUCTION_OP_IMAGE_SAMPLE_IMPLICIT_LOD + | SPV_INSTRUCTION_OP_IMAGE_SAMPLE_EXPLICIT_LOD + | SPV_INSTRUCTION_OP_IMAGE_SAMPLE_DREF_IMPLICIT_LOD + | SPV_INSTRUCTION_OP_IMAGE_SAMPLE_DREF_EXPLICIT_LOD + | SPV_INSTRUCTION_OP_IMAGE_SAMPLE_PROJ_IMPLICIT_LOD + | SPV_INSTRUCTION_OP_IMAGE_SAMPLE_PROJ_EXPLICIT_LOD + | SPV_INSTRUCTION_OP_IMAGE_SAMPLE_PROJ_DREF_IMPLICIT_LOD + | SPV_INSTRUCTION_OP_IMAGE_SAMPLE_PROJ_DREF_EXPLICIT_LOD => { + sampled_image_op_idxs.push((instruction, spv_idx)) + } + SPV_INSTRUCTION_OP_IMAGE_READ + | SPV_INSTRUCTION_OP_IMAGE_WRITE + | SPV_INSTRUCTION_OP_IMAGE_SPARSE_READ + | SPV_INSTRUCTION_OP_IMAGE_TEXEL_POINTER => { + storage_op_idxs.push((instruction, spv_idx)) + } + _ => {} + } + + spv_idx += word_count; + } + + let load_result_ids = load_idxs + .iter() + .map(|&idx| (spv[idx + 2], idx)) + .collect::>(); + + let mut results = vec![]; + + for &(instruction, idx) in &raw_image_op_idxs { + let loaded_image_id = spv[idx + 3]; + if let Some(&load_idx) = load_result_ids.get(&loaded_image_id) { + let op = match instruction { + SPV_INSTRUCTION_OP_IMAGE_FETCH => RawImageOp::Fetch(idx), + SPV_INSTRUCTION_OP_IMAGE_GATHER => RawImageOp::Gather(idx), + SPV_INSTRUCTION_OP_IMAGE_DREF_GATHER => RawImageOp::DrefGather(idx), + _ => unreachable!(), + }; + results.push(OpaqueLoadTrace { + load_idx, + next: OpaqueImageOp::RawImage(op), + 
}); + } + } + + for &(instruction, idx) in &storage_op_idxs { + let image_id = if instruction == SPV_INSTRUCTION_OP_IMAGE_WRITE { + spv[idx + 1] + } else { + spv[idx + 3] + }; + if let Some(&load_idx) = load_result_ids.get(&image_id) { + let op = match instruction { + SPV_INSTRUCTION_OP_IMAGE_READ => StorageTextureOp::Read(idx), + SPV_INSTRUCTION_OP_IMAGE_WRITE => StorageTextureOp::Write(idx), + SPV_INSTRUCTION_OP_IMAGE_SPARSE_READ => StorageTextureOp::SparseRead(idx), + SPV_INSTRUCTION_OP_IMAGE_TEXEL_POINTER => StorageTextureOp::TexelPointer(idx), + _ => unreachable!(), + }; + results.push(OpaqueLoadTrace { + load_idx, + next: OpaqueImageOp::RawStorage(op), + }); + } + } + + // (result_id, sampled_image_idx, load_idx, parent) for OpSampledImage nodes rooted at our loads. + let sampled_image_entries = op_sampled_image_idxs + .iter() + .filter_map(|&si_idx| { + let image_load = load_result_ids.get(&spv[si_idx + 3]).copied(); + let sampler_load = load_result_ids.get(&spv[si_idx + 4]).copied(); + match (image_load, sampler_load) { + (Some(_), Some(_)) => { + panic!("DAG node: OpSampledImage at {si_idx} has both image and sampler from tracked loads") + } + (Some(load_idx), None) => { + Some((spv[si_idx + 2], si_idx, load_idx, SampledImageParent::Image)) + } + (None, Some(load_idx)) => { + Some((spv[si_idx + 2], si_idx, load_idx, SampledImageParent::Sampler)) + } + (None, None) => None, + } + }) + .collect::>(); + + for &(instruction, idx) in sampled_image_op_idxs.iter() { + let Some(&(_, si_idx, load_idx, parent)) = + sampled_image_entries.iter().find(|(result_id, _, _, _)| { + let loaded_image_id = spv[idx + 3]; + *result_id == loaded_image_id + }) + else { + continue; + }; + let variant = match instruction { + SPV_INSTRUCTION_OP_IMAGE_SAMPLE_IMPLICIT_LOD => { + SampledImageVariant::SampleImplicitLod(idx) + } + SPV_INSTRUCTION_OP_IMAGE_SAMPLE_EXPLICIT_LOD => { + SampledImageVariant::SampleExplicitLod(idx) + } + SPV_INSTRUCTION_OP_IMAGE_SAMPLE_DREF_IMPLICIT_LOD => { + 
SampledImageVariant::SampleDrefImplicitLod(idx) + } + SPV_INSTRUCTION_OP_IMAGE_SAMPLE_DREF_EXPLICIT_LOD => { + SampledImageVariant::SampleDrefExplicitLod(idx) + } + SPV_INSTRUCTION_OP_IMAGE_SAMPLE_PROJ_IMPLICIT_LOD => { + SampledImageVariant::SampleProjImplicitLod(idx) + } + SPV_INSTRUCTION_OP_IMAGE_SAMPLE_PROJ_EXPLICIT_LOD => { + SampledImageVariant::SampleProjExplicitLod(idx) + } + SPV_INSTRUCTION_OP_IMAGE_SAMPLE_PROJ_DREF_IMPLICIT_LOD => { + SampledImageVariant::SampleProjDrefImplicitLod(idx) + } + SPV_INSTRUCTION_OP_IMAGE_SAMPLE_PROJ_DREF_EXPLICIT_LOD => { + SampledImageVariant::SampleProjDrefExplicitLod(idx) + } + _ => unreachable!(), + }; + results.push(OpaqueLoadTrace { + load_idx, + next: OpaqueImageOp::Sampled(SampledImageOp { + idx: si_idx, + parent, + next: variant, + }), + }); + } + + for &(instruction, idx) in &raw_image_op_idxs { + let Some(&(_, si_idx, load_idx, parent)) = + sampled_image_entries.iter().find(|(result_id, _, _, _)| { + let loaded_image_id = spv[idx + 3]; + *result_id == loaded_image_id + }) + else { + continue; + }; + let variant = match instruction { + SPV_INSTRUCTION_OP_IMAGE_GATHER => SampledImageVariant::Gather(idx), + SPV_INSTRUCTION_OP_IMAGE_DREF_GATHER => SampledImageVariant::DrefGather(idx), + _ => continue, + }; + results.push(OpaqueLoadTrace { + load_idx, + next: OpaqueImageOp::Sampled(SampledImageOp { + idx: si_idx, + parent, + next: variant, + }), + }); + } + + results +} + +pub fn reconstruct_opaque_trace_and_overwrite( + spv: &[u32], + new_spv: &mut [u32], + trace: &OpaqueLoadTrace, +) -> Vec { + fn take_instruction(spv: &[u32], idx: usize) -> &[u32] { + let word_count = hiword(spv[idx]) as usize; + &spv[idx..idx + word_count] + } + + fn write_nop_instruction(new_spv: &mut [u32], idx: usize) { + let word_count = hiword(new_spv[idx]) as usize; + new_spv[idx..idx + word_count].fill(encode_word(1, SPV_INSTRUCTION_OP_NOP)); + } + + let mut out = take_instruction(spv, trace.load_idx).to_vec(); + write_nop_instruction(new_spv, 
trace.load_idx); + + match &trace.next { + OpaqueImageOp::RawImage(op) => { + let op_idx = match op { + RawImageOp::Fetch(i) | RawImageOp::Gather(i) | RawImageOp::DrefGather(i) => *i, + }; + out.extend_from_slice(take_instruction(spv, op_idx)); + write_nop_instruction(new_spv, op_idx); + } + OpaqueImageOp::RawStorage(op) => { + let op_idx = match op { + StorageTextureOp::Read(i) + | StorageTextureOp::Write(i) + | StorageTextureOp::SparseRead(i) + | StorageTextureOp::TexelPointer(i) => *i, + }; + out.extend_from_slice(take_instruction(spv, op_idx)); + write_nop_instruction(new_spv, op_idx); + } + OpaqueImageOp::Sampled(SampledImageOp { + idx: si_idx, next, .. + }) => { + out.extend_from_slice(take_instruction(spv, *si_idx)); + write_nop_instruction(new_spv, *si_idx); + + let op_idx = match next { + SampledImageVariant::SampleImplicitLod(i) + | SampledImageVariant::SampleExplicitLod(i) + | SampledImageVariant::SampleDrefImplicitLod(i) + | SampledImageVariant::SampleDrefExplicitLod(i) + | SampledImageVariant::SampleProjImplicitLod(i) + | SampledImageVariant::SampleProjExplicitLod(i) + | SampledImageVariant::SampleProjDrefImplicitLod(i) + | SampledImageVariant::SampleProjDrefExplicitLod(i) + | SampledImageVariant::Gather(i) + | SampledImageVariant::DrefGather(i) => *i, + }; + out.extend_from_slice(take_instruction(spv, op_idx)); + write_nop_instruction(new_spv, op_idx); + } + } + + out +} + +#[test] +fn raw_image_fetch() { + #[rustfmt::skip] + let spv: &[u32] = &[ + encode_word(4, SPV_INSTRUCTION_OP_LOAD), 10, 20, 30, + encode_word(5, SPV_INSTRUCTION_OP_IMAGE_FETCH), 11, 21, 20, 40, + ]; + let traces = trace_loaded_opaques(spv, &[0]); + assert_eq!(traces.len(), 1); + assert!(matches!( + traces[0], + OpaqueLoadTrace { + load_idx: 0, + next: OpaqueImageOp::RawImage(RawImageOp::Fetch(4)) + } + )); +} + +#[test] +fn sampled_image_implicit_lod() { + #[rustfmt::skip] + let spv: &[u32] = &[ + encode_word(4, SPV_INSTRUCTION_OP_LOAD), 10, 20, 30, + encode_word(4, 
SPV_INSTRUCTION_OP_LOAD), 11, 21, 31, + encode_word(5, SPV_INSTRUCTION_OP_SAMPLED_IMAGE), 12, 22, 20, 21, + encode_word(5, SPV_INSTRUCTION_OP_IMAGE_SAMPLE_IMPLICIT_LOD), 13, 23, 22, 40, + ]; + let traces = trace_loaded_opaques(spv, &[0]); + assert_eq!(traces.len(), 1); + assert!(matches!( + traces[0], + OpaqueLoadTrace { + load_idx: 0, + next: OpaqueImageOp::Sampled(SampledImageOp { + idx: 8, + parent: SampledImageParent::Image, + next: SampledImageVariant::SampleImplicitLod(13), + }) + } + )); +} + +#[test] +fn storage_image_write() { + #[rustfmt::skip] + let spv: &[u32] = &[ + encode_word(4, SPV_INSTRUCTION_OP_LOAD), 10, 20, 30, + encode_word(4, SPV_INSTRUCTION_OP_IMAGE_WRITE), 20, 40, 50, + ]; + let traces = trace_loaded_opaques(spv, &[0]); + assert_eq!(traces.len(), 1); + assert!(matches!( + traces[0], + OpaqueLoadTrace { + load_idx: 0, + next: OpaqueImageOp::RawStorage(StorageTextureOp::Write(4)) + } + )); +}