diff --git a/sha2/CHANGELOG.md b/sha2/CHANGELOG.md index af12c8d60..6efda9374 100644 --- a/sha2/CHANGELOG.md +++ b/sha2/CHANGELOG.md @@ -5,6 +5,13 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## 0.11.1 (UNRELEASED) +### Changed +- Removed workaround for unaligned loads in `riscv-zknh` backend ([#879]) +- `riscv-zknh` no longer requires `zbkb` (or `zbb`) target feature ([#879]) + +[#879]: https://github.com/RustCrypto/hashes/pull/879 + ## 0.11.0 (2026-03-25) ### Changed - Edition changed to 2024 and MSRV bumped to 1.85 ([#652]) diff --git a/sha2/README.md b/sha2/README.md index c19ca9ee0..c85bb6273 100644 --- a/sha2/README.md +++ b/sha2/README.md @@ -63,8 +63,7 @@ SHA-256 and SHA-512 backends: - `soft`: portable software implementation - `loongarch64-asm`: `asm!`-based implementation for LoongArch64 targets - `riscv-zknh`: uses the RISC-V `Zknh` scalar crypto extension. Experimental, - requires Nightly compiler and to enable `Zknh` and `Zbkb` (or `Zbb`) - target features at compile time. + see the [section below](#about-riscv-zknh) for more information. - `wasm32-simd128`: uses the WASM `simd128` extension. SHA-256 only backends: @@ -91,10 +90,10 @@ You can force backend selection using the following configuration flags: - `sha2_512_backend`: select SHA-512 backend. Supported values: `aarch64-sha3`, `soft`, `riscv-zknh`, `x86-avx2`. -They can be enabled using either the `RUSTFLAGS` environment variable +They can be enabled using either a `RUSTFLAGS` environment variable (e.g. `RUSTFLAGS='--cfg sha2_backend="soft"'`), or by modifying your `.cargo/config.toml` file. -Note that `sha2_backend` has higher priority than `sha2_256_backend` and `sha2_512_backend`. +Note that `sha2_backend` has a higher priority than `sha2_256_backend` and `sha2_512_backend`. 
In other words, using `--cfg sha2_backend="soft" --cfg sha2_256_backend="x86_sha"` will result in selection of the software backend for SHA-256. @@ -103,6 +102,16 @@ performance at the cost of a bigger resulting binary. You can disable unrolling by using `sha2_backend_soft = "compact"` and `sha2_backend_riscv_zknh = "compact"` configuration flags respectively. +### About `riscv-zknh` + +This is an experimental RISC-V-only backend which requires a Nightly compiler and +the `Zknh` target feature to be enabled at compile time. For more efficient code generation, +it's recommended to also enable the `Zbkb` (or `Zbb`) and `unaligned-scalar-mem` target features +(see [this LLVM issue][Zicclsm] for more information). For example, you can do this by using +`RUSTFLAGS="-C target-feature=+zknh,+zbkb,+unaligned-scalar-mem"`. + +[Zicclsm]: https://github.com/llvm/llvm-project/issues/110454 + ## License The crate is licensed under either of: diff --git a/sha2/src/sha256.rs b/sha2/src/sha256.rs index 5239ca15a..494bc6c06 100644 --- a/sha2/src/sha256.rs +++ b/sha2/src/sha256.rs @@ -5,11 +5,8 @@ cfg_if::cfg_if! { } else if #[cfg(any(sha2_backend = "riscv-zknh", sha2_256_backend = "riscv-zknh"))] { mod riscv_zknh; - #[cfg(not(all( - target_feature = "zknh", - any(target_feature = "zbb", target_feature = "zbkb") - )))] - compile_error!("riscv-zknh backend requires zknh and zbkb (or zbb) target features"); + #[cfg(not(target_feature = "zknh"))] + compile_error!("riscv-zknh backend requires `zknh` target feature"); fn compress(state: &mut [u32; 8], blocks: &[[u8; 64]]) { // SAFETY: we checked above that the required target features are enabled @@ -22,7 +19,7 @@ cfg_if::cfg_if! 
{ target_feature = "sha", target_feature = "sse4.1", )))] - compile_error!("x86-sha backend requires sha and sse4.1 target features"); + compile_error!("x86-sha backend requires `sha` and `sse4.1` target features"); fn compress(state: &mut [u32; 8], blocks: &[[u8; 64]]) { // SAFETY: we checked above that the required target features are enabled @@ -32,7 +29,7 @@ cfg_if::cfg_if! { mod aarch64_sha2; #[cfg(not(target_feature = "sha2"))] - compile_error!("aarch64-sha2 backend requires sha2 target feature"); + compile_error!("aarch64-sha2 backend requires `sha2` target feature"); fn compress(state: &mut [u64; 8], blocks: &[[u8; 128]]) { // SAFETY: we checked above that the required target features are enabled diff --git a/sha2/src/sha256/riscv_zknh.rs b/sha2/src/sha256/riscv_zknh.rs index a596a2209..fd4115bde 100644 --- a/sha2/src/sha256/riscv_zknh.rs +++ b/sha2/src/sha256/riscv_zknh.rs @@ -1,8 +1,6 @@ #[cfg(not(any(target_arch = "riscv32", target_arch = "riscv64")))] compile_error!("riscv-zknh backend can be used only on riscv32 and riscv64 target arches"); -mod utils; - #[cfg(target_arch = "riscv32")] use core::arch::riscv32::{sha256sig0, sha256sig1, sha256sum0, sha256sum1}; #[cfg(target_arch = "riscv64")] @@ -11,9 +9,20 @@ use core::arch::riscv64::{sha256sig0, sha256sig1, sha256sum0, sha256sum1}; cfg_if::cfg_if! 
{ if #[cfg(sha2_backend_riscv_zknh = "compact")] { mod compact; - pub(super) use compact::compress; + use compact::compress_block; } else { mod unroll; - pub(super) use unroll::compress; + use unroll::compress_block; + } +} + +#[target_feature(enable = "zknh")] +pub(super) fn compress(state: &mut [u32; 8], blocks: &[[u8; 64]]) { + for block in blocks { + let block: [u32; 16] = core::array::from_fn(|i| { + let chunk = block[4 * i..][..4].try_into().unwrap(); + u32::from_be_bytes(chunk) + }); + compress_block(state, block); } } diff --git a/sha2/src/sha256/riscv_zknh/compact.rs b/sha2/src/sha256/riscv_zknh/compact.rs index 83840560b..7121128b7 100644 --- a/sha2/src/sha256/riscv_zknh/compact.rs +++ b/sha2/src/sha256/riscv_zknh/compact.rs @@ -2,14 +2,7 @@ use super::{sha256sig0, sha256sig1, sha256sum0, sha256sum1}; use crate::consts::K32; #[target_feature(enable = "zknh")] -pub(in super::super) fn compress(state: &mut [u32; 8], blocks: &[[u8; 64]]) { - for block in blocks.iter().map(super::utils::load_block) { - compress_block(state, block); - } -} - -#[target_feature(enable = "zknh")] -fn compress_block(state: &mut [u32; 8], mut block: [u32; 16]) { +pub(super) fn compress_block(state: &mut [u32; 8], mut block: [u32; 16]) { let mut s = *state; for r in 0..64 { diff --git a/sha2/src/sha256/riscv_zknh/unroll.rs b/sha2/src/sha256/riscv_zknh/unroll.rs index e8b702e58..d9cfc80e4 100644 --- a/sha2/src/sha256/riscv_zknh/unroll.rs +++ b/sha2/src/sha256/riscv_zknh/unroll.rs @@ -2,14 +2,7 @@ use super::{sha256sig0, sha256sig1, sha256sum0, sha256sum1}; use crate::consts::K32; #[target_feature(enable = "zknh")] -pub(in super::super) fn compress(state: &mut [u32; 8], blocks: &[[u8; 64]]) { - for block in blocks.iter().map(super::utils::load_block) { - compress_block(state, block); - } -} - -#[target_feature(enable = "zknh")] -fn compress_block(state: &mut [u32; 8], mut block: [u32; 16]) { +pub(super) fn compress_block(state: &mut [u32; 8], mut block: [u32; 16]) { let s = &mut 
state.clone(); let b = &mut block; @@ -82,7 +75,7 @@ fn round<const R: usize>(state: &mut [u32; 8], block: &[u32; 16], k: &[u32]) { state[h] = state[h] .wrapping_add(sha256sum1(state[e])) .wrapping_add(ch(state[e], state[f], state[g])) - .wrapping_add(super::utils::opaque_load::<R>(k)) + .wrapping_add(opaque_load::<R>(k)) .wrapping_add(block[R]); state[d] = state[d].wrapping_add(state[h]); state[h] = state[h] @@ -99,3 +92,33 @@ fn ch(x: u32, y: u32, z: u32) -> u32 { fn maj(x: u32, y: u32, z: u32) -> u32 { (x & y) ^ (x & z) ^ (y & z) } + +/// This function returns `k[R]`, but prevents the compiler from inlining the indexed value +fn opaque_load<const R: usize>(k: &[u32]) -> u32 { + assert!(R < k.len()); + let dst; + + #[cfg(target_arch = "riscv64")] + unsafe { + core::arch::asm!( + "lwu {dst}, 4*{R}({k})", + R = const R, + k = in(reg) k.as_ptr(), + dst = out(reg) dst, + options(pure, readonly, nostack, preserves_flags), + ); + } + + #[cfg(target_arch = "riscv32")] + unsafe { + core::arch::asm!( + "lw {dst}, 4*{R}({k})", + R = const R, + k = in(reg) k.as_ptr(), + dst = out(reg) dst, + options(pure, readonly, nostack, preserves_flags), + ); + } + + dst +} diff --git a/sha2/src/sha256/riscv_zknh/utils.rs b/sha2/src/sha256/riscv_zknh/utils.rs deleted file mode 100644 index 2ec54977e..000000000 --- a/sha2/src/sha256/riscv_zknh/utils.rs +++ /dev/null @@ -1,97 +0,0 @@ -use core::{arch::asm, ptr}; - -#[inline(always)] -pub(super) fn load_block(block: &[u8; 64]) -> [u32; 16] { - if block.as_ptr().cast::<u32>().is_aligned() { - load_aligned_block(block) - } else { - load_unaligned_block(block) - } -} - -#[inline(always)] -fn load_aligned_block(block: &[u8; 64]) -> [u32; 16] { - let p: *const u32 = block.as_ptr().cast(); - debug_assert!(p.is_aligned()); - let mut res = [0u32; 16]; - for i in 0..16 { - let val = unsafe { ptr::read(p.add(i)) }; - res[i] = val.to_be(); - } - res -} - -/// Use LW instruction on RV32 and LWU on RV64 -#[cfg(target_arch = "riscv32")] -macro_rules! 
lw { - ($r:literal) => { - concat!("lw ", $r) - }; -} -#[cfg(target_arch = "riscv64")] -macro_rules! lw { - ($r:literal) => { - concat!("lwu ", $r) - }; -} - -#[inline(always)] -fn load_unaligned_block(block: &[u8; 64]) -> [u32; 16] { - let offset = (block.as_ptr() as usize) % align_of::<u32>(); - debug_assert_ne!(offset, 0); - let off1 = (8 * offset) % 32; - let off2 = (32 - off1) % 32; - let bp: *const u32 = block.as_ptr().wrapping_sub(offset).cast(); - - let mut left: u32; - let mut res = [0u32; 16]; - - unsafe { - asm!( - lw!("{left}, 0({bp})"), // left = unsafe { ptr::read(bp) }; - "srl {left}, {left}, {off1}", // left >>= off1; - bp = in(reg) bp, - off1 = in(reg) off1, - left = out(reg) left, - options(pure, nostack, readonly, preserves_flags), - ); - } - - for i in 0..15 { - let right = unsafe { ptr::read(bp.add(1 + i)) }; - res[i] = (left | (right << off2)).to_be(); - left = right >> off1; - } - - let right: u32; - unsafe { - asm!( - lw!("{right}, 16 * 4({bp})"), // right = ptr::read(bp.add(16)); - "sll {right}, {right}, {off2}", // right <<= off2; - bp = in(reg) bp, - off2 = in(reg) off2, - right = out(reg) right, - options(pure, nostack, readonly, preserves_flags), - ); - } - res[15] = (left | right).to_be(); - - res -} - -/// This function returns `k[R]`, but prevents compiler from inlining the indexed value -#[cfg(not(sha2_backend_riscv_zknh = "compact"))] -pub(super) fn opaque_load<const R: usize>(k: &[u32]) -> u32 { - assert!(R < k.len()); - let dst; - unsafe { - core::arch::asm!( - lw!("{dst}, 4*{R}({k})"), - R = const R, - k = in(reg) k.as_ptr(), - dst = out(reg) dst, - options(pure, readonly, nostack, preserves_flags), - ); - } - dst -} diff --git a/sha2/src/sha512.rs b/sha2/src/sha512.rs index 03cd57619..1822243de 100644 --- a/sha2/src/sha512.rs +++ b/sha2/src/sha512.rs @@ -5,11 +5,8 @@ cfg_if::cfg_if! 
{ } else if #[cfg(any(sha2_backend = "riscv-zknh", sha2_256_backend = "riscv-zknh"))] { mod riscv_zknh; - #[cfg(not(all( - target_feature = "zknh", - any(target_feature = "zbb", target_feature = "zbkb") - )))] - compile_error!("riscv-zknh backend requires zknh and zbkb (or zbb) target features"); + #[cfg(not(target_feature = "zknh"))] + compile_error!("riscv-zknh backend requires `zknh` target feature"); fn compress(state: &mut [u64; 8], blocks: &[[u8; 128]]) { // SAFETY: we checked above that the required target features are enabled @@ -19,7 +16,7 @@ cfg_if::cfg_if! { mod x86_avx2; #[cfg(not(target_feature = "avx2"))] - compile_error!("x86-avx2 backend requires avx2 target feature"); + compile_error!("x86-avx2 backend requires `avx2` target feature"); fn compress(state: &mut [u32; 8], blocks: &[[u8; 64]]) { // SAFETY: we checked above that the required target features are enabled @@ -29,7 +26,7 @@ cfg_if::cfg_if! { mod aarch64_sha3; #[cfg(not(target_feature = "sha3"))] - compile_error!("aarch64-sha3 backend requires sha3 target feature"); + compile_error!("aarch64-sha3 backend requires `sha3` target feature"); fn compress(state: &mut [u64; 8], blocks: &[[u8; 128]]) { // SAFETY: we checked above that the required target features are enabled diff --git a/sha2/src/sha512/riscv_zknh.rs b/sha2/src/sha512/riscv_zknh.rs index e5dc79e11..4c6d2c5f6 100644 --- a/sha2/src/sha512/riscv_zknh.rs +++ b/sha2/src/sha512/riscv_zknh.rs @@ -1,52 +1,61 @@ #[cfg(not(any(target_arch = "riscv32", target_arch = "riscv64")))] compile_error!("riscv-zknh backend can be used only on riscv32 and riscv64 target arches"); -mod utils; - cfg_if::cfg_if! 
{ if #[cfg(sha2_backend_riscv_zknh = "compact")] { mod compact; - pub(super) use compact::compress; + use compact::compress_block; } else { mod unroll; - pub(super) use unroll::compress; + use unroll::compress_block; } } -#[cfg(target_arch = "riscv64")] -use core::arch::riscv64::{sha512sig0, sha512sig1, sha512sum0, sha512sum1}; - -#[cfg(target_arch = "riscv32")] -use core::arch::riscv32::*; - -#[cfg(target_arch = "riscv32")] -#[target_feature(enable = "zknh")] -fn sha512sum0(x: u64) -> u64 { - let a = sha512sum0r((x >> 32) as u32, x as u32); - let b = sha512sum0r(x as u32, (x >> 32) as u32); - ((a as u64) << 32) | (b as u64) -} - -#[cfg(target_arch = "riscv32")] -#[target_feature(enable = "zknh")] -fn sha512sum1(x: u64) -> u64 { - let a = sha512sum1r((x >> 32) as u32, x as u32); - let b = sha512sum1r(x as u32, (x >> 32) as u32); - ((a as u64) << 32) | (b as u64) -} - -#[cfg(target_arch = "riscv32")] #[target_feature(enable = "zknh")] -fn sha512sig0(x: u64) -> u64 { - let a = sha512sig0h((x >> 32) as u32, x as u32); - let b = sha512sig0l(x as u32, (x >> 32) as u32); - ((a as u64) << 32) | (b as u64) +pub(super) fn compress(state: &mut [u64; 8], blocks: &[[u8; 128]]) { + for block in blocks { + let block: [u64; 16] = core::array::from_fn(|i| { + let chunk = block[8 * i..][..8].try_into().unwrap(); + u64::from_be_bytes(chunk) + }); + compress_block(state, block); + } } -#[cfg(target_arch = "riscv32")] -#[target_feature(enable = "zknh")] -fn sha512sig1(x: u64) -> u64 { - let a = sha512sig1h((x >> 32) as u32, x as u32); - let b = sha512sig1l(x as u32, (x >> 32) as u32); - ((a as u64) << 32) | (b as u64) +cfg_if::cfg_if! 
{ + if #[cfg(target_arch = "riscv64")] { + use core::arch::riscv64::{sha512sig0, sha512sig1, sha512sum0, sha512sum1}; + } else { + use core::arch::riscv32::{ + sha512sig0h, sha512sig0l, sha512sig1h, sha512sig1l, sha512sum0r, sha512sum1r, + }; + + #[target_feature(enable = "zknh")] + fn sha512sum0(x: u64) -> u64 { + let a = sha512sum0r((x >> 32) as u32, x as u32); + let b = sha512sum0r(x as u32, (x >> 32) as u32); + ((a as u64) << 32) | (b as u64) + } + + #[target_feature(enable = "zknh")] + fn sha512sum1(x: u64) -> u64 { + let a = sha512sum1r((x >> 32) as u32, x as u32); + let b = sha512sum1r(x as u32, (x >> 32) as u32); + ((a as u64) << 32) | (b as u64) + } + + #[target_feature(enable = "zknh")] + fn sha512sig0(x: u64) -> u64 { + let a = sha512sig0h((x >> 32) as u32, x as u32); + let b = sha512sig0l(x as u32, (x >> 32) as u32); + ((a as u64) << 32) | (b as u64) + } + + #[target_feature(enable = "zknh")] + fn sha512sig1(x: u64) -> u64 { + let a = sha512sig1h((x >> 32) as u32, x as u32); + let b = sha512sig1l(x as u32, (x >> 32) as u32); + ((a as u64) << 32) | (b as u64) + } + } } diff --git a/sha2/src/sha512/riscv_zknh/compact.rs b/sha2/src/sha512/riscv_zknh/compact.rs index 865288ee4..baf370b1d 100644 --- a/sha2/src/sha512/riscv_zknh/compact.rs +++ b/sha2/src/sha512/riscv_zknh/compact.rs @@ -2,14 +2,7 @@ use super::{sha512sig0, sha512sig1, sha512sum0, sha512sum1}; use crate::consts::K64; #[target_feature(enable = "zknh")] -pub(in super::super) fn compress(state: &mut [u64; 8], blocks: &[[u8; 128]]) { - for block in blocks.iter().map(super::utils::load_block) { - compress_block(state, block); - } -} - -#[target_feature(enable = "zknh")] -fn compress_block(state: &mut [u64; 8], mut block: [u64; 16]) { +pub(super) fn compress_block(state: &mut [u64; 8], mut block: [u64; 16]) { let mut s = *state; for r in 0..80 { diff --git a/sha2/src/sha512/riscv_zknh/unroll.rs b/sha2/src/sha512/riscv_zknh/unroll.rs index 9f21fa221..89d9e55c7 100644 --- 
a/sha2/src/sha512/riscv_zknh/unroll.rs +++ b/sha2/src/sha512/riscv_zknh/unroll.rs @@ -2,14 +2,7 @@ use super::{sha512sig0, sha512sig1, sha512sum0, sha512sum1}; use crate::consts::K64; #[target_feature(enable = "zknh")] -pub(in super::super) fn compress(state: &mut [u64; 8], blocks: &[[u8; 128]]) { - for block in blocks.iter().map(super::utils::load_block) { - compress_block(state, block); - } -} - -#[target_feature(enable = "zknh")] -fn compress_block(state: &mut [u64; 8], mut block: [u64; 16]) { +pub(super) fn compress_block(state: &mut [u64; 8], mut block: [u64; 16]) { let s = &mut state.clone(); let b = &mut block; @@ -82,7 +75,7 @@ fn round<const R: usize>(state: &mut [u64; 8], block: &[u64; 16], k: &[u64]) { state[h] = state[h] .wrapping_add(sha512sum1(state[e])) .wrapping_add(ch(state[e], state[f], state[g])) - .wrapping_add(super::utils::opaque_load::<R>(k)) + .wrapping_add(opaque_load::<R>(k)) .wrapping_add(block[R]); state[d] = state[d].wrapping_add(state[h]); state[h] = state[h] @@ -99,3 +92,34 @@ fn ch(x: u64, y: u64, z: u64) -> u64 { fn maj(x: u64, y: u64, z: u64) -> u64 { (x & y) ^ (x & z) ^ (y & z) } + +/// This function returns `k[R]`, but prevents the compiler from inlining the indexed value +fn opaque_load<const R: usize>(k: &[u64]) -> u64 { + assert!(R < k.len()); + #[cfg(target_arch = "riscv64")] + unsafe { + let dst; + core::arch::asm!( + "ld {dst}, 8 * {R}({k})", + R = const R, + k = in(reg) k.as_ptr(), + dst = out(reg) dst, + options(pure, readonly, nostack, preserves_flags), + ); + dst + } + #[cfg(target_arch = "riscv32")] + unsafe { + let [hi, lo]: [u32; 2]; + core::arch::asm!( + "lw {lo}, 8 * {R}({k})", + "lw {hi}, 8 * {R} + 4({k})", + R = const R, + k = in(reg) k.as_ptr(), + lo = out(reg) lo, + hi = out(reg) hi, + options(pure, readonly, nostack, preserves_flags), + ); + ((hi as u64) << 32) | (lo as u64) + } +} diff --git a/sha2/src/sha512/riscv_zknh/utils.rs b/sha2/src/sha512/riscv_zknh/utils.rs deleted file mode 100644 index 440682ea7..000000000 --- 
a/sha2/src/sha512/riscv_zknh/utils.rs +++ /dev/null @@ -1,163 +0,0 @@ -use core::{arch::asm, ptr}; - -#[inline(always)] -pub(super) fn load_block(block: &[u8; 128]) -> [u64; 16] { - if block.as_ptr().cast::<usize>().is_aligned() { - load_aligned_block(block) - } else { - load_unaligned_block(block) - } -} - -#[cfg(target_arch = "riscv32")] -fn load_aligned_block(block: &[u8; 128]) -> [u64; 16] { - let p: *const [u32; 32] = block.as_ptr().cast(); - debug_assert!(p.is_aligned()); - let block = unsafe { &*p }; - let mut res = [0u64; 16]; - for i in 0..16 { - let a = block[2 * i].to_be() as u64; - let b = block[2 * i + 1].to_be() as u64; - res[i] = (a << 32) | b; - } - res -} - -#[cfg(target_arch = "riscv64")] -fn load_aligned_block(block: &[u8; 128]) -> [u64; 16] { - let block_ptr: *const u64 = block.as_ptr().cast(); - debug_assert!(block_ptr.is_aligned()); - let mut res = [0u64; 16]; - for i in 0..16 { - let val = unsafe { ptr::read(block_ptr.add(i)) }; - res[i] = val.to_be(); - } - res -} - -#[cfg(target_arch = "riscv32")] -fn load_unaligned_block(block: &[u8; 128]) -> [u64; 16] { - let offset = (block.as_ptr() as usize) % align_of::<u32>(); - debug_assert_ne!(offset, 0); - let off1 = (8 * offset) % 32; - let off2 = (32 - off1) % 32; - let bp: *const u32 = block.as_ptr().wrapping_sub(offset).cast(); - - let mut left: u32; - let mut block32 = [0u32; 32]; - - unsafe { - asm!( - "lw {left}, 0({bp})", // left = unsafe { ptr::read(bp) }; - "srl {left}, {left}, {off1}", // left >>= off1; - bp = in(reg) bp, - off1 = in(reg) off1, - left = out(reg) left, - options(pure, nostack, readonly, preserves_flags), - ); - } - - for i in 0..31 { - let right = unsafe { ptr::read(bp.add(1 + i)) }; - block32[i] = left | (right << off2); - left = right >> off1; - } - - let right: u32; - unsafe { - asm!( - "lw {right}, 32 * 4({bp})", // right = ptr::read(bp.add(32)); - "sll {right}, {right}, {off2}", // right <<= off2; - bp = in(reg) bp, - off2 = in(reg) off2, - right = out(reg) right, - 
options(pure, nostack, readonly, preserves_flags), - ); - } - block32[31] = left | right; - - let mut block64 = [0u64; 16]; - for i in 0..16 { - let a = block32[2 * i].to_be() as u64; - let b = block32[2 * i + 1].to_be() as u64; - block64[i] = (a << 32) | b; - } - block64 -} - -#[cfg(target_arch = "riscv64")] -fn load_unaligned_block(block: &[u8; 128]) -> [u64; 16] { - let offset = (block.as_ptr() as usize) % align_of::<u64>(); - debug_assert_ne!(offset, 0); - let off1 = (8 * offset) % 64; - let off2 = (64 - off1) % 64; - let bp: *const u64 = block.as_ptr().wrapping_sub(offset).cast(); - - let mut left: u64; - let mut res = [0u64; 16]; - - unsafe { - asm!( - "ld {left}, 0({bp})", // left = unsafe { ptr::read(bp) }; - "srl {left}, {left}, {off1}", // left >>= off1; - bp = in(reg) bp, - off1 = in(reg) off1, - left = out(reg) left, - options(pure, nostack, readonly, preserves_flags), - ); - } - for i in 0..15 { - let right = unsafe { ptr::read(bp.add(1 + i)) }; - res[i] = (left | (right << off2)).to_be(); - left = right >> off1; - } - - let right: u64; - unsafe { - asm!( - "ld {right}, 16 * 8({bp})", // right = ptr::read(bp.add(16)); - "sll {right}, {right}, {off2}", // right <<= off2; - bp = in(reg) bp, - off2 = in(reg) off2, - right = out(reg) right, - options(pure, nostack, readonly, preserves_flags), - ); - } - res[15] = (left | right).to_be(); - - res -} - -/// This function returns `k[R]`, but prevents compiler from inlining the indexed value -#[cfg(not(sha2_backend_riscv_zknh = "compact"))] -pub(super) fn opaque_load<const R: usize>(k: &[u64]) -> u64 { - use core::arch::asm; - - assert!(R < k.len()); - #[cfg(target_arch = "riscv64")] - unsafe { - let dst; - asm!( - "ld {dst}, 8 * {R}({k})", - R = const R, - k = in(reg) k.as_ptr(), - dst = out(reg) dst, - options(pure, readonly, nostack, preserves_flags), - ); - dst - } - #[cfg(target_arch = "riscv32")] - unsafe { - let [hi, lo]: [u32; 2]; - asm!( - "lw {lo}, 8 * {R}({k})", - "lw {hi}, 8 * {R} + 4({k})", - R = const R, - k = in(reg) 
k.as_ptr(), - lo = out(reg) lo, - hi = out(reg) hi, - options(pure, readonly, nostack, preserves_flags), - ); - ((hi as u64) << 32) | (lo as u64) - } -}