Skip to content

Commit 9ffa02a

Browse files
Krzysztof Rymski authored and copybara-github committed
Fix int8
PiperOrigin-RevId: 882587013
1 parent d6e836c commit 9ffa02a

2 files changed

Lines changed: 3 additions & 4 deletions

File tree

compression/compress-inl.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -472,7 +472,7 @@ struct CompressTraits<int8_t> {
472472
const auto vi32_1 = hn::NearestInt(v1);
473473
const auto vi16 = hn::OrderedDemote2To(di16, vi32_0, vi32_1);
474474
const auto vi8 = hn::OrderedDemote2To(
475-
di8_16, hn::UpperHalf(di16_16, vi16), hn::LowerHalf(di16_16, vi16));
475+
di8_16, hn::LowerHalf(di16_16, vi16), hn::UpperHalf(di16_16, vi16));
476476
hn::StoreU(vi8, di8_16, packed.ptr + packed_ofs + i);
477477
}
478478
}
@@ -487,7 +487,7 @@ struct CompressTraits<int8_t> {
487487
const auto vi32_1 = hn::NearestInt(v1);
488488
const auto vi16 = hn::OrderedDemote2To(di16, vi32_0, vi32_1);
489489
const auto vi8 = hn::OrderedDemote2To(
490-
di8_16, hn::UpperHalf(di16_16, vi16), hn::LowerHalf(di16_16, vi16));
490+
di8_16, hn::LowerHalf(di16_16, vi16), hn::UpperHalf(di16_16, vi16));
491491
hn::StoreN(vi8, di8_16, packed.ptr + packed_ofs + i, remaining);
492492
}
493493
}

gemma/tiled_attention.cc

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -249,8 +249,7 @@ static HWY_INLINE void ComputeQKVTransposedTile(
249249
v_cache_values = v_buf;
250250
}
251251

252-
if (attention_impl == AttentionImpl::kFlashTransposedQsBF16 &&
253-
!IsInt8<KV_T>()) {
252+
if (attention_impl == AttentionImpl::kFlashTransposedQsBF16) {
254253
const int in_tile_idx_mod_2 = in_tile_idx % 2;
255254
for (int dim = 0; dim < qkv_dim; dim += 2) {
256255
const int dim_mod_2 = dim % 2;

0 commit comments

Comments
 (0)