From 197c1a049c09330e4c1899ff87b1e1d3dd24d9a5 Mon Sep 17 00:00:00 2001
From: Krzysztof Rymski
Date: Thu, 12 Mar 2026 08:42:56 -0700
Subject: [PATCH] Fix int8

PiperOrigin-RevId: 882611833
---
 compression/compress-inl.h | 4 ++--
 gemma/tiled_attention.cc   | 3 +--
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/compression/compress-inl.h b/compression/compress-inl.h
index 5f09df39..a6aa5e36 100644
--- a/compression/compress-inl.h
+++ b/compression/compress-inl.h
@@ -472,7 +472,7 @@ struct CompressTraits {
       const auto vi32_1 = hn::NearestInt(v1);
       const auto vi16 = hn::OrderedDemote2To(di16, vi32_0, vi32_1);
       const auto vi8 = hn::OrderedDemote2To(
-          di8_16, hn::UpperHalf(di16_16, vi16), hn::LowerHalf(di16_16, vi16));
+          di8_16, hn::LowerHalf(di16_16, vi16), hn::UpperHalf(di16_16, vi16));
       hn::StoreU(vi8, di8_16, packed.ptr + packed_ofs + i);
     }
   }
@@ -487,7 +487,7 @@ struct CompressTraits {
       const auto vi32_1 = hn::NearestInt(v1);
       const auto vi16 = hn::OrderedDemote2To(di16, vi32_0, vi32_1);
       const auto vi8 = hn::OrderedDemote2To(
-          di8_16, hn::UpperHalf(di16_16, vi16), hn::LowerHalf(di16_16, vi16));
+          di8_16, hn::LowerHalf(di16_16, vi16), hn::UpperHalf(di16_16, vi16));
       hn::StoreN(vi8, di8_16, packed.ptr + packed_ofs + i, remaining);
     }
   }
diff --git a/gemma/tiled_attention.cc b/gemma/tiled_attention.cc
index a5b0e81e..cd8eaa1c 100644
--- a/gemma/tiled_attention.cc
+++ b/gemma/tiled_attention.cc
@@ -249,8 +249,7 @@ static HWY_INLINE void ComputeQKVTransposedTile(
       v_cache_values = v_buf;
     }
 
-    if (attention_impl == AttentionImpl::kFlashTransposedQsBF16 &&
-        !IsInt8()) {
+    if (attention_impl == AttentionImpl::kFlashTransposedQsBF16) {
       const int in_tile_idx_mod_2 = in_tile_idx % 2;
       for (int dim = 0; dim < qkv_dim; dim += 2) {
         const int dim_mod_2 = dim % 2;