From 40407c6b81292c1cc26b1613ad2b3075388a4e8a Mon Sep 17 00:00:00 2001 From: meiravgri <109056284+meiravgri@users.noreply.github.com> Date: Thu, 12 Mar 2026 12:20:52 +0200 Subject: [PATCH] MOD-14470 Add VecSimParams_GetQueryBlobSize API for safe query vector allocation (#915) * api for VecSimParams_GetQueryBlobSize * fix dtor * move index = nullptr to the test start * format (cherry picked from commit 8d37cf1b2d0e3799a8d55399372366d7e01e7187) --- src/VecSim/vec_sim.cpp | 12 ++++++++++++ src/VecSim/vec_sim.h | 13 +++++++++++++ tests/unit/test_common.cpp | 26 +++++++++++++++++++++++++- 3 files changed, 50 insertions(+), 1 deletion(-) diff --git a/src/VecSim/vec_sim.cpp b/src/VecSim/vec_sim.cpp index 1cc8ea8b0..ceb64502c 100644 --- a/src/VecSim/vec_sim.cpp +++ b/src/VecSim/vec_sim.cpp @@ -207,6 +207,18 @@ extern "C" void VecSim_Normalize(void *blob, size_t dim, VecSimType type) { } } +extern "C" size_t VecSimParams_GetQueryBlobSize(VecSimType type, size_t dim, VecSimMetric metric) { + // Assert all supported types are covered + assert(type == VecSimType_FLOAT32 || type == VecSimType_FLOAT64 || + type == VecSimType_BFLOAT16 || type == VecSimType_FLOAT16 || type == VecSimType_INT8 || + type == VecSimType_UINT8); + size_t blobSize = VecSimType_sizeof(type) * dim; + if (metric == VecSimMetric_Cosine && (type == VecSimType_INT8 || type == VecSimType_UINT8)) { + blobSize += sizeof(float); // For the norm + } + return blobSize; +} + extern "C" size_t VecSimIndex_IndexSize(VecSimIndex *index) { return index->indexSize(); } extern "C" VecSimResolveCode VecSimIndex_ResolveParams(VecSimIndex *index, VecSimRawParam *rparams, diff --git a/src/VecSim/vec_sim.h b/src/VecSim/vec_sim.h index 56110a900..4958a0a79 100644 --- a/src/VecSim/vec_sim.h +++ b/src/VecSim/vec_sim.h @@ -98,6 +98,19 @@ double VecSimIndex_GetDistanceFrom_Unsafe(VecSimIndex *index, size_t label, cons */ void VecSim_Normalize(void *blob, size_t dim, VecSimType type); +/** + * @brief Returns the required blob size for a 
query vector that will be normalized. + * + * For INT8/UINT8 vectors with Cosine metric, VecSim_Normalize appends the norm (a float) + * at the end of the blob, so the required size is dim * sizeof(type) + sizeof(float). + * + * @param type vector element type. + * @param dim vector dimension. + * @param metric distance metric. + * @return required blob size in bytes. + */ +size_t VecSimParams_GetQueryBlobSize(VecSimType type, size_t dim, VecSimMetric metric); + /** * @brief Return the number of vectors in the index. * @param index the index whose size is requested. diff --git a/tests/unit/test_common.cpp b/tests/unit/test_common.cpp index ee4970adf..ac13641f9 100644 --- a/tests/unit/test_common.cpp +++ b/tests/unit/test_common.cpp @@ -836,7 +836,11 @@ class CommonTypeMetricTests : public testing::TestWithParam void test_initial_size_estimation(); - virtual void TearDown() { VecSimIndex_Free(index); } + virtual void TearDown() { + if (index) { + VecSimIndex_Free(index); + } + } VecSimIndex *index; }; @@ -880,6 +884,26 @@ TEST_P(CommonTypeMetricTests, TestInitialSizeEstimationHNSW) { this->test_initial_size_estimation(); } +TEST_P(CommonTypeMetricTests, TestGetQueryBlobSize) { + // No index is needed for this test; set it to nullptr so TearDown skips the free + this->index = nullptr; + + size_t dim = 4; + VecSimType type = std::get<0>(GetParam()); + VecSimMetric metric = std::get<1>(GetParam()); + + // Call the API function + size_t actual = VecSimParams_GetQueryBlobSize(type, dim, metric); + + // Calculate expected blob size + size_t expected = dim * VecSimType_sizeof(type); + if (metric == VecSimMetric_Cosine && (type == VecSimType_INT8 || type == VecSimType_UINT8)) { + expected += sizeof(float); // For the norm + } + + ASSERT_EQ(actual, expected); +} + class CommonTypeMetricTieredTests : public CommonTypeMetricTests { protected: virtual void TearDown() override {}