Skip to content

Develop pre-processor for naive SQ8 [MOD-9238] #688

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 25 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
7a6c10a
make CreatePreprocessorsContainerParams templated and move it to head…
meiravgri May 8, 2025
cc4281a
plan for the tests
meiravgri May 8, 2025
74885a3
Merge remote-tracking branch 'origin/main' into meiravg_fix_blob_copy…
meiravgri May 11, 2025
86a44a9
rename original_blob_size-> input_blob_size
meiravgri May 12, 2025
3e15e76
preprocessors now change the blob size
meiravgri May 12, 2025
1863722
fix test
meiravgri May 12, 2025
55837ba
fix tiered test
meiravgri May 12, 2025
b1699ad
add assert storage_blob == nullptr || input_blob_size == processed_by…
meiravgri May 17, 2025
6dc543d
enable assert only in debug
meiravgri May 17, 2025
3e673b7
use constexpr for blob size
meiravgri May 17, 2025
8967d40
small docs changes
meiravgri May 18, 2025
674b136
review fixes
meiravgri May 27, 2025
d529f5e
ש
meiravgri May 27, 2025
af11142
notes and changes
dor-forer May 28, 2025
5461b97
Merge branch 'main' of https://github.com/RedisAI/VectorSimilarity in…
dor-forer May 28, 2025
eacd40f
Added tests and changes to the PP
dor-forer May 29, 2025
adec86b
frmat
dor-forer May 29, 2025
31a0c7d
Fix and add tests
dor-forer Jun 3, 2025
59fb16d
added tests for coverege
dor-forer Jun 4, 2025
866d8cb
format
dor-forer Jun 4, 2025
ec4a3a7
Remove the tests
dor-forer Jun 4, 2025
985c2c8
Fix test
dor-forer Jun 4, 2025
b7aeb2d
change to input output type
dor-forer Jun 5, 2025
b1fad81
Merge branch 'main' of https://github.com/RedisAI/VectorSimilarity in…
dor-forer Jun 5, 2025
a8aee99
format
dor-forer Jun 5, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion src/VecSim/spaces/computer/preprocessor_container.h
Original file line number Diff line number Diff line change
Expand Up @@ -171,10 +171,17 @@ MultiPreprocessorsContainer<DataType, n_preprocessors>::preprocess(const void *o

void *storage_blob = nullptr;
void *query_blob = nullptr;

// Use of separate variables for the storage_blob_size and query_blob_size, in case we need to
// change their sizes to different values.
size_t storage_blob_size = input_blob_size;
size_t query_blob_size = input_blob_size;

for (auto pp : preprocessors) {
if (!pp)
break;
pp->preprocess(original_blob, storage_blob, query_blob, input_blob_size, this->alignment);
pp->preprocess(original_blob, storage_blob, query_blob, storage_blob_size, query_blob_size,
this->alignment);
}
// At least one blob was allocated.

Expand Down
185 changes: 185 additions & 0 deletions src/VecSim/spaces/computer/preprocessors.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <memory>
#include <utility>

#include "VecSim/memory/vecsim_base.h"
#include "VecSim/spaces/spaces.h"
Expand All @@ -23,8 +24,12 @@
: VecsimBaseObject(allocator) {}
// Note: input_blob_size is relevant for both storage blob and query blob, as we assume results
// are the same size.
// Use the overload below for different sizes.
virtual void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
size_t &input_blob_size, unsigned char alignment) const = 0;
virtual void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
size_t &storage_blob_size, size_t &query_blob_size,
unsigned char alignment) const = 0;
virtual void preprocessForStorage(const void *original_blob, void *&storage_blob,
size_t &input_blob_size) const = 0;
virtual void preprocessQuery(const void *original_blob, void *&query_blob,
Expand All @@ -44,6 +49,20 @@
: PreprocessorInterface(allocator), normalize_func(spaces::GetNormalizeFunc<DataType>()),
dim(dim), processed_bytes_count(processed_bytes_count) {}

void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
size_t &storage_blob_size, size_t &query_blob_size,
unsigned char alignment) const override {
// This assert verifies that that the current use of this function is for blobs of the same
// size, which is the case for the Cosine preprocessor. If we ever need to support different
// sizes for storage and query blobs, we can remove the assert and implement the logic to
// handle different sizes.
assert(storage_blob_size == query_blob_size);

preprocess(original_blob, storage_blob, query_blob, storage_blob_size, alignment);
// Ensure both blobs have the same size after processing.
query_blob_size = storage_blob_size;
}

void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
size_t &input_blob_size, unsigned char alignment) const override {
// This assert verifies that if a blob was allocated by a previous preprocessor, its
Expand Down Expand Up @@ -128,3 +147,169 @@
const size_t dim;
const size_t processed_bytes_count;
};

/*
* QuantPreprocessor is a preprocessor that quantizes the input vector of INPUT_TYPE (float) to a
* lower precision representation using OUTPUT_TYPE (uint8_t). It stores the quantized values along
* with metadata (min value and scaling factor) in a single contiguous blob. The quantized values
* are then stored in an OUTPUT_TYPE array. The quantization is done by finding the minimum and
* maximum values of the input vector, and then scaling the values to fit in the range of [0, 255].
* The quantized blob size is: dim_elements * sizeof(OUTPUT_TYPE) + 2 * sizeof(float)
*/
class QuantPreprocessor : public PreprocessorInterface {
using INPUT_TYPE = float;
using OUTPUT_TYPE = uint8_t;

public:
// Constructor for backward compatibility (single blob size)
QuantPreprocessor(std::shared_ptr<VecSimAllocator> allocator, size_t dim)
: PreprocessorInterface(allocator), dim(dim),
storage_bytes_count(dim * sizeof(OUTPUT_TYPE) + 2 * sizeof(float)) {
} // quantized + min + delta

// Helper function to perform quantization. This function is used by both preprocess and
// preprocessQuery and supports in-place quantization of the storage blob.
void quantize(const INPUT_TYPE *input, OUTPUT_TYPE *quantized) const {
assert(input && quantized);
// Find min and max values
auto [min_val, max_val] = find_min_max(input);

// Calculate scaling factor
const float diff = (max_val - min_val);
const float delta = diff == 0.0f ? 1.0f : diff / 255.0f;
const float inv_delta = 1.0f / delta;

// Quantize the values
for (size_t i = 0; i < this->dim; i++) {
quantized[i] = static_cast<OUTPUT_TYPE>(std::round((input[i] - min_val) * inv_delta));
}

float *metadata = reinterpret_cast<float *>(quantized + this->dim);

// Store min_val, delta, in the metadata
metadata[0] = min_val;
metadata[1] = delta;
}

void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,

Check warning on line 194 in src/VecSim/spaces/computer/preprocessors.h

View check run for this annotation

Codecov / codecov/patch

src/VecSim/spaces/computer/preprocessors.h#L194

Added line #L194 was not covered by tests
size_t &input_blob_size, unsigned char alignment) const override {
// For backward compatibility - delegate to the two-size version with identical sizes
preprocess(original_blob, storage_blob, query_blob, input_blob_size, input_blob_size,

Check warning on line 197 in src/VecSim/spaces/computer/preprocessors.h

View check run for this annotation

Codecov / codecov/patch

src/VecSim/spaces/computer/preprocessors.h#L197

Added line #L197 was not covered by tests
alignment);
}

Check warning on line 199 in src/VecSim/spaces/computer/preprocessors.h

View check run for this annotation

Codecov / codecov/patch

src/VecSim/spaces/computer/preprocessors.h#L199

Added line #L199 was not covered by tests

void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
size_t &storage_blob_size, size_t &query_blob_size,
unsigned char alignment) const override {
// CASE 1: STORAGE BLOB NEEDS ALLOCATION
if (!storage_blob) {
// Allocate aligned memory for the quantized storage blob
storage_blob = static_cast<OUTPUT_TYPE *>(
this->allocator->allocate_aligned(this->storage_bytes_count, alignment));

// Quantize directly from original data
const INPUT_TYPE *input = static_cast<const INPUT_TYPE *>(original_blob);
quantize(input, static_cast<OUTPUT_TYPE *>(storage_blob));
}
// CASE 2: STORAGE BLOB EXISTS
else {
// CASE 2A: STORAGE AND QUERY SHARE MEMORY
if (storage_blob == query_blob) {
// Need to allocate a separate storage blob since query remains float32
// while storage needs to be quantized
void *new_storage =
this->allocator->allocate_aligned(this->storage_bytes_count, alignment);

// Quantize from the shared blob (query_blob) to the new storage blob
quantize(static_cast<const INPUT_TYPE *>(query_blob),
static_cast<OUTPUT_TYPE *>(new_storage));

// Update storage_blob to point to the new memory
storage_blob = new_storage;
}
// CASE 2B: SEPARATE STORAGE AND QUERY BLOBS
else {
// Check if storage blob needs resizing
if (storage_blob_size < this->storage_bytes_count) {
// Allocate new storage with correct size
OUTPUT_TYPE *new_storage = static_cast<OUTPUT_TYPE *>(
this->allocator->allocate_aligned(this->storage_bytes_count, alignment));

// Quantize from old storage to new storage
quantize(static_cast<const INPUT_TYPE *>(storage_blob),
static_cast<OUTPUT_TYPE *>(new_storage));

// Free old storage and update pointer
this->allocator->free_allocation(storage_blob);
storage_blob = new_storage;
} else {
// Storage blob is large enough, quantize in-place
quantize(static_cast<const INPUT_TYPE *>(storage_blob),
static_cast<OUTPUT_TYPE *>(storage_blob));
}
}
}

storage_blob_size = this->storage_bytes_count;
}

void preprocessForStorage(const void *original_blob, void *&blob,

Check warning on line 256 in src/VecSim/spaces/computer/preprocessors.h

View check run for this annotation

Codecov / codecov/patch

src/VecSim/spaces/computer/preprocessors.h#L256

Added line #L256 was not covered by tests
size_t &input_blob_size) const override {
// Allocate quantized blob if needed
if (!blob) {
blob = this->allocator->allocate(storage_bytes_count);

Check warning on line 260 in src/VecSim/spaces/computer/preprocessors.h

View check run for this annotation

Codecov / codecov/patch

src/VecSim/spaces/computer/preprocessors.h#L259-L260

Added lines #L259 - L260 were not covered by tests
}

// Cast to appropriate types
const INPUT_TYPE *input = static_cast<const INPUT_TYPE *>(original_blob);
OUTPUT_TYPE *quantized = static_cast<OUTPUT_TYPE *>(blob);
quantize(input, quantized);

Check warning on line 266 in src/VecSim/spaces/computer/preprocessors.h

View check run for this annotation

Codecov / codecov/patch

src/VecSim/spaces/computer/preprocessors.h#L264-L266

Added lines #L264 - L266 were not covered by tests

input_blob_size = storage_bytes_count;
}

Check warning on line 269 in src/VecSim/spaces/computer/preprocessors.h

View check run for this annotation

Codecov / codecov/patch

src/VecSim/spaces/computer/preprocessors.h#L268-L269

Added lines #L268 - L269 were not covered by tests

void preprocessQuery(const void *original_blob, void *&blob, size_t &query_blob_size,

Check warning on line 271 in src/VecSim/spaces/computer/preprocessors.h

View check run for this annotation

Codecov / codecov/patch

src/VecSim/spaces/computer/preprocessors.h#L271

Added line #L271 was not covered by tests
unsigned char alignment) const override {
// No-op: queries remain as float32
}

Check warning on line 274 in src/VecSim/spaces/computer/preprocessors.h

View check run for this annotation

Codecov / codecov/patch

src/VecSim/spaces/computer/preprocessors.h#L274

Added line #L274 was not covered by tests

void preprocessQueryInPlace(void *blob, size_t input_blob_size,

Check warning on line 276 in src/VecSim/spaces/computer/preprocessors.h

View check run for this annotation

Codecov / codecov/patch

src/VecSim/spaces/computer/preprocessors.h#L276

Added line #L276 was not covered by tests
unsigned char alignment) const override {
// No-op: queries remain as float32
}

Check warning on line 279 in src/VecSim/spaces/computer/preprocessors.h

View check run for this annotation

Codecov / codecov/patch

src/VecSim/spaces/computer/preprocessors.h#L279

Added line #L279 was not covered by tests

void preprocessStorageInPlace(void *original_blob, size_t input_blob_size) const override {
assert(original_blob);
assert(input_blob_size >= storage_bytes_count &&

Check warning on line 283 in src/VecSim/spaces/computer/preprocessors.h

View check run for this annotation

Codecov / codecov/patch

src/VecSim/spaces/computer/preprocessors.h#L281-L283

Added lines #L281 - L283 were not covered by tests
"Input buffer too small for in-place quantization");

quantize(static_cast<const INPUT_TYPE *>(original_blob),

Check warning on line 286 in src/VecSim/spaces/computer/preprocessors.h

View check run for this annotation

Codecov / codecov/patch

src/VecSim/spaces/computer/preprocessors.h#L286

Added line #L286 was not covered by tests
static_cast<OUTPUT_TYPE *>(original_blob));
}

Check warning on line 288 in src/VecSim/spaces/computer/preprocessors.h

View check run for this annotation

Codecov / codecov/patch

src/VecSim/spaces/computer/preprocessors.h#L288

Added line #L288 was not covered by tests

private:
std::pair<float, float> find_min_max(const INPUT_TYPE *input) const {
float min_val = input[0];
float max_val = input[0];

size_t i = 1;
// Process 4 elements at a time for better performance
for (; i + 3 < dim; i += 4) {
const float v0 = input[i];
const float v1 = input[i + 1];
const float v2 = input[i + 2];
const float v3 = input[i + 3];
min_val = std::min({min_val, v0, v1, v2, v3});
max_val = std::max({max_val, v0, v1, v2, v3});
}
// Handle remaining elements
for (; i < dim; i++) {
min_val = std::min(min_val, input[i]);
max_val = std::max(max_val, input[i]);
}
return {min_val, max_val};
}

const size_t dim;
const size_t storage_bytes_count;
};
Loading
Loading