Initial version of SQDataset

ahuber21 · ahuber21 · commit 0b8f7e83697b · 2025-03-21T07:51:40.000-07:00
diff --git a/examples/cpp/CMakeLists.txt b/examples/cpp/CMakeLists.txt
@@ -48,6 +48,12 @@ create_simple_example(custom_thread_pool test_custom_thread_pool custom_thread_p
 configure_file(../../data/test_dataset/data_f32.fvecs . COPYONLY)
 configure_file(../../data/test_dataset/queries_f32.fvecs . COPYONLY)
 configure_file(../../data/test_dataset/groundtruth_euclidean.ivecs . COPYONLY)
+
+# Temporary example executable for the scalar quantization dataset (SQDataset).
+# Built from sqdataset.cpp with the same include directory as the other examples here.
+add_executable(sqdataset sqdataset.cpp)
+target_include_directories(sqdataset PRIVATE ${CMAKE_CURRENT_LIST_DIR})
+target_link_libraries(sqdataset ${SVS_LIB} svs_compile_options svs_native_options)
+
 # The vamana test executable.
 add_executable(vamana vamana.cpp)
 target_include_directories(vamana PRIVATE ${CMAKE_CURRENT_LIST_DIR})
diff --git a/include/svs/quantization/scalar/impl/scalar_impl.h b/include/svs/quantization/scalar/impl/scalar_impl.h
@@ -0,0 +1,226 @@
+/*
+ * Copyright 2025 Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "svs/quantization/scalar/scalar.h"
+
+#include <algorithm>
+#include <cassert>
+#include <cmath>
+#include <cstddef>
+#include <cstdint>
+#include <limits>
+#include <span>
+#include <utility>
+#include <vector>
+
+namespace svs {
+namespace quantization {
+namespace scalar {
+
+// Construct a dataset with storage for `size` vectors of `dims` dimensions.
+//
+// No quantization parameters are known at this point, so the identity mapping
+// (scale 1, bias 0) is installed. Without this, `scale_` and `bias_` would hold
+// indeterminate values and any later `get_datum`/`set_datum` call would read them.
+template <size_t Extent, typename Alloc>
+SQDataset<Extent, Alloc>::SQDataset(size_t size, size_t dims)
+    : scale_{1.0F}
+    , bias_{0.0F}
+    , data_{size, dims} {}
+
+// Take ownership of already-quantized storage together with the parameters used to
+// decode it: a stored code `c` is read back as `scale * c + bias`.
+template <size_t Extent, typename Alloc>
+SQDataset<Extent, Alloc>::SQDataset(data_type data, float scale, float bias)
+    : scale_{scale}
+    , bias_{bias}
+    , data_(std::move(data)) {}
+
+// Number of vectors reported by the underlying compressed storage.
+template <size_t Extent, typename Alloc>
+auto SQDataset<Extent, Alloc>::size() const -> size_t {
+    const size_t count = data_.size();
+    return count;
+}
+
+// Number of dimensions per vector, as reported by the underlying compressed storage.
+template <size_t Extent, typename Alloc>
+auto SQDataset<Extent, Alloc>::dimensions() const -> size_t {
+    const size_t dims = data_.dimensions();
+    return dims;
+}
+
+// Return a decompressed copy of the vector stored at index `i`.
+//
+// Every stored 8-bit code `c` is mapped back to `scale_ * c + bias_`. The result is a
+// freshly allocated vector rather than a view, because the values are transformed.
+template <size_t Extent, typename Alloc>
+typename SQDataset<Extent, Alloc>::const_value_type
+SQDataset<Extent, Alloc>::get_datum(size_t i) const {
+    std::vector<float> decoded(dimensions());
+    compressed_value_type codes = data_.get_datum(i);
+
+    size_t pos = 0;
+    for (float& out : decoded) {
+        out = scale_ * static_cast<float>(codes[pos]) + bias_;
+        ++pos;
+    }
+
+    return decoded;
+}
+
+// Quantize `datum` with the dataset's current scale and bias and store it at index `i`.
+//
+// Each element `v` is encoded as `round((v - bias_) / scale_)`, clamped to the int8
+// range. `datum` must have exactly `dimensions()` elements (checked by `assert`).
+template <size_t Extent, typename Alloc>
+template <typename QueryType, size_t N>
+void SQDataset<Extent, Alloc>::set_datum(size_t i, std::span<QueryType, N> datum) {
+    auto dims = dimensions();
+    assert(datum.size() == dims);
+
+    // Compression range extrema
+    static constexpr std::int8_t MIN = std::numeric_limits<std::int8_t>::min();
+    static constexpr std::int8_t MAX = std::numeric_limits<std::int8_t>::max();
+
+    // Uniform scalar quantization function
+    auto scalar = [&](float v) -> std::int8_t {
+        const float code = std::round((v - bias_) / scale_);
+        // A NaN code arises from NaN input or from 0/0 when `scale_` is zero.
+        // `std::clamp` passes NaN through and converting NaN to an integer is
+        // undefined behavior, so map it to a fixed, well-defined code instead.
+        if (std::isnan(code)) {
+            return 0;
+        }
+        return static_cast<std::int8_t>(std::clamp<float>(code, MIN, MAX));
+    };
+
+    // Prepare compressed elements
+    std::vector<std::int8_t> buffer(dims);
+    for (size_t j = 0; j < dims; ++j) {
+        // Apply scalar quantization to element
+        buffer[j] = scalar(static_cast<float>(datum[j]));
+    }
+    data_.set_datum(i, buffer);
+
+    // TODO: Float16 truncation check? (see codec.h, line 114)
+}
+
+// Convenience overload: compress `data` by forwarding to the `num_threads` overload
+// with a thread count of one.
+template <size_t Extent, typename Alloc>
+template <data::ImmutableMemoryDataset Dataset>
+auto SQDataset<Extent, Alloc>::compress(
+    const Dataset& data, const allocator_type& allocator
+) -> SQDataset<Extent, Alloc> {
+    return compress(data, size_t{1}, allocator);
+}
+
+// Convenience overload: build a `threads::DefaultThreadPool` from `num_threads` and
+// forward to the thread-pool overload.
+template <size_t Extent, typename Alloc>
+template <data::ImmutableMemoryDataset Dataset>
+auto SQDataset<Extent, Alloc>::compress(
+    const Dataset& data, size_t num_threads, const allocator_type& allocator
+) -> SQDataset<Extent, Alloc> {
+    threads::DefaultThreadPool workers{num_threads};
+    return compress(data, workers, allocator);
+}
+
+// Quantize `data` into 8-bit codes using a single global scale and bias.
+//
+// Pass 1 scans every element in parallel to find the global minimum and maximum.
+// Pass 2 encodes each value `v` as `round((v - bias) / scale)` clamped to the int8
+// range, with `scale` and `bias` chosen so that the minimum maps to the lowest code
+// and the maximum to the highest code.
+//
+// Throws (via ANNEXCEPTION) when `Extent` is static and differs from
+// `data.dimensions()`.
+template <size_t Extent, typename Alloc>
+template <data::ImmutableMemoryDataset Dataset, threads::ThreadPool Pool>
+SQDataset<Extent, Alloc> SQDataset<Extent, Alloc>::compress(
+    const Dataset& data, Pool& threadpool, const allocator_type& allocator
+) {
+    if (Extent != Dynamic && data.dimensions() != Extent) {
+        throw ANNEXCEPTION("Dimension mismatch!");
+    }
+
+    static constexpr size_t batch_size = 512;
+
+    // Helper struct to collect the value range.
+    // It starts as an empty range (min > max) so that the first accumulated value
+    // defines both extrema. Starting at 0.0 would force zero into the range and
+    // waste codes on datasets whose values do not straddle zero.
+    struct Accumulator {
+        double min = std::numeric_limits<double>::max();
+        double max = std::numeric_limits<double>::lowest();
+
+        void accumulate(double val) {
+            min = std::min(min, val);
+            max = std::max(max, val);
+        }
+
+        void merge(const Accumulator& other) {
+            min = std::min(min, other.min);
+            max = std::max(max, other.max);
+        }
+    };
+
+    // One accumulator per pool thread, indexed by the `tid` handed to the callback
+    std::vector<Accumulator> tls(threadpool.size());
+
+    // Pass 1: compute the global minimum and maximum
+    const size_t dims = data.dimensions();
+    threads::parallel_for(
+        threadpool,
+        threads::DynamicPartition(data.size(), batch_size),
+        [&](const auto& indices, uint64_t tid) {
+            threads::UnitRange range{indices};
+            Accumulator local;
+
+            for (size_t i = range.start(); i < range.stop(); ++i) {
+                const auto& datum = data.get_datum(i);
+                for (size_t d = 0; d < dims; ++d) {
+                    local.accumulate(datum[d]);
+                }
+            }
+
+            tls.at(tid).merge(local);
+        }
+    );
+
+    // Reduce
+    Accumulator global;
+    for (const auto& partial : tls) {
+        global.merge(partial);
+    }
+
+    // TODO: Templated compression bits
+    // static constexpr size_t bits = 8;
+
+    // Compression range extrema
+    static constexpr std::int8_t MIN = std::numeric_limits<std::int8_t>::min();
+    static constexpr std::int8_t MAX = std::numeric_limits<std::int8_t>::max();
+
+    // Compute scale and bias.
+    // Defaults (identity mapping) cover the case where no element was seen at all.
+    float scale = 1.0F;
+    float bias = 0.0F;
+    if (global.min <= global.max) {
+        const auto candidate =
+            static_cast<float>((global.max - global.min) / (MAX - MIN));
+        if (candidate > 0.0F) {
+            scale = candidate;
+            bias = static_cast<float>(global.min - MIN * scale);
+        } else {
+            // All values are (numerically) identical: a zero scale would divide by
+            // zero below. Keep scale at 1 and anchor the bias at the single value, so
+            // every element encodes to code 0 and decodes back to that value.
+            bias = static_cast<float>(global.min);
+        }
+    }
+
+    // Uniform scalar quantization function
+    auto scalar = [&](float v) -> std::int8_t {
+        const float code = std::round((v - bias) / scale);
+        // `std::clamp` passes NaN through and converting NaN to an integer is
+        // undefined behavior, so NaN input is mapped to a fixed code.
+        if (std::isnan(code)) {
+            return 0;
+        }
+        return static_cast<std::int8_t>(std::clamp<float>(code, MIN, MAX));
+    };
+
+    data_type compressed{data.size(), data.dimensions(), allocator};
+
+    // Pass 2: encode every row
+    threads::parallel_for(
+        threadpool,
+        threads::DynamicPartition(data.size(), batch_size),
+        [&](const auto& indices, uint64_t /*tid*/) {
+            threads::UnitRange range{indices};
+
+            // Scratch buffer reused for every row of this batch (one allocation per
+            // batch instead of one per row)
+            std::vector<std::int8_t> transformed;
+
+            for (size_t i = range.start(); i < range.stop(); ++i) {
+                // Load original row
+                auto original = data.get_datum(i);
+                transformed.resize(original.size());
+
+                for (size_t d = 0; d < original.size(); ++d) {
+                    float val = static_cast<float>(original[d]);
+                    transformed[d] = scalar(val);
+                }
+
+                // Store the encoded row
+                compressed.set_datum(i, transformed);
+            }
+        }
+    );
+
+    return SQDataset<Extent, Alloc>{std::move(compressed), scale, bias};
+}
+
+// Serialize the dataset as a table tagged with `serialization_schema` and
+// `save_version`, holding the compressed data plus the "scale" and "bias" entries.
+template <size_t Extent, typename Alloc>
+auto SQDataset<Extent, Alloc>::save(const lib::SaveContext& ctx) const
+    -> lib::SaveTable {
+    return lib::SaveTable(
+        serialization_schema,
+        save_version,
+        {
+            SVS_LIST_SAVE_(data, ctx),
+            {"scale", lib::save(scale_, ctx)},
+            {"bias", lib::save(bias_, ctx)},
+        }
+    );
+}
+
+// Rebuild a dataset from a table written by `save`: the compressed data is loaded
+// with `allocator`, followed by the "scale" and "bias" entries.
+template <size_t Extent, typename Alloc>
+auto SQDataset<Extent, Alloc>::load(
+    const lib::LoadTable& table, const allocator_type& allocator
+) -> SQDataset<Extent, Alloc> {
+    // Loaded in the same order as the original braced initializer: data, scale, bias
+    auto storage = SVS_LOAD_MEMBER_AT_(table, data, allocator);
+    const float scale = lib::load_at<float>(table, "scale");
+    const float bias = lib::load_at<float>(table, "bias");
+    return SQDataset<Extent, Alloc>{std::move(storage), scale, bias};
+}
+
+} // namespace scalar
+} // namespace quantization
+} // namespace svs
diff --git a/include/svs/quantization/scalar/scalar.h b/include/svs/quantization/scalar/scalar.h
@@ -0,0 +1,100 @@
+/*
+ * Copyright 2025 Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+// svs
+#include "svs/core/data/simple.h"
+#include "svs/lib/memory.h"
+#include "svs/lib/static.h"
+#include "svs/lib/version.h"
+
+// stl
+#include <memory>
+
+namespace svs {
+namespace quantization {
+namespace scalar {
+
+// Schema name and version passed to `lib::SaveTable` by `SQDataset::save`.
+// `SQDataset` re-exports them as `serialization_schema` and `save_version`.
+inline constexpr std::string_view scalar_quantization_serialization_schema =
+    "scalar_quantization_dataset";
+inline constexpr lib::Version scalar_quantization_save_version = lib::Version(0, 0, 0);
+
+// Scalar Quantization Dataset
+//
+// This class provides a globally quantized dataset: every element is stored as an
+// 8-bit code and all elements share one `scale` and one `bias`. A stored code `c`
+// is read back as `scale * c + bias`.
+//
+// Template parameters:
+//   Extent - static number of dimensions, or `svs::Dynamic`.
+//   Alloc  - allocator type used by the underlying `data::SimpleData` storage.
+template <size_t Extent = svs::Dynamic, typename Alloc = lib::Allocator<std::int8_t>>
+class SQDataset {
+  public:
+    constexpr static size_t extent = Extent;
+
+    using allocator_type = Alloc;
+    // TODO: replace int8 with template
+    using data_type = data::SimpleData<std::int8_t, Extent, allocator_type>;
+
+    // TODO: get_datum will return this type, other classes would return compressed data
+    //       while we return uncompressed data for simplicity. Maybe this needs to change
+    // using const_value_type = std::span<const std::int8_t, Extent>;
+    // using value_type = const_value_type;
+    // TODO: This is potentially a performance bottleneck. Other datasets simply return a
+    // view, but because we are manipulating the values before return, they must go into a
+    // vector
+    using compressed_value_type = std::span<const std::int8_t, Extent>;
+    using const_value_type = std::vector<float>;
+    using value_type = const_value_type;
+
+  private:
+    // Decoding parameters. They default to the identity mapping so that an instance
+    // created through the (size, dims) constructor never exposes indeterminate values.
+    float scale_ = 1.0F;
+    float bias_ = 0.0F;
+    // Compressed (int8) storage.
+    data_type data_;
+
+  public:
+    // Create storage for `size` vectors of `dims` dimensions.
+    SQDataset(size_t size, size_t dims);
+    // Wrap already-compressed `data` with the `scale` and `bias` used to decode it.
+    SQDataset(data_type data, float scale, float bias);
+
+    // Number of stored vectors.
+    size_t size() const;
+    // Number of dimensions per vector.
+    size_t dimensions() const;
+
+    float get_scale() const { return scale_; }
+    float get_bias() const { return bias_; }
+
+    // Return a decompressed copy of vector `i`.
+    const_value_type get_datum(size_t i) const;
+
+    // Quantize `datum` with the current scale and bias and store it at index `i`.
+    template <typename QueryType, size_t N>
+    void set_datum(size_t i, std::span<QueryType, N> datum);
+
+    // Compress `data` using a single thread.
+    template <data::ImmutableMemoryDataset Dataset>
+    static SQDataset compress(const Dataset& data, const allocator_type& allocator = {});
+
+    // Compress `data` using a default thread pool constructed from `num_threads`.
+    template <data::ImmutableMemoryDataset Dataset>
+    static SQDataset
+    compress(const Dataset& data, size_t num_threads, const allocator_type& allocator = {});
+
+    // Compress `data` using the caller-provided `threadpool`.
+    template <data::ImmutableMemoryDataset Dataset, threads::ThreadPool Pool>
+    static SQDataset
+    compress(const Dataset& data, Pool& threadpool, const allocator_type& allocator = {});
+
+    static constexpr lib::Version save_version = scalar_quantization_save_version;
+    static constexpr std::string_view serialization_schema =
+        scalar_quantization_serialization_schema;
+    // Serialize the compressed data together with scale and bias.
+    lib::SaveTable save(const lib::SaveContext& ctx) const;
+
+    // Reconstruct a dataset from a table produced by `save`.
+    static SQDataset
+    load(const lib::LoadTable& table, const allocator_type& allocator = {});
+};
+
+} // namespace scalar
+} // namespace quantization
+} // namespace svs
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
@@ -139,6 +139,9 @@ set(TEST_SOURCES
     # Inverted
     ${TEST_DIR}/svs/index/inverted/clustering.cpp
 
+    # Global scalar quantization
+    ${TEST_DIR}/svs/quantization/scalar/scalar.cpp
+
     # # ${TEST_DIR}/svs/index/vamana/dynamic_index.cpp
 )
 
diff --git a/tests/svs/quantization/scalar/scalar.cpp b/tests/svs/quantization/scalar/scalar.cpp

Original file line number	Diff line number	Diff line change
`@@ -139,6 +139,9 @@ set(TEST_SOURCES`
`139`	`139`	`# Inverted`
`140`	`140`	`${TEST_DIR}/svs/index/inverted/clustering.cpp`
`141`	`141`
	`142`	`+ # Global scalar quantization`
	`143`	`+ ${TEST_DIR}/svs/quantization/scalar/scalar.cpp`
	`144`	`+`
`142`	`145`	`# # ${TEST_DIR}/svs/index/vamana/dynamic_index.cpp`
`143`	`146`	`)`
`144`	`147`