Skip to content

Commit 78436e1

Browse files
committed
feat: add Protocol Buffers definitions for DB
- `src/protos/database.proto` describes Protocol Buffers definitions for the vector database. - `build.rs` generates Rust code from `src/protos/database.proto`.
1 parent 848f881 commit 78436e1

File tree

2 files changed

+94
-0
lines changed

2 files changed

+94
-0
lines changed

Diff for: build.rs

+10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
// Build script: generates Rust code from `src/protos/database.proto`.
fn main() {
2+
// Ask Cargo to re-run this build script only when the proto file changes.
println!("cargo:rerun-if-changed=src/protos/database.proto");
3+
protobuf_codegen::Codegen::new()
4+
// Use the `protoc` command as the .proto parser.
.protoc()
5+
// Point at the protoc binary located by `protoc_bin_vendored`;
// `unwrap` panics (failing the build) if no binary path is returned.
.protoc_path(&protoc_bin_vendored::protoc_bin_path().unwrap())
6+
// Include directory used by protoc; the input below lives inside it.
.includes(&["src/protos"])
7+
.input("src/protos/database.proto")
8+
// Generated code goes to the "protos" output directory
// (presumably a subdirectory of Cargo's OUT_DIR; see protobuf_codegen docs).
.cargo_out_dir("protos")
9+
// Run code generation in build-script mode.
.run_from_script();
10+
}

Diff for: src/protos/database.proto

+84
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
// Protocol Buffers definitions for the vector database.
2+
3+
syntax = "proto3";
4+
5+
// Database: global parameters plus references to its partitions and codebooks.
6+
message Database {
7+
// Number of elements in a vector.
8+
uint32 vector_size = 1;
9+
// Number of partitions in the database.
10+
uint32 num_partitions = 2;
11+
// Number of subvector divisions. vector_size must be a multiple of this.
12+
uint32 num_divisions = 3;
13+
// Number of codes in each codebook.
14+
uint32 num_codes = 4;
15+
16+
// Partition references.
17+
// Actual data are separately loaded.
18+
repeated PartitionRef partition_refs = 10;
19+
20+
// Codebook references. Presumably one per division, with the actual data loaded separately (TODO confirm).
21+
repeated CodebookRef codebook_refs = 20;
22+
}
23+
24+
// Reference to a partition: its ID and centroid, without the encoded vectors.
25+
message PartitionRef {
26+
// ID of the partition. Must be URL-safe.
27+
// Supposed to be the Base64-encoded SHA-256 digest of the entire partition. NOTE(review): standard Base64 uses '+' and '/', so presumably the URL-safe alphabet is meant; confirm.
28+
string id = 1;
29+
// Centroid of the partition.
30+
// Number of elements is given by Database::vector_size.
31+
repeated float centroid = 2;
32+
}
33+
34+
// Reference to a codebook: carries only the ID of the codebook.
35+
message CodebookRef {
36+
// ID of the codebook. Must be URL-safe.
37+
// Supposed to be the Base64-encoded SHA-256 digest of the entire codebook. NOTE(review): standard Base64 uses '+' and '/', so presumably the URL-safe alphabet is meant; confirm.
38+
string id = 1;
39+
}
40+
41+
// Single partition: its centroid and the encoded vectors that belong to it.
42+
message Partition {
43+
// Vector size. Presumably must match Database::vector_size (TODO confirm).
44+
uint32 vector_size = 1;
45+
// Number of subvector divisions. Must match Database::num_divisions.
46+
uint32 num_divisions = 2;
47+
// Number of vectors in the partition.
48+
uint32 num_vectors = 3;
49+
50+
// Centroid of the partition.
51+
// Number of elements is given by vector_size.
52+
repeated float centroid = 10;
53+
54+
// Encoded vectors. Number of elements is given by num_vectors.
55+
repeated EncodedVector encoded_vectors = 20;
56+
}
57+
58+
// Codebook: a list of code vectors.
59+
message Codebook {
60+
// Size of each code vector in this codebook (i.e., the subvector size).
61+
// Must be Database::vector_size / Database::num_divisions.
62+
uint32 vector_size = 1;
63+
// Number of codes in the codebook. Must match Database::num_codes.
64+
uint32 num_codes = 2;
65+
66+
// Codes. Number of elements is given by num_codes.
67+
repeated CodeVector codes = 10;
68+
}
69+
70+
// Encoded vector in a partition, represented as indices into the codebooks.
71+
message EncodedVector {
72+
// Index of the vector. Ranges from 0 to Partition::num_vectors - 1.
73+
uint32 index = 1;
74+
// Indices of the codes in the codebooks.
75+
// Each element specifies the code in the corresponding codebook.
76+
// Number of elements is given by Partition::num_divisions.
77+
repeated uint32 code_indices = 2;
78+
}
79+
80+
// Code vector: a single entry of Codebook::codes.
81+
message CodeVector {
82+
// Vector elements. Number of elements is given by Codebook::vector_size.
83+
repeated float elements = 1;
84+
}

0 commit comments

Comments
 (0)