// Message definitions for a database of vectors that is split into
// partitions and whose vectors are encoded with codebooks.
//
// NOTE(review): no `package` is declared in this file, so every message
// lives in the global proto namespace. Adding one later changes the
// fully-qualified type names — decide before the schema is published.

syntax = "proto3";

// Top-level description of a vector database.
//
// Carries the global dimensions plus references to the partitions and
// codebooks; the referenced payloads are the Partition and Codebook messages.
message Database {
  // Number of elements in a vector.
  uint32 vector_size = 1;
  // Number of partitions in the database.
  uint32 num_partitions = 2;
  // Number of subvector divisions. vector_size must be a multiple of this.
  uint32 num_divisions = 3;
  // Number of codes in each codebook.
  uint32 num_codes = 4;

  // Partition references.
  // Actual partition data are loaded separately.
  // NOTE(review): presumably holds num_partitions entries — confirm.
  repeated PartitionRef partition_refs = 10;

  // Codebook references.
  // NOTE(review): presumably one entry per subvector division, since
  // EncodedVector::code_indices pairs each index with "the corresponding
  // codebook" — confirm.
  repeated CodebookRef codebook_refs = 20;
}

// Reference to a partition.
//
// Identifies a Partition by ID and carries a copy of its centroid.
message PartitionRef {
  // ID of the partition. Must be URL-safe.
  // Supposed to be the Base64-encoded SHA-256 digest of the entire partition.
  // NOTE(review): the standard Base64 alphabet contains '+' and '/', which
  // are not URL-safe; presumably the URL-safe alphabet is intended — confirm.
  string id = 1;
  // Centroid of the partition.
  // Number of elements is given by Database::vector_size.
  repeated float centroid = 2;
}

// Reference to a codebook.
message CodebookRef {
  // ID of the codebook. Must be URL-safe.
  // Supposed to be the Base64-encoded SHA-256 digest of the entire codebook.
  // NOTE(review): the standard Base64 alphabet contains '+' and '/', which
  // are not URL-safe; presumably the URL-safe alphabet is intended — confirm.
  string id = 1;
}

// Single partition: its centroid and the encoded vectors assigned to it.
message Partition {
  // Vector size.
  // NOTE(review): presumably must match Database::vector_size — confirm.
  uint32 vector_size = 1;
  // Number of subvector divisions. Must match Database::num_divisions.
  uint32 num_divisions = 2;
  // Number of vectors in the partition.
  uint32 num_vectors = 3;

  // Centroid of the partition.
  // Number of elements is given by vector_size.
  repeated float centroid = 10;

  // Encoded vectors. Number of elements is given by num_vectors.
  repeated EncodedVector encoded_vectors = 20;
}

// Codebook: a list of code vectors.
message Codebook {
  // Size of each code vector in this codebook.
  // Must be Database::vector_size / Database::num_divisions.
  uint32 vector_size = 1;
  // Number of codes in the codebook. Must match Database::num_codes.
  uint32 num_codes = 2;

  // Codes. Number of elements is given by num_codes.
  repeated CodeVector codes = 10;
}

// Encoded vector in a partition.
//
// The vector is stored as a list of code indices rather than raw floats.
message EncodedVector {
  // Index of the vector. Ranges over 0..Partition::num_vectors - 1.
  uint32 index = 1;
  // Indices of the codes in the codebooks.
  // Each element specifies the code in the corresponding codebook.
  // Number of elements is given by Partition::num_divisions.
  repeated uint32 code_indices = 2;
}

// Code vector: a single entry of a Codebook.
message CodeVector {
  // Vector elements. Number of elements is given by Codebook::vector_size.
  repeated float elements = 1;
}
0 commit comments