Skip to content

Commit

Permalink
add payload schema to collection info + indexing fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
generall committed Jun 13, 2021
1 parent 615b924 commit cfc5bee
Show file tree
Hide file tree
Showing 20 changed files with 605 additions and 266 deletions.
426 changes: 236 additions & 190 deletions Cargo.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "qdrant"
version = "0.3.0"
version = "0.3.2"
authors = ["Andrey Vasnetsov <[email protected]>"]
edition = "2018"
doctest = false
Expand Down
Binary file added docs/favicon.ico
Binary file not shown.
85 changes: 84 additions & 1 deletion docs/redoc/openapi.json
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,13 @@
"minimum": 0,
"type": "integer"
},
"payload_schema": {
"additionalProperties": {
"$ref": "#/components/schemas/PayloadSchemaInfo"
},
"description": "Types of stored payload",
"type": "object"
},
"ram_data_size": {
"description": "RAM used by collection",
"format": "uint",
Expand All @@ -147,6 +154,7 @@
"required": [
"config",
"disk_data_size",
"payload_schema",
"ram_data_size",
"segments_count",
"status",
Expand Down Expand Up @@ -802,6 +810,81 @@
],
"description": "Define operations description for point payloads manipulation"
},
"PayloadSchemaInfo": {
"properties": {
"data_type": {
"$ref": "#/components/schemas/PayloadSchemaType"
},
"indexed": {
"type": "boolean"
}
},
"required": [
"data_type",
"indexed"
],
"type": "object"
},
"PayloadSchemaType": {
"anyOf": [
{
"properties": {
"type": {
"enum": [
"keyword"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"properties": {
"type": {
"enum": [
"integer"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"properties": {
"type": {
"enum": [
"float"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"properties": {
"type": {
"enum": [
"geo"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
}
]
},
"PayloadType": {
"anyOf": [
{
Expand Down Expand Up @@ -1521,7 +1604,7 @@
"url": "http://www.apache.org/licenses/LICENSE-2.0.html"
},
"title": "Qdrant API",
"version": "0.3.0"
"version": "0.3.2"
},
"openapi": "3.0.1",
"paths": {
Expand Down
7 changes: 6 additions & 1 deletion lib/collection/src/collection.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use itertools::Itertools;
use parking_lot::{Mutex, RwLock};
use tokio::runtime::Runtime;

use segment::types::{HasIdCondition, PointIdType, ScoredPoint, VectorElementType, SegmentType};
use segment::types::{HasIdCondition, PointIdType, ScoredPoint, VectorElementType, SegmentType, PayloadKeyType, PayloadSchemaInfo};
use segment::types::Condition;
use segment::types::Filter;

Expand Down Expand Up @@ -67,6 +67,7 @@ impl Collection {
let mut ram_size = 0;
let mut disk_size = 0;
let mut status = CollectionStatus::Green;
let mut schema: HashMap<PayloadKeyType, PayloadSchemaInfo> = Default::default();
for (_idx, segment) in segments.iter() {
segments_count += 1;
let segment_info = segment.get().read().info();
Expand All @@ -76,6 +77,9 @@ impl Collection {
vectors_count += segment_info.num_vectors;
disk_size += segment_info.disk_usage_bytes;
ram_size += segment_info.ram_usage_bytes;
for (key, val) in segment_info.schema.into_iter() {
schema.insert(key, val);
}
}
Ok(CollectionInfo {
status,
Expand All @@ -84,6 +88,7 @@ impl Collection {
disk_data_size: disk_size,
ram_data_size: ram_size,
config: self.config.read().clone(),
payload_schema: schema,
})
}

Expand Down
5 changes: 4 additions & 1 deletion lib/collection/src/operations/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,11 @@ use tokio::task::JoinError;
use std::result;

use segment::entry::entry_point::OperationError;
use segment::types::{Filter, PayloadKeyType, PayloadType, PointIdType, SearchParams, SeqNumberType, TheMap, VectorElementType};
use segment::types::{Filter, PayloadKeyType, PayloadType, PointIdType, SearchParams, SeqNumberType, TheMap, VectorElementType, PayloadSchemaInfo};

use crate::config::CollectionConfig;
use crate::wal::WalError;
use std::collections::HashMap;

/// Type of vector in API
pub type VectorType = Vec<VectorElementType>;
Expand Down Expand Up @@ -55,6 +56,8 @@ pub struct CollectionInfo {
pub ram_data_size: usize,
/// Collection settings
pub config: CollectionConfig,
/// Types of stored payload
pub payload_schema: HashMap<PayloadKeyType, PayloadSchemaInfo>
}


Expand Down
5 changes: 1 addition & 4 deletions lib/segment/benches/hnsw_search_graph.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use segment::index::hnsw_index::point_scorer::FilteredScorer;
use segment::fixtures::index_fixtures::{TestRawScorerProducer, FakeConditionChecker, random_vector};


const NUM_VECTORS: usize = 50000;
const NUM_VECTORS: usize = 100000;
const DIM: usize = 64;
const M: usize = 16;
const TOP: usize = 10;
Expand Down Expand Up @@ -72,9 +72,6 @@ fn hnsw_benchmark(c: &mut Criterion) {
|score| {
if score.score > top_score { top_score = score.score }
});
if top_score > 0.99 {
eprintln!("top_score = {:#?}", top_score);
}
})
});

Expand Down
79 changes: 57 additions & 22 deletions lib/segment/src/index/field_index/numeric_index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ use serde::{Deserialize, Serialize};
use crate::index::field_index::{CardinalityEstimation, PrimaryCondition, PayloadBlockCondition};
use crate::index::field_index::field_index::{FieldIndex, PayloadFieldIndex, PayloadFieldIndexBuilder};
use crate::types::{FloatPayloadType, IntPayloadType, PayloadType, PointOffsetType, Range, FieldCondition, PayloadKeyType};
use itertools::Itertools;

#[derive(Debug, Deserialize, Serialize, Clone)]
pub struct Element<N> {
Expand Down Expand Up @@ -134,28 +135,38 @@ impl<N: ToPrimitive + Clone> PayloadFieldIndex for PersistedNumericIndex<N> {
let value_per_point = num_elements as f64 / self.points_count as f64;
let effective_threshold = (threshold as f64 * value_per_point) as usize;

let iter = (0..num_elements).step_by(effective_threshold / 2).map(move |init_offset| {
let upper_index = min(num_elements - 1, init_offset + effective_threshold);

let upper_value = self.elements[upper_index].value.to_f64();
let lower_value = self.elements[init_offset].value.to_f64();

PayloadBlockCondition {
condition: FieldCondition {
key: key.clone(),
r#match: None,
range: Some(Range {
lt: None,
gt: None,
gte: lower_value,
lte: upper_value,
}),
geo_bounding_box: None,
geo_radius: None,
},
cardinality: ((upper_index - init_offset) as f64 / value_per_point) as usize,
}
});
let iter = (0..num_elements).step_by(effective_threshold / 2)
.filter_map(move |init_offset| {
let upper_index = min(num_elements - 1, init_offset + effective_threshold);

let upper_value = self.elements[upper_index].value.to_f64();
let lower_value = self.elements[init_offset].value.to_f64();

if upper_value == lower_value {
return None; // Range blocks makes no sense within a single value
}
Some(Range {
lt: None,
gt: None,
gte: lower_value,
lte: upper_value,
})
})
.dedup()
.map(move |range| {
let cardinality = self.range_cardinality(&range);

PayloadBlockCondition {
condition: FieldCondition {
key: key.clone(),
r#match: None,
range: Some(range),
geo_bounding_box: None,
geo_radius: None,
},
cardinality: cardinality.exp,
}
});

Box::new(iter)
}
Expand Down Expand Up @@ -202,6 +213,30 @@ impl PayloadFieldIndexBuilder for PersistedNumericIndex<IntPayloadType> {
mod tests {
use super::*;

/// Checks that `payload_blocks` yields exactly one range block for an index
/// holding nine elements with only two distinct values (five `1.0`s followed
/// by four `2.0`s), and that the block spans `1.0..=2.0`.
#[test]
fn test_payload_blocks() {
    let threshold = 4;

    // Ids 1..=5 carry the value 1.0, ids 6..=9 carry the value 2.0.
    let index = PersistedNumericIndex {
        points_count: 9,
        elements: (1..=9)
            .map(|id| Element {
                id,
                value: if id <= 5 { 1.0 } else { 2.0 },
            })
            .collect(),
    };

    let blocks: Vec<_> = index.payload_blocks(threshold, "test".to_owned()).collect();
    assert_eq!(blocks.len(), 1);

    // The only surviving block must be a range condition covering both values.
    let range = blocks[0].condition.range.as_ref().expect("range condition");
    assert_eq!(range.gte.expect("gte"), 1.0);
    assert_eq!(range.lte.expect("lte"), 2.0);
}

#[test]
fn test_bsearch() {
let index = PersistedNumericIndex {
Expand Down
9 changes: 7 additions & 2 deletions lib/segment/src/index/hnsw_index/build_condition_checker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,25 @@ use crate::types::{Filter, PointOffsetType};
use crate::index::visited_pool::VisitedList;

pub struct BuildConditionChecker {
pub filter_list: VisitedList
pub filter_list: VisitedList,
pub current_point: PointOffsetType
}

impl BuildConditionChecker {
pub fn new(list_size: usize) -> Self {
BuildConditionChecker {
filter_list: VisitedList::new(list_size)
filter_list: VisitedList::new(list_size),
current_point: PointOffsetType::default()
}
}
}


impl ConditionChecker for BuildConditionChecker {
    /// Decides whether `point_id` passes the build-time filter.
    ///
    /// The point stored in `current_point` never matches, so the point that is
    /// being inserted is not matched against itself (a second time). Every
    /// other point is delegated to `filter_list.check`. The `_query` filter is
    /// ignored.
    fn check(&self, point_id: PointOffsetType, _query: &Filter) -> bool {
        // Short-circuits: `filter_list` is not consulted for the current point.
        point_id != self.current_point && self.filter_list.check(point_id)
    }
}
Loading

0 comments on commit cfc5bee

Please sign in to comment.