Skip to content

Commit eeac85a

Browse files
committed
Add property index #529
1 parent d2e73fb commit eeac85a

10 files changed

+189
-211
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ Changes to JS assets are not included here, but in [`atomic-data-browser`'s CHAN
66

77
## UNRELEASED
88

9+
- Improve query performance, refactor indexes #529
910
- Improved error handling for HTTPS initialization #530
1011

1112
## [v0.34.0] - 2022-10-31

lib/src/atoms.rs

+6-3
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
33
use crate::{
44
errors::AtomicResult,
5-
values::{ReferenceString, Value},
5+
values::{ReferenceString, SortableValue, Value},
66
};
77

88
/// The Atom is the smallest meaningful piece of data.
@@ -34,13 +34,15 @@ impl Atom {
3434

3535
/// Converts one Atom to a series of stringified values that can be indexed.
3636
pub fn to_indexable_atoms(&self) -> Vec<IndexAtom> {
37+
let sort_value = self.value.to_sortable_string();
3738
let index_atoms = match &self.value.to_reference_index_strings() {
3839
Some(v) => v,
3940
None => return vec![],
4041
}
4142
.iter()
4243
.map(|v| IndexAtom {
43-
value: v.into(),
44+
ref_value: v.into(),
45+
sort_value: sort_value.clone(),
4446
subject: self.subject.clone(),
4547
property: self.property.clone(),
4648
})
@@ -56,7 +58,8 @@ impl Atom {
5658
pub struct IndexAtom {
5759
pub subject: String,
5860
pub property: String,
59-
pub value: ReferenceString,
61+
pub ref_value: ReferenceString,
62+
pub sort_value: SortableValue,
6063
}
6164

6265
impl std::fmt::Display for Atom {

lib/src/db.rs

+27-115
Original file line numberDiff line numberDiff line change
@@ -4,26 +4,27 @@
44
mod migrations;
55
mod prop_val_sub_index;
66
mod query_index;
7-
mod reference_index;
87
#[cfg(test)]
98
pub mod test;
9+
mod val_prop_sub_index;
1010

1111
use std::{
1212
collections::{HashMap, HashSet},
1313
sync::{Arc, Mutex},
1414
};
1515

16-
use tracing::{info, instrument, trace};
16+
use tracing::{info, instrument};
1717

1818
use crate::{
1919
atoms::IndexAtom,
2020
commit::CommitResponse,
21-
db::reference_index::key_to_atom,
21+
db::val_prop_sub_index::find_in_val_prop_sub_index,
2222
endpoints::{default_endpoints, Endpoint},
2323
errors::{AtomicError, AtomicResult},
2424
resources::PropVals,
2525
storelike::{Query, QueryResult, Storelike},
26-
Atom, Resource, Value,
26+
values::SortableValue,
27+
Atom, Resource,
2728
};
2829

2930
use self::{
@@ -34,9 +35,9 @@ use self::{
3435
},
3536
query_index::{
3637
check_if_atom_matches_watched_query_filters, query_indexed, update_indexed_member,
37-
watch_collection, IndexIterator, QueryFilter,
38+
IndexIterator, QueryFilter,
3839
},
39-
reference_index::{add_atom_to_reference_index, remove_atom_from_reference_index},
40+
val_prop_sub_index::{add_atom_to_reference_index, remove_atom_from_reference_index},
4041
};
4142

4243
// A function called by the Store when a Commit is accepted
@@ -474,29 +475,45 @@ impl Storelike for Db {
474475
#[instrument(skip(self))]
475476
fn query(&self, q: &Query) -> AtomicResult<QueryResult> {
476477
if let Ok(res) = query_indexed(self, q) {
478+
// TODO: Maybe this is not the best check.
479+
// If nothing is found, this may indicate both that the query is not indexed,
480+
// or that there are simply no results.
481+
// Probably should use `q_filter.is_watched` to check if the query is indexed.
477482
if res.count > 0 {
478483
// Yay, we have a cache hit!
479-
// We don't have to perform a (more expansive) TPF query + sorting
484+
// We don't have to create the indexes, so we can return early.
480485
return Ok(res);
481486
}
482487
}
483488

484489
let q_filter: QueryFilter = q.into();
485490

486491
// Maybe make this optional?
487-
watch_collection(self, &q_filter)?;
492+
q_filter.watch(self)?;
488493

489494
info!(filter = ?q_filter, "Building query index");
490495

491496
let atoms: IndexIterator = match (&q.property, q.value.as_ref()) {
492497
(Some(prop), val) => find_in_prop_val_sub_index(self, prop, val),
493498
(None, None) => self.all_index_atoms(q.include_external),
494-
(None, Some(_)) => todo!(),
499+
(None, Some(val)) => find_in_val_prop_sub_index(self, val, None),
495500
};
496501

497502
for a in atoms {
498503
let atom = a?;
499-
update_indexed_member(self, &q_filter, &atom.subject, &atom.value, false)?;
504+
let sort_val: SortableValue = if let Some(sort) = &q_filter.sort_by {
505+
if &atom.property == sort {
506+
atom.sort_value
507+
} else {
508+
// Find the sort value in the store
509+
let sort_atom = self.get_value(&atom.subject, sort)?;
510+
sort_atom.to_sortable_string()
511+
}
512+
} else {
513+
atom.sort_value
514+
};
515+
516+
update_indexed_member(self, &q_filter, &atom.subject, &sort_val, false)?;
500517
}
501518

502519
// Retry the same query!
@@ -562,111 +579,6 @@ impl Storelike for Db {
562579
fn set_default_agent(&self, agent: crate::agents::Agent) {
563580
self.default_agent.lock().unwrap().replace(agent);
564581
}
565-
566-
// TPF implementation that used the index_value cache, far more performant than the StoreLike implementation
567-
#[instrument(skip(self))]
568-
fn tpf(
569-
&self,
570-
q_subject: Option<&str>,
571-
q_property: Option<&str>,
572-
q_value: Option<&Value>,
573-
// Whether resources from outside the store should be searched through
574-
include_external: bool,
575-
) -> AtomicResult<Vec<Atom>> {
576-
trace!("tpf");
577-
let mut vec: Vec<Atom> = Vec::new();
578-
579-
let hassub = q_subject.is_some();
580-
let hasprop = q_property.is_some();
581-
let hasval = q_value.is_some();
582-
583-
// Simply return all the atoms
584-
if !hassub && !hasprop && !hasval {
585-
for resource in self.all_resources(include_external) {
586-
for (property, value) in resource.get_propvals() {
587-
vec.push(Atom::new(
588-
resource.get_subject().clone(),
589-
property.clone(),
590-
value.clone(),
591-
))
592-
}
593-
}
594-
return Ok(vec);
595-
}
596-
597-
// If the value is a resourcearray, check if it is inside
598-
let val_equals = |val: &str| {
599-
let q = q_value.unwrap().to_sortable_string();
600-
val == q || {
601-
if val.starts_with('[') {
602-
match crate::parse::parse_json_array(val) {
603-
Ok(vec) => return vec.contains(&q),
604-
Err(_) => return val == q,
605-
}
606-
}
607-
false
608-
}
609-
};
610-
611-
// Find atoms matching the TPF query in a single resource
612-
let mut find_in_resource = |resource: &Resource| {
613-
let subj = resource.get_subject();
614-
for (prop, val) in resource.get_propvals().iter() {
615-
if hasprop && q_property.as_ref().unwrap() == prop {
616-
if hasval {
617-
if val_equals(&val.to_string()) {
618-
vec.push(Atom::new(subj.into(), prop.into(), val.clone()))
619-
}
620-
break;
621-
} else {
622-
vec.push(Atom::new(subj.into(), prop.into(), val.clone()))
623-
}
624-
break;
625-
} else if hasval && !hasprop && val_equals(&val.to_string()) {
626-
vec.push(Atom::new(subj.into(), prop.into(), val.clone()))
627-
}
628-
}
629-
};
630-
631-
match q_subject {
632-
Some(sub) => match self.get_resource(sub) {
633-
Ok(resource) => {
634-
if hasprop | hasval {
635-
find_in_resource(&resource);
636-
Ok(vec)
637-
} else {
638-
Ok(resource.to_atoms())
639-
}
640-
}
641-
Err(_) => Ok(vec),
642-
},
643-
None => {
644-
if hasval {
645-
let key_prefix = if hasprop {
646-
format!("{}\n{}\n", q_value.unwrap(), q_property.unwrap())
647-
} else {
648-
format!("{}\n", q_value.unwrap())
649-
};
650-
for item in self.reference_index.scan_prefix(key_prefix) {
651-
let (k, _v) = item?;
652-
let key_string = String::from_utf8(k.to_vec())?;
653-
// WARNING: Converts all Atoms to Strings, the datatype is lost here
654-
let atom = key_to_atom(&key_string)?;
655-
// NOTE: This means we'll include random values that start with the current server URL, including paragraphs for example.
656-
if include_external || atom.subject.starts_with(self.get_server_url()) {
657-
vec.push(atom)
658-
}
659-
}
660-
return Ok(vec);
661-
}
662-
// TODO: Add an index for searching only by property
663-
for resource in self.all_resources(include_external) {
664-
find_in_resource(&resource);
665-
}
666-
Ok(vec)
667-
}
668-
}
669-
}
670582
}
671583

672584
fn corrupt_db_message(subject: &str) -> String {

lib/src/db/prop_val_sub_index.rs

+11-5
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,9 @@ fn key_from_atom(atom: &IndexAtom) -> Vec<u8> {
4444
[
4545
atom.property.as_bytes(),
4646
&[SEPARATION_BIT],
47-
atom.value.as_bytes(),
47+
atom.ref_value.as_bytes(),
48+
&[SEPARATION_BIT],
49+
atom.sort_value.as_bytes(),
4850
&[SEPARATION_BIT],
4951
atom.subject.as_bytes(),
5052
]
@@ -57,13 +59,16 @@ fn key_to_index_atom(key: &[u8]) -> AtomicResult<IndexAtom> {
5759
let mut parts = key.split(|b| b == &SEPARATION_BIT);
5860
let prop = std::str::from_utf8(parts.next().ok_or("Invalid key for prop_val_sub_index")?)
5961
.map_err(|_| "Can't parse prop into string")?;
60-
let val = std::str::from_utf8(parts.next().ok_or("Invalid key for prop_val_sub_index")?)
61-
.map_err(|_| "Can't parse val into string")?;
62+
let ref_val = std::str::from_utf8(parts.next().ok_or("Invalid key for prop_val_sub_index")?)
63+
.map_err(|_| "Can't parse ref_val into string")?;
64+
let sort_val = std::str::from_utf8(parts.next().ok_or("Invalid key for prop_val_sub_index")?)
65+
.map_err(|_| "Can't parse sort_val into string")?;
6266
let sub = std::str::from_utf8(parts.next().ok_or("Invalid key for prop_val_sub_index")?)
6367
.map_err(|_| "Can't parse subject into string")?;
6468
Ok(IndexAtom {
6569
property: prop.into(),
66-
value: val.into(),
70+
ref_value: ref_val.into(),
71+
sort_value: sort_val.into(),
6772
subject: sub.into(),
6873
})
6974
}
@@ -76,7 +81,8 @@ mod test {
7681
fn round_trip() {
7782
let atom = IndexAtom {
7883
property: "http://example.com/prop".into(),
79-
value: "http://example.com/val \n hello \n".into(),
84+
ref_value: "http://example.com/val \n hello \n".into(),
85+
sort_value: "2".into(),
8086
subject: "http://example.com/subj".into(),
8187
};
8288
let key = key_from_atom(&atom);

0 commit comments

Comments
 (0)