From 978c365f37b1c6f45ef36b4c569943572aa739b4 Mon Sep 17 00:00:00 2001 From: Pascal Seitz Date: Wed, 4 Sep 2024 13:37:32 +0800 Subject: [PATCH] add compat tests --- src/compat_tests.rs | 80 ++++++++++++++++++ src/lib.rs | 9 +- .../range_query/range_query_fastfield.rs | 2 +- src/termdict/mod.rs | 25 +++++- .../compat_tests_data/index_v6/.managed.json | 1 + .../index_v6/.tantivy-meta.lock | 0 .../index_v6/.tantivy-writer.lock | 0 .../00000000000000000000000000000000.fast | Bin 0 -> 146 bytes ...00000000000000000000000000000000.fieldnorm | Bin 0 -> 113 bytes .../00000000000000000000000000000000.idx | Bin 0 -> 130 bytes .../00000000000000000000000000000000.pos | Bin 0 -> 112 bytes .../00000000000000000000000000000000.store | Bin 0 -> 170 bytes .../00000000000000000000000000000000.term | Bin 0 -> 349 bytes tests/compat_tests_data/index_v6/meta.json | 40 +++++++++ 14 files changed, 150 insertions(+), 7 deletions(-) create mode 100644 src/compat_tests.rs create mode 100644 tests/compat_tests_data/index_v6/.managed.json create mode 100644 tests/compat_tests_data/index_v6/.tantivy-meta.lock create mode 100644 tests/compat_tests_data/index_v6/.tantivy-writer.lock create mode 100644 tests/compat_tests_data/index_v6/00000000000000000000000000000000.fast create mode 100644 tests/compat_tests_data/index_v6/00000000000000000000000000000000.fieldnorm create mode 100644 tests/compat_tests_data/index_v6/00000000000000000000000000000000.idx create mode 100644 tests/compat_tests_data/index_v6/00000000000000000000000000000000.pos create mode 100644 tests/compat_tests_data/index_v6/00000000000000000000000000000000.store create mode 100644 tests/compat_tests_data/index_v6/00000000000000000000000000000000.term create mode 100644 tests/compat_tests_data/index_v6/meta.json diff --git a/src/compat_tests.rs b/src/compat_tests.rs new file mode 100644 index 0000000000..6e75c5de2a --- /dev/null +++ b/src/compat_tests.rs @@ -0,0 +1,80 @@ +use std::path::PathBuf; + +use schema::*; + +use crate::*; 
+ +fn create_index(path: &str) { + let mut schema_builder = Schema::builder(); + let label = schema_builder.add_text_field("label", TEXT | STORED); + let date = schema_builder.add_date_field("date", INDEXED | STORED); + let schema = schema_builder.build(); + std::fs::create_dir_all(path).unwrap(); + let index = Index::create_in_dir(path, schema).unwrap(); + let mut index_writer = index.writer_with_num_threads(1, 20_000_000).unwrap(); + index_writer + .add_document(doc!(label => "dateformat", date => DateTime::from_timestamp_nanos(123456))) + .unwrap(); + index_writer.commit().unwrap(); +} + +#[test] +/// Writes an Index for the current INDEX_FORMAT_VERSION to disk. +fn create_format() { + let version = INDEX_FORMAT_VERSION.to_string(); + let file_path = path_for_version(&version); + if PathBuf::from(file_path.clone()).exists() { + return; + } + create_index(&file_path); +} + +fn path_for_version(version: &str) -> String { + format!("./tests/compat_tests_data/index_v{}/", version) +} + +/// feature flag quickwit uses a different dictionary type +#[test] +#[cfg(not(feature = "quickwit"))] +fn test_format_6() { + let path = path_for_version("6"); + + let index = Index::open_in_dir(path).expect("Failed to open index"); + // dates are truncated to Microseconds in v6 + assert_date_time_precision(&index, DateTimePrecision::Microseconds); +} + +#[cfg(not(feature = "quickwit"))] +fn assert_date_time_precision(index: &Index, precision: DateTimePrecision) { + use collector::TopDocs; + let reader = index.reader().expect("Failed to create reader"); + let searcher = reader.searcher(); + + let schema = index.schema(); + let label_field = schema.get_field("label").expect("Field 'label' not found"); + let query_parser = query::QueryParser::for_index(index, vec![label_field]); + + let query = query_parser + .parse_query("dateformat") + .expect("Failed to parse query"); + let top_docs = searcher + .search(&query, &TopDocs::with_limit(1)) + .expect("Search failed"); + + 
assert_eq!(top_docs.len(), 1, "Expected 1 search result"); + + let doc_address = top_docs[0].1; + let retrieved_doc: TantivyDocument = searcher + .doc(doc_address) + .expect("Failed to retrieve document"); + + let date_field = schema.get_field("date").expect("Field 'date' not found"); + let date_value = retrieved_doc + .get_first(date_field) + .expect("Date field not found in document") + .as_datetime() + .unwrap(); + + let expected = DateTime::from_timestamp_nanos(123456).truncate(precision); + assert_eq!(date_value, expected,); +} diff --git a/src/lib.rs b/src/lib.rs index 6ad2981c1a..dd4fc4f52a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -202,12 +202,15 @@ pub mod space_usage; pub mod store; pub mod termdict; +mod docset; mod reader; +#[cfg(test)] +mod compat_tests; + pub use self::reader::{IndexReader, IndexReaderBuilder, ReloadPolicy, Warmer}; pub mod snippet; -mod docset; use std::fmt; pub use census::{Inventory, TrackedObject}; @@ -229,9 +232,9 @@ pub use crate::indexer::{IndexWriter, SingleSegmentIndexWriter}; pub use crate::schema::{Document, TantivyDocument, Term}; /// Index format version. -const INDEX_FORMAT_VERSION: u32 = 6; +pub const INDEX_FORMAT_VERSION: u32 = 6; /// Oldest index format version this tantivy version can read. -const INDEX_FORMAT_OLDEST_SUPPORTED_VERSION: u32 = 4; +pub const INDEX_FORMAT_OLDEST_SUPPORTED_VERSION: u32 = 4; /// Structure version for the index. 
#[derive(Clone, PartialEq, Eq, Serialize, Deserialize)] diff --git a/src/query/range_query/range_query_fastfield.rs b/src/query/range_query/range_query_fastfield.rs index 0d9aa7bb41..4647587bea 100644 --- a/src/query/range_query/range_query_fastfield.rs +++ b/src/query/range_query/range_query_fastfield.rs @@ -471,7 +471,7 @@ fn bound_to_value_range( } #[cfg(test)] -pub mod tests { +mod tests { use std::ops::{Bound, RangeInclusive}; use common::bounds::BoundsRange; diff --git a/src/termdict/mod.rs b/src/termdict/mod.rs index cb546c5fce..c5e08233d3 100644 --- a/src/termdict/mod.rs +++ b/src/termdict/mod.rs @@ -47,6 +47,7 @@ use self::termdict::{ pub use self::termdict::{TermMerger, TermStreamer}; use crate::postings::TermInfo; +#[derive(Debug, Eq, PartialEq)] #[repr(u32)] #[allow(dead_code)] enum DictionaryType { @@ -54,6 +55,18 @@ enum DictionaryType { SSTable = 2, } +impl TryFrom<u32> for DictionaryType { + type Error = &'static str; + + fn try_from(value: u32) -> Result<Self, Self::Error> { + match value { + 1 => Ok(DictionaryType::Fst), + 2 => Ok(DictionaryType::SSTable), + _ => Err("Invalid value for DictionaryType"), + } + } +} + #[cfg(not(feature = "quickwit"))] const CURRENT_TYPE: DictionaryType = DictionaryType::Fst; @@ -70,13 +83,19 @@ impl TermDictionary { let (main_slice, dict_type) = file.split_from_end(4); let mut dict_type = dict_type.read_bytes()?; let dict_type = u32::deserialize(&mut dict_type)?; + let dict_type = DictionaryType::try_from(dict_type).map_err(|_| { + io::Error::new( + io::ErrorKind::Other, + format!("Unsuported dictionary type, found {dict_type}"), + ) + })?; - if dict_type != CURRENT_TYPE as u32 { + if dict_type != CURRENT_TYPE { return Err(io::Error::new( io::ErrorKind::Other, format!( - "Unsuported dictionary type, expected {}, found {dict_type}", - CURRENT_TYPE as u32, + "Unsuported dictionary type, compiled tantivy with {CURRENT_TYPE:?}, but got \ + {dict_type:?}", ), )); } diff --git a/tests/compat_tests_data/index_v6/.managed.json 
b/tests/compat_tests_data/index_v6/.managed.json new file mode 100644 index 0000000000..95b76f5680 --- /dev/null +++ b/tests/compat_tests_data/index_v6/.managed.json @@ -0,0 +1 @@ +["00000000000000000000000000000000.store","00000000000000000000000000000000.fast","00000000000000000000000000000000.fieldnorm","00000000000000000000000000000000.term","00000000000000000000000000000000.idx","meta.json","00000000000000000000000000000000.pos"] diff --git a/tests/compat_tests_data/index_v6/.tantivy-meta.lock b/tests/compat_tests_data/index_v6/.tantivy-meta.lock new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/compat_tests_data/index_v6/.tantivy-writer.lock b/tests/compat_tests_data/index_v6/.tantivy-writer.lock new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/compat_tests_data/index_v6/00000000000000000000000000000000.fast b/tests/compat_tests_data/index_v6/00000000000000000000000000000000.fast new file mode 100644 index 0000000000000000000000000000000000000000..f550fe3aa5fd1dd1edcd6e0992818fff1aa0ef3e GIT binary patch literal 146 zcmZQzKmZm56U<@;k_u2UMj*`u!iD8d)k!r0WXHVkOGB`X61 E06sw-ZU6uP literal 0 HcmV?d00001 diff --git a/tests/compat_tests_data/index_v6/00000000000000000000000000000000.fieldnorm b/tests/compat_tests_data/index_v6/00000000000000000000000000000000.fieldnorm new file mode 100644 index 0000000000000000000000000000000000000000..08af09ea23fe5f7f7fcbd3c7b7346d75f01e9425 GIT binary patch literal 113 zcmZQ%Y-(Ttf`&#$5XlQ9s+G!8i;6Sz^OUTrm2wla@{5$L40M!oGxI~j#6?_vXYgFp{1Fbg@vh!X>AzLWJ^{C1^~;o9>4$q literal 0 HcmV?d00001 diff --git a/tests/compat_tests_data/index_v6/00000000000000000000000000000000.idx b/tests/compat_tests_data/index_v6/00000000000000000000000000000000.idx new file mode 100644 index 0000000000000000000000000000000000000000..a881999d19725a878eafa40d02d0f1ba26598ec2 GIT binary patch literal 130 zcmZQ%fPjWZMi{H90mN$P0tqzm0*Pv+vecsD%=|nht7@g(#H{=xB`X6RrQFOs5X;C| zN2wsOBsl{tn3~j#6?_vXYglfq{jgiIIg-Z79%SOI8L302&e>bN~PV 
literal 0 HcmV?d00001 diff --git a/tests/compat_tests_data/index_v6/00000000000000000000000000000000.store b/tests/compat_tests_data/index_v6/00000000000000000000000000000000.store new file mode 100644 index 0000000000000000000000000000000000000000..97497908d951c262f85a7ff376a4e0e4c2fb5ae9 GIT binary patch literal 170 zcmY#mU|>*aVqj$Wz|@tJSdyBSUzD3z!Uz;)trlh20Hi?x#BXeEY-ng)1!8J}#DD&CsO1YVNAeND_j#5ElNpc2QFf%VDwIUv3T|8Wy eS*?yza#6C9m8F5Xxrw2%kwI-J&<0CZ1_l5j@+Wox literal 0 HcmV?d00001 diff --git a/tests/compat_tests_data/index_v6/00000000000000000000000000000000.term b/tests/compat_tests_data/index_v6/00000000000000000000000000000000.term new file mode 100644 index 0000000000000000000000000000000000000000..b2df64c6c0fe47be564fd62558d6c8fd397e1d7b GIT binary patch literal 349 zcmZQ#Km!7eM=u;da{JK1qn8+=QqoXb9ZJK*VLFldOfW$p4Q4Yjaakbg1F1z;1C|nC zXka*ifDH#=CMrU85@Rz=4^#^S10w??y3I`uAlZg5Ahiv=K%!cyEVZaOGe1wss#+;G zF)P1F$;v=SDK|3@#4Y^}7v(0F#KX0j)#@lE7bPoM8JZay TnwuCJS{m1e0Zq1KWncgR$89by literal 0 HcmV?d00001 diff --git a/tests/compat_tests_data/index_v6/meta.json b/tests/compat_tests_data/index_v6/meta.json new file mode 100644 index 0000000000..6fa7c001f7 --- /dev/null +++ b/tests/compat_tests_data/index_v6/meta.json @@ -0,0 +1,40 @@ +{ + "index_settings": { + "docstore_compression": "lz4", + "docstore_blocksize": 16384 + }, + "segments": [ + { + "segment_id": "00000000-0000-0000-0000-000000000000", + "max_doc": 1, + "deletes": null + } + ], + "schema": [ + { + "name": "label", + "type": "text", + "options": { + "indexing": { + "record": "position", + "fieldnorms": true, + "tokenizer": "default" + }, + "stored": true, + "fast": false + } + }, + { + "name": "date", + "type": "date", + "options": { + "indexed": true, + "fieldnorms": true, + "fast": false, + "stored": true, + "precision": "seconds" + } + } + ], + "opstamp": 2 +}