|
| 1 | +// Things tested by this fuzz test |
| 2 | +// |
| 3 | +// - A few different document identifiers only |
| 4 | +// - Simple setting updates (searchable and filterable attributes only) |
| 5 | +// - Document Deletion (given existing or nonexistent external document ids) |
| 6 | +// - Clear Documents |
| 7 | +// - Batched document imports |
| 8 | +// - Update/Replacement of existing documents |
| 9 | +// - Each operation with and without soft deletion |
| 10 | +// - Empty document imports |
| 11 | +// - No crash should ever happen |
| 12 | + |
| 13 | +// A small sample of what isn't tested: |
| 14 | +// |
| 15 | +// - The correctness of the indexing operations |
| 16 | +// - Indexing mistakes that happen when many different documents are inserted |
| 17 | +// - Long batches of document imports |
| 18 | +// - Nested fields (not tested well anyway) |
| 19 | +// - Any search result |
| 20 | +// - Arbitrary document contents |
| 21 | +// (instead, the components of the documents are pre-written manually) |
| 22 | +// - Index creation / Deletion |
| 23 | +// - Autogenerated docids |
| 24 | +// - Indexing for geosearch |
| 25 | +// - Documents with too many field ids or too many words in a field id |
| 26 | +// - Anything related to the prefix databases |
| 27 | +// - Incorrect setting updates |
| 28 | +// - The logic that chooses between soft and hard deletion |
| 29 | +// (the choice is instead set manually for each operation) |
| 30 | +// - Different IndexerConfig parameters |
| 31 | + |
| 32 | +// Efficiency tips: |
| 33 | +// |
| 34 | +// - Use a RAM disk (see https://stackoverflow.com/questions/46224103/create-apfs-ram-disk-on-macos-high-sierra) |
| 35 | +// - change the value of the TMPDIR environment variable to a folder in the RAM disk |
| 36 | + |
| 37 | +// Quality: |
| 38 | +// - finds issue 2945 if any of the last two fixes are not present (within a few minutes) |
| 39 | +// - issue 2945: https://github.com/meilisearch/meilisearch/issues/2945 |
| 40 | +// - fix 1: https://github.com/meilisearch/milli/pull/723 |
| 41 | +// - fix 2: https://github.com/meilisearch/milli/pull/734 |
| 42 | +// - but doesn't detect anything wrong if this fix is not included: https://github.com/meilisearch/milli/pull/690 |
| 43 | +// - because it doesn't cause any crash, I think |
| 44 | +// - each fuzz test iteration is quite slow |
| 45 | +// - for this fuzz test in particular, it is good to let it run for a few hours, or even a day |
| 46 | + |
| 47 | +use std::hash::Hash; |
| 48 | +use std::sync::LazyLock; |
| 49 | + |
| 50 | +use fuzzcheck::mutators::integer_within_range::U8WithinRangeMutator; |
| 51 | +use fuzzcheck::mutators::option::OptionMutator; |
| 52 | +use fuzzcheck::mutators::unique::UniqueMutator; |
| 53 | +use fuzzcheck::mutators::vector::VecMutator; |
| 54 | +use fuzzcheck::DefaultMutator; |
| 55 | +use heed::{EnvOpenOptions, RwTxn}; |
| 56 | +use serde::{Deserialize, Serialize}; |
| 57 | +use tempfile::TempDir; |
| 58 | + |
| 59 | +use super::{ |
| 60 | + ClearDocuments, DeleteDocuments, IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings, |
| 61 | +}; |
| 62 | +use crate::Index; |
| 63 | + |
| 64 | +/// The list of document identifiers that we choose to test |
| 65 | +static DOCUMENT_IDENTIFIERS: LazyLock<Vec<serde_json::Value>> = LazyLock::new(|| { |
| 66 | + let mut ids = vec![]; |
| 67 | + for i in 0..10 { |
| 68 | + ids.push(serde_json::json!(i)); |
| 69 | + ids.push(serde_json::json!(format!("{i}"))); |
| 70 | + } |
| 71 | + ids.push(serde_json::json!("complex-ID-1_2")); |
| 72 | + ids.push(serde_json::json!("1-2-3-4")); |
| 73 | + ids.push(serde_json::json!("invalidsupercalifragilisticexpialidocioussupercalifragilisticexpialidocioussupercalifragilisticexpialidocioussupercalifragilisticexpialidocioussupercalifragilisticexpialidocioussupercalifragilisticexpialidocioussupercalifragilisticexpialidocioussupercalifragilisticexpialidocious")); |
| 74 | + ids.push(serde_json::json!("invalid.id")); |
| 75 | + ids |
| 76 | +}); |
| 77 | + |
| 78 | +/// The list of field values that we choose to test |
| 79 | +static FIELD_VALUES: LazyLock<Vec<serde_json::Value>> = LazyLock::new(|| { |
| 80 | + let mut vals = vec![]; |
| 81 | + for i in 0..10i32 { |
| 82 | + vals.push(serde_json::json!(i)); |
| 83 | + vals.push(serde_json::json!((i as f64) / 3.4)); |
| 84 | + vals.push(serde_json::json!(111.1_f32.powi(i))); |
| 85 | + vals.push(serde_json::json!(format!("{i}"))); |
| 86 | + vals.push(serde_json::json!([i - 1, format!("{i}"), i + 1, format!("{}", i - 1), i - 2])); |
| 87 | + vals.push(serde_json::json!(format!("{}", "a".repeat(i as usize)))); |
| 88 | + } |
| 89 | + vals.push(serde_json::json!({ "nested": ["value", { "nested": ["value", "value", "the quick brown fox jumps over the lazy dog, wow!"] }], "value": 0})); |
| 90 | + vals.push(serde_json::json!("the quick brown fox jumps over the lazy dog, wow!")); |
| 91 | + vals.push(serde_json::json!("the quick brown supercalifragilisticexpialidocioussupercalifragilisticexpialidocioussupercalifragilisticexpialidocioussupercalifragilisticexpialidocioussupercalifragilisticexpialidocioussupercalifragilisticexpialidocioussupercalifragilisticexpialidocioussupercalifragilisticexpialidocious fox jumps over the lazy dog")); |
| 92 | + vals.push(serde_json::json!({ "lat": 23.0, "lon": 22.1 })); |
| 93 | + vals.push(serde_json::json!({ "lat": 23.0, "lon": 22.1, "other": 10.0 })); |
| 94 | + vals.push(serde_json::json!({ "lat": -23.0, "lon": -22.1 })); |
| 95 | + vals.push(serde_json::json!({ "lat": 93.0, "lon": 22.1 })); |
| 96 | + vals.push(serde_json::json!({ "lat": 90.0, "lon": 221.1 })); |
| 97 | + vals |
| 98 | +}); |
/// The pool of field keys that we choose to test.
///
/// For each of the four root names, the list contains the root itself followed by the
/// root joined with a dot to each of the eight suffixes, in declaration order.
/// Entries are referred to by their position in this list.
static FIELD_KEYS: LazyLock<Vec<String>> = LazyLock::new(|| {
    let roots = ["identifier", "field1", "field2", "_geo"];
    let suffixes =
        ["nested", "value", "nested.value", "nested.value.nested", "_geo", "lat", "lon", "other"];

    let mut keys = Vec::with_capacity(roots.len() * (suffixes.len() + 1));
    for root in roots.iter() {
        keys.push(root.to_string());
        keys.extend(suffixes.iter().map(|suffix| format!("{root}.{suffix}")));
    }
    keys
});
| 122 | +fn document_identifier(i: u8) -> serde_json::Value { |
| 123 | + DOCUMENT_IDENTIFIERS[i as usize].clone() |
| 124 | +} |
| 125 | +fn field_key(i: u8) -> String { |
| 126 | + FIELD_KEYS[i as usize].clone() |
| 127 | +} |
| 128 | +fn field_value(i: u8) -> serde_json::Value { |
| 129 | + FIELD_VALUES[i as usize].clone() |
| 130 | +} |
| 131 | +fn document_identifier_index_mutator() -> U8WithinRangeMutator { |
| 132 | + U8WithinRangeMutator::new(..DOCUMENT_IDENTIFIERS.len() as u8) |
| 133 | +} |
| 134 | +fn field_key_index_mutator() -> U8WithinRangeMutator { |
| 135 | + U8WithinRangeMutator::new(..FIELD_KEYS.len() as u8) |
| 136 | +} |
| 137 | +fn field_value_index_mutator() -> U8WithinRangeMutator { |
| 138 | + U8WithinRangeMutator::new(..FIELD_VALUES.len() as u8) |
| 139 | +} |
| 140 | + |
// One step of a fuzz test case. A test case is a sequence of these operations, applied in
// order by `apply_operation`.
#[derive(Debug, Clone, Serialize, Deserialize, DefaultMutator, PartialEq, Eq, Hash)]
enum Operation {
    // Change the index settings (see `apply_settings_update`).
    SettingsUpdate(SettingsUpdate),
    // Add or update documents (see `apply_document_import`).
    DocumentImport(DocumentImport),
    // Delete documents by external id (see `apply_document_deletion`).
    DocumentDeletion(DocumentDeletion),
    // Run `ClearDocuments` on the index.
    Clear,
}
// How a document import treats the documents it is given. `apply_document_import`
// translates this into the corresponding `IndexDocumentsMethod` variant.
#[derive(Debug, Clone, Serialize, Deserialize, DefaultMutator, PartialEq, Eq, Hash)]
enum Method {
    // Mapped to `IndexDocumentsMethod::UpdateDocuments`.
    Update,
    // Mapped to `IndexDocumentsMethod::ReplaceDocuments`.
    Replace,
}
// Description of one document import operation.
#[derive(Debug, Clone, Serialize, Deserialize, DefaultMutator, PartialEq, Eq, Hash)]
struct DocumentImport {
    // Forwarded as-is to `IndexDocumentsConfig::disable_soft_deletion`.
    disable_soft_deletion: bool,
    // Whether the import uses the update or the replace indexing method.
    method: Method,
    // The two batches of documents added during this import.
    documents: DocumentImportBatch,
}
// Description of one settings update operation. Only searchable attributes are currently
// exercised; the filterable variant is kept below, commented out.
#[derive(Debug, Clone, Serialize, Deserialize, DefaultMutator, PartialEq, Eq, Hash)]
struct SettingsUpdate {
    // Adding filterable fields slows down the fuzzer a lot
    // #[field_mutator(OptionMutator<Vec<u8>, VecMutator<u8, U8WithinRangeMutator>> = {
    //     OptionMutator::new(VecMutator::new(field_key_index_mutator(), 0..=10))
    // })]
    // filterable_fields: Option<Vec<u8>>,

    // `Some(indices)`: set the searchable fields to the `FIELD_KEYS` entries at these
    // positions (between 0 and 10 of them). `None`: reset the searchable fields.
    #[field_mutator(OptionMutator<Vec<u8>, VecMutator<u8, U8WithinRangeMutator>> = {
        OptionMutator::new(VecMutator::new(field_key_index_mutator(), 0..=10))
    })]
    searchable_fields: Option<Vec<u8>>,
}
// Description of one document deletion operation.
#[derive(Debug, Clone, Serialize, Deserialize, DefaultMutator, PartialEq, Eq, Hash)]
struct DocumentDeletion {
    // Forwarded as-is to `DeleteDocuments::disable_soft_deletion`.
    disable_soft_deletion: bool,
    // Positions in `DOCUMENT_IDENTIFIERS` of the external ids to delete (between 0 and 10
    // of them, generated within the bounds of that list).
    #[field_mutator(VecMutator<u8, U8WithinRangeMutator> = {
        VecMutator::new(document_identifier_index_mutator(), 0..=10)
    })]
    external_document_ids: Vec<u8>,
}
// Compact description of a document: every component is a position in one of the
// pre-written lists rather than an arbitrary value.
#[derive(Debug, Clone, Serialize, Deserialize, DefaultMutator, PartialEq, Eq, Hash)]
struct Document {
    // Position in `DOCUMENT_IDENTIFIERS` of the value stored under the "identifier" key.
    #[field_mutator(U8WithinRangeMutator = { document_identifier_index_mutator() })]
    identifier: u8,
    // Position in `FIELD_VALUES` of the value stored under "field1"; `None` omits the field.
    #[field_mutator(OptionMutator<u8, U8WithinRangeMutator> = {
        OptionMutator::new(field_value_index_mutator())
    })]
    field1: Option<u8>,
    // Position in `FIELD_VALUES` of the value stored under "field2"; `None` omits the field.
    #[field_mutator(OptionMutator<u8, U8WithinRangeMutator> = {
        OptionMutator::new(field_value_index_mutator())
    })]
    field2: Option<u8>,
}
// The documents of one import, split into two batches that `apply_document_import` hands
// to the indexer through two successive `add_documents` calls.
#[derive(Debug, Clone, Serialize, Deserialize, DefaultMutator, PartialEq, Eq, Hash)]
struct DocumentImportBatch {
    // First batch: between 0 and 10 documents.
    #[field_mutator(VecMutator<Document, DocumentMutator> = {
        VecMutator::new(Document::default_mutator(), 0..=10)
    })]
    docs1: Vec<Document>,
    // Second batch: between 0 and 5 documents.
    #[field_mutator(VecMutator<Document, DocumentMutator> = {
        VecMutator::new(Document::default_mutator(), 0..=5)
    })]
    docs2: Vec<Document>,
}
| 203 | + |
| 204 | +fn apply_document_deletion<'i>( |
| 205 | + wtxn: &mut RwTxn<'i, '_>, |
| 206 | + index: &'i Index, |
| 207 | + deletion: &DocumentDeletion, |
| 208 | +) { |
| 209 | + let DocumentDeletion { disable_soft_deletion, external_document_ids } = deletion; |
| 210 | + let mut builder = DeleteDocuments::new(wtxn, index).unwrap(); |
| 211 | + builder.disable_soft_deletion(*disable_soft_deletion); |
| 212 | + for id in external_document_ids { |
| 213 | + let id = document_identifier(*id); |
| 214 | + let id = match id { |
| 215 | + serde_json::Value::Number(n) => format!("{n}"), |
| 216 | + serde_json::Value::String(s) => s, |
| 217 | + _ => panic!(), |
| 218 | + }; |
| 219 | + let _ = builder.delete_external_id(id.as_str()); |
| 220 | + } |
| 221 | + builder.execute().unwrap(); |
| 222 | +} |
| 223 | + |
| 224 | +fn apply_document_import<'i>(wtxn: &mut RwTxn<'i, '_>, index: &'i Index, import: &DocumentImport) { |
| 225 | + let DocumentImport { |
| 226 | + disable_soft_deletion, |
| 227 | + method, |
| 228 | + documents: DocumentImportBatch { docs1, docs2 }, |
| 229 | + } = import; |
| 230 | + let indexer_config = IndexerConfig::default(); |
| 231 | + let mut builder = IndexDocuments::new( |
| 232 | + wtxn, |
| 233 | + index, |
| 234 | + &indexer_config, |
| 235 | + IndexDocumentsConfig { |
| 236 | + update_method: match method { |
| 237 | + Method::Update => super::IndexDocumentsMethod::UpdateDocuments, |
| 238 | + Method::Replace => super::IndexDocumentsMethod::ReplaceDocuments, |
| 239 | + }, |
| 240 | + disable_soft_deletion: *disable_soft_deletion, |
| 241 | + autogenerate_docids: false, |
| 242 | + ..IndexDocumentsConfig::default() |
| 243 | + }, |
| 244 | + |_| {}, |
| 245 | + || false, |
| 246 | + ) |
| 247 | + .unwrap(); |
| 248 | + |
| 249 | + let make_real_docs = |docs: &Vec<Document>| { |
| 250 | + docs.iter() |
| 251 | + .map(|doc| { |
| 252 | + let Document { identifier, field1, field2 } = doc; |
| 253 | + let mut object = crate::Object::new(); |
| 254 | + let identifier = document_identifier(*identifier); |
| 255 | + object.insert("identifier".to_owned(), serde_json::json!(identifier)); |
| 256 | + if let Some(field1) = field1 { |
| 257 | + let field1 = field_value(*field1); |
| 258 | + object.insert("field1".to_owned(), field1); |
| 259 | + } |
| 260 | + if let Some(field2) = field2 { |
| 261 | + let field2 = field_value(*field2); |
| 262 | + object.insert("field2".to_owned(), field2); |
| 263 | + } |
| 264 | + object |
| 265 | + }) |
| 266 | + .collect::<Vec<_>>() |
| 267 | + }; |
| 268 | + |
| 269 | + let docs1 = make_real_docs(docs1); |
| 270 | + |
| 271 | + let (new_builder, _user_error) = builder.add_documents(documents!(docs1)).unwrap(); |
| 272 | + builder = new_builder; |
| 273 | + |
| 274 | + let docs2 = make_real_docs(docs2); |
| 275 | + |
| 276 | + let (new_builder, _user_error) = builder.add_documents(documents!(docs2)).unwrap(); |
| 277 | + builder = new_builder; |
| 278 | + |
| 279 | + let _ = builder.execute().unwrap(); |
| 280 | +} |
| 281 | + |
| 282 | +fn apply_settings_update<'i>( |
| 283 | + wtxn: &mut RwTxn<'i, '_>, |
| 284 | + index: &'i Index, |
| 285 | + settings: &SettingsUpdate, |
| 286 | +) { |
| 287 | + let SettingsUpdate { searchable_fields /* , filterable_fields */ } = settings; |
| 288 | + let indexer_config = IndexerConfig::default(); |
| 289 | + let mut settings = Settings::new(wtxn, index, &indexer_config); |
| 290 | + // match filterable_fields { |
| 291 | + // Some(fields) => { |
| 292 | + // let fields = fields.iter().map(|f| field_key(*f)).collect(); |
| 293 | + // settings.set_filterable_fields(fields); |
| 294 | + // } |
| 295 | + // None => settings.reset_filterable_fields(), |
| 296 | + // } |
| 297 | + match searchable_fields { |
| 298 | + Some(fields) => { |
| 299 | + let fields = fields.iter().map(|f| field_key(*f)).collect(); |
| 300 | + settings.set_searchable_fields(fields); |
| 301 | + } |
| 302 | + None => settings.reset_searchable_fields(), |
| 303 | + } |
| 304 | + settings.execute(|_| {}, || false).unwrap(); |
| 305 | +} |
| 306 | + |
| 307 | +fn apply_operation<'i>(wtxn: &mut RwTxn<'i, '_>, index: &'i Index, operation: &Operation) { |
| 308 | + match operation { |
| 309 | + Operation::SettingsUpdate(settings) => apply_settings_update(wtxn, index, settings), |
| 310 | + Operation::DocumentImport(import) => apply_document_import(wtxn, index, import), |
| 311 | + Operation::DocumentDeletion(deletion) => apply_document_deletion(wtxn, index, deletion), |
| 312 | + Operation::Clear => { |
| 313 | + let builder = ClearDocuments::new(wtxn, index); |
| 314 | + let _result = builder.execute().unwrap(); |
| 315 | + } |
| 316 | + } |
| 317 | +} |
| 318 | + |
/// Fuzz test entry point: applies generated sequences of up to 20 operations to one index
/// and fails if fuzzcheck reports a test failure.
///
/// The index lives in a temporary directory created in the system temporary location, so
/// the test runs on any machine. To run it from a RAM disk, point the `TMPDIR` environment
/// variable at a folder on that disk.
#[test]
fn fuzz() {
    // Previously hard-coded to "/Volumes/Ramdisk", which only exists on a machine where
    // that exact volume has been mounted.
    let tempdir = TempDir::new().unwrap();

    let mut options = EnvOpenOptions::new();
    options.map_size(4096 * 1000 * 1000);

    // Create the index once and commit its primary key; every test case starts from this
    // state.
    let index = {
        let index = Index::new(options, tempdir.path()).unwrap();
        let mut wtxn = index.write_txn().unwrap();
        let indexer_config = IndexerConfig::default();
        let mut settings = Settings::new(&mut wtxn, &index, &indexer_config);
        settings.set_primary_key("identifier".to_owned());
        settings.execute(|_| {}, || false).unwrap();
        wtxn.commit().unwrap();
        index
    };

    let result = fuzzcheck::fuzz_test(move |operations: &[Operation]| {
        let mut wtxn = index.write_txn().unwrap();
        for operation in operations {
            apply_operation(&mut wtxn, &index, operation);
        }
        // The transaction is aborted rather than committed, so nothing a test case wrote
        // is kept for the next one.
        wtxn.abort().unwrap();
    })
    // We use a bloom filter (through UniqueMutator) to prevent the same test input from being tested too many times
    .mutator(UniqueMutator::new(VecMutator::new(Operation::default_mutator(), 0..=20), |x| x))
    .serde_serializer()
    .default_sensor_and_pool()
    .arguments_from_cargo_fuzzcheck()
    .launch();
    assert!(!result.found_test_failure);
}
0 commit comments