From e4a0db1c4458c09ff1f9838800286eac652a4abf Mon Sep 17 00:00:00 2001 From: brogdonm Date: Tue, 8 Oct 2024 17:45:40 -0600 Subject: [PATCH 1/3] Allow for multiple data exclusions, minimyzing where appropriate --- sdk/src/store.rs | 60 ++++++++++++++++++++++++++---------------------- 1 file changed, 32 insertions(+), 28 deletions(-) diff --git a/sdk/src/store.rs b/sdk/src/store.rs index d829ca683..4b82a1e12 100644 --- a/sdk/src/store.rs +++ b/sdk/src/store.rs @@ -1580,39 +1580,47 @@ impl Store { // sort blocks by offset block_locations.sort_by(|a, b| a.offset.cmp(&b.offset)); - // generate default data hash that excludes jumbf block - // find the first jumbf block (ours are always in order) - // find the first block after the jumbf blocks - let mut block_start: usize = 0; - let mut block_end: usize = 0; - let mut found_jumbf = false; + // Setup to assume fragmented CAI blocks + let mut exclusions = Vec::<(usize, usize)>::new(); for item in block_locations { // find start of jumbf - if !found_jumbf && item.htype == HashBlockObjectType::Cai { - block_start = item.offset; - found_jumbf = true; - } - - // find start of block after jumbf blocks - if found_jumbf && item.htype == HashBlockObjectType::Cai { - block_end = item.offset + item.length; + if item.htype == HashBlockObjectType::Cai { + // Make sure we have a valid range + if item.offset <= (item.offset + item.length) { + // If we are calculating hashes, avoid adding an + // exclusion if the CAI block is beyond the end of the + // stream. Some asset handlers will inject a + // placeholder for the CAI block at the end of the + // stream before the stream itself has the block. + if !calc_hashes || (item.offset + item.length) as u64 <= stream_len { + let mut exclusion = (item.offset, item.offset + item.length); + // Setup to de-fragment sections that are contiguous but may have + // been listed as separate + if let Some(last_exclusion) = exclusions.last() { + // If the last exclusion ends where this one starts, + // merge them + if last_exclusion.1 == exclusion.0 { + exclusion.0 = last_exclusion.0; + exclusions.pop(); + } + } + exclusions.push(exclusion); + } + } } } - if found_jumbf { + if !exclusions.is_empty() { // add exclusion hash for bytes before and after jumbf let mut dh = DataHash::new("jumbf manifest", alg); - - if calc_hashes { - if block_end > block_start && (block_end as u64) <= stream_len { - dh.add_exclusion(HashRange::new(block_start, block_end - block_start)); + for exclusion in &exclusions { + if exclusion.1 > exclusion.0 { + dh.add_exclusion(HashRange::new(exclusion.0, exclusion.1 - exclusion.0)); } + } - // this check is only valid on the final sized asset - // - // a case may occur where there is no existing manifest in the stream and the - // asset handler creates a placeholder beyond the length of the stream - if block_end as u64 > stream_len + (block_end - block_start) as u64 { + if calc_hashes { + if exclusions.iter().any(|x| x.1 as u64 > stream_len) { return Err(Error::BadParam( "data hash exclusions out of range".to_string(), )); @@ -1620,10 +1628,6 @@ impl Store { dh.gen_hash_from_stream(stream)?; } else { - if block_end > block_start { - dh.add_exclusion(HashRange::new(block_start, block_end - block_start)); - } - match alg { "sha256" => dh.set_hash([0u8; 32].to_vec()), "sha384" => dh.set_hash([0u8; 48].to_vec()), From 75317c30242713a1f4b2e2f118fc7ab330192dc8 Mon Sep 17 00:00:00 2001 From: brogdonm Date: Tue, 8 Oct 2024 18:20:58 -0600 Subject: [PATCH 2/3] Add the unit test in, showing the logic working --- sdk/src/store.rs | 55 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/sdk/src/store.rs b/sdk/src/store.rs index 4b82a1e12..b001fe7fa 100644 --- a/sdk/src/store.rs +++ b/sdk/src/store.rs @@ -5612,6 +5612,61 @@ pub mod tests { assert!(errors.is_empty()); } + #[test] + fn test_generate_data_hashes_for_stream_multiple_exclusions() { + // Setup the test data + let mut data= vec![0u8; 100]; + // And wrap in a cursor to treat it like a stream for the API + let mut stream = Cursor::new(&mut data); + let alg = "sha256"; + + // Return a total of three blocked locations, all of which report they are + // CAI blocks, which should be excluded from the data hash + let mut block_locations = vec![ + HashObjectPositions { + offset: 0, + length: 42, + htype: HashBlockObjectType::Cai, + }, + // This one and the next one should be merged into a single exclusion + HashObjectPositions { + offset: 80, + length: 10, + htype: HashBlockObjectType::Cai, + }, + HashObjectPositions { + offset: 90, + length: 10, + htype: HashBlockObjectType::Cai, + }, + ]; + let calc_hashes = true; + + // Generate the data hash + let data_hash_result = + Store::generate_data_hashes_for_stream(&mut stream, alg, &mut block_locations, calc_hashes); + // Which should have executed without issue + assert!(data_hash_result.is_ok()); + // Grab the actual data hash object + let data_hash = data_hash_result.unwrap(); + + // Which should have a single entry + assert_eq!(1, data_hash.len()); + let data_hash_0 = &data_hash[0]; + // And it should have exclusions specified + assert!(data_hash_0.exclusions.is_some()); + // Grab the exclusions + let exclusions = data_hash_0.exclusions.as_ref().unwrap(); + // Should be a totale of 2 exclusions to catch the de-fragmented data + assert_eq!(2, exclusions.len()); + // And the first exclusion should match what we specified + assert_eq!(0, exclusions[0].start()); + assert_eq!(42, exclusions[0].length()); + // And the second exclusion should be the last two blocks + assert_eq!(80, exclusions[1].start()); + assert_eq!(20, exclusions[1].length()); + } + #[test] #[cfg(feature = "file_io")] fn test_datahash_embeddable_manifest() { From 6d21d2cb70acc6b3ca85b6141d98be4f57424e5a Mon Sep 17 00:00:00 2001 From: Michael Brogdon Date: Tue, 8 Oct 2024 22:52:39 -0600 Subject: [PATCH 3/3] Fix formatting. --- sdk/src/store.rs | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/sdk/src/store.rs b/sdk/src/store.rs index b001fe7fa..9d2bfa266 100644 --- a/sdk/src/store.rs +++ b/sdk/src/store.rs @@ -5615,7 +5615,7 @@ pub mod tests { #[test] fn test_generate_data_hashes_for_stream_multiple_exclusions() { // Setup the test data - let mut data= vec![0u8; 100]; + let mut data = vec![0u8; 100]; // And wrap in a cursor to treat it like a stream for the API let mut stream = Cursor::new(&mut data); let alg = "sha256"; @@ -5643,8 +5643,12 @@ pub mod tests { let calc_hashes = true; // Generate the data hash - let data_hash_result = - Store::generate_data_hashes_for_stream(&mut stream, alg, &mut block_locations, calc_hashes); + let data_hash_result = Store::generate_data_hashes_for_stream( + &mut stream, + alg, + &mut block_locations, + calc_hashes, + ); // Which should have executed without issue assert!(data_hash_result.is_ok()); // Grab the actual data hash object