@@ -23,8 +23,10 @@ use anyhow::{bail, Context};
23
23
use chrono:: { DateTime , LocalResult , TimeZone , Utc } ;
24
24
25
25
const LAMBDA_SOURCE_ID_PREFIX : & str = "ingest-lambda-source-" ;
26
- const MIN_FILE_SOURCES_TO_KEEP : usize = 20 ;
27
- const MIN_FILE_SOURCE_RETENTION_HOURS : usize = 12 ;
26
+
27
+ /// This duration should be large enough to prevent repeated notification
28
+ /// deliveries from causing duplicates
29
+ const FILE_SOURCE_RETENTION_HOURS : usize = 6 ;
28
30
29
31
/// Create a source id for a Lambda file source, with the provided timestamp encoded in it
30
32
pub ( crate ) fn create_lambda_source_id ( time : DateTime < Utc > ) -> String {
@@ -42,9 +44,8 @@ fn parse_source_id_timestamp(source_id: &str) -> anyhow::Result<DateTime<Utc>> {
42
44
}
43
45
}
44
46
45
- /// Parse the provided source ids and return the ones that are prunable:
46
- /// - There are at least `MIN_FILE_SOURCES_TO_KEEP` Lambda file sources
47
- /// - The file source is older than `MIN_FILE_SOURCE_RETENTION_HOURS`` hours
47
+ /// Parse the provided source ids and return those where the file source is
48
+ /// older than `FILE_SOURCE_RETENTION_HOURS` hours
48
49
pub ( crate ) fn filter_prunable_lambda_source_ids < ' a > (
49
50
source_ids : impl Iterator < Item = & ' a String > ,
50
51
) -> anyhow:: Result < impl Iterator < Item = & ' a String > > {
@@ -56,8 +57,7 @@ pub(crate) fn filter_prunable_lambda_source_ids<'a>(
56
57
let prunable_sources = src_timestamps
57
58
. into_iter ( )
58
59
. rev ( )
59
- . skip ( MIN_FILE_SOURCES_TO_KEEP )
60
- . filter ( |( ts, _) | ( Utc :: now ( ) - * ts) . num_hours ( ) > MIN_FILE_SOURCE_RETENTION_HOURS as i64 )
60
+ . filter ( |( ts, _) | ( Utc :: now ( ) - * ts) . num_hours ( ) > FILE_SOURCE_RETENTION_HOURS as i64 )
61
61
. map ( |( _, src_id) | src_id) ;
62
62
63
63
Ok ( prunable_sources)
@@ -86,7 +86,7 @@ mod tests {
86
86
87
87
#[ test]
88
88
fn test_dont_filter_recent ( ) {
89
- let source_ids: Vec < String > = ( 0 ..MIN_FILE_SOURCES_TO_KEEP + 5 )
89
+ let source_ids: Vec < String > = ( 0 ..20 )
90
90
. map ( |i| {
91
91
// only recent timestamps
92
92
Utc :: now ( ) - chrono:: Duration :: try_seconds ( i as i64 ) . unwrap ( )
@@ -100,13 +100,11 @@ mod tests {
100
100
}
101
101
102
102
#[ test]
103
- fn test_filter_old_but_keep_min_number ( ) {
104
- let source_ids: Vec < String > = ( 0 ..MIN_FILE_SOURCES_TO_KEEP + 3 )
103
+ fn test_filter_old ( ) {
104
+ let source_ids: Vec < String > = ( 0 ..5 )
105
105
. map ( |i| {
106
- // old timestamps so that MIN_FILE_SOURCES_TO_KEEP is the limit
107
- Utc :: now ( )
108
- - chrono:: Duration :: try_hours ( MIN_FILE_SOURCE_RETENTION_HOURS as i64 ) . unwrap ( )
109
- - chrono:: Duration :: try_hours ( i as i64 ) . unwrap ( )
106
+ let hours_ago = i * FILE_SOURCE_RETENTION_HOURS * 2 ;
107
+ Utc :: now ( ) - chrono:: Duration :: try_hours ( hours_ago as i64 ) . unwrap ( )
110
108
} )
111
109
. map ( create_lambda_source_id)
112
110
. collect ( ) ;
@@ -115,18 +113,20 @@ mod tests {
115
113
let prunable_sources = filter_prunable_lambda_source_ids ( source_ids. iter ( ) )
116
114
. unwrap ( )
117
115
. collect :: < HashSet < _ > > ( ) ;
118
- assert_eq ! ( prunable_sources. len( ) , 3 ) ;
119
- for source_id in source_ids. iter ( ) . take ( 3 ) {
120
- assert ! ( !prunable_sources. contains( source_id) ) ;
116
+ assert_eq ! ( prunable_sources. len( ) , 4 ) ;
117
+ assert ! ( !prunable_sources. contains( & source_ids[ 0 ] ) ) ;
118
+ for source_id in source_ids. iter ( ) . skip ( 1 ) {
119
+ assert ! ( prunable_sources. contains( source_id) ) ;
121
120
}
122
121
123
122
// Prune source ids that happen to be from oldest to newest
124
123
let prunable_sources = filter_prunable_lambda_source_ids ( source_ids. iter ( ) . rev ( ) )
125
124
. unwrap ( )
126
125
. collect :: < HashSet < _ > > ( ) ;
127
- assert_eq ! ( prunable_sources. len( ) , 3 ) ;
128
- for source_id in source_ids. iter ( ) . take ( 3 ) {
129
- assert ! ( !prunable_sources. contains( source_id) ) ;
126
+ assert_eq ! ( prunable_sources. len( ) , 4 ) ;
127
+ assert ! ( !prunable_sources. contains( & source_ids[ 0 ] ) ) ;
128
+ for source_id in source_ids. iter ( ) . skip ( 1 ) {
129
+ assert ! ( prunable_sources. contains( source_id) ) ;
130
130
}
131
131
}
132
132
0 commit comments