@@ -3,7 +3,7 @@ use chroma_error::{ChromaError, ErrorCodes};
3
3
use chroma_types:: {
4
4
Chunk , DataRecord , DeletedMetadata , LogRecord , MaterializedLogOperation , Metadata ,
5
5
MetadataDelta , MetadataValue , MetadataValueConversionError , Operation , OperationRecord ,
6
- UpdateMetadata , UpdateMetadataValue ,
6
+ OwnedDataRecord , UpdateMetadata , UpdateMetadataValue ,
7
7
} ;
8
8
use std:: collections:: { HashMap , HashSet } ;
9
9
use std:: sync:: atomic:: AtomicU32 ;
@@ -113,10 +113,10 @@ impl ChromaError for LogMaterializerError {
113
113
}
114
114
115
115
#[ derive( Debug , Clone ) ]
116
- pub struct MaterializedLogRecord < ' referred_data > {
116
+ pub struct MaterializedLogRecord {
117
117
// This is the data record read from the record segment for this id.
118
118
// None if the record exists only in the log.
119
- pub ( crate ) data_record : Option < DataRecord < ' referred_data > > ,
119
+ pub ( crate ) data_record : Option < OwnedDataRecord > ,
120
120
// If present in the record segment then it is the offset id
121
121
// in the record segment at which the record was found.
122
122
// If not present in the segment then it is the offset id
@@ -157,7 +157,7 @@ pub struct MaterializedLogRecord<'referred_data> {
157
157
pub ( crate ) final_embedding : Option < Vec < f32 > > ,
158
158
}
159
159
160
- impl < ' referred_data > MaterializedLogRecord < ' referred_data > {
160
+ impl MaterializedLogRecord {
161
161
// Performs a deep copy of the document so only use it if really
162
162
// needed. If you only need a reference then use merged_document_ref
163
163
// defined below.
@@ -170,7 +170,7 @@ impl<'referred_data> MaterializedLogRecord<'referred_data> {
170
170
return match self . final_document . clone ( ) {
171
171
Some ( doc) => Some ( doc) ,
172
172
None => match self . data_record . as_ref ( ) {
173
- Some ( data_record) => data_record. document . map ( |doc| doc . to_string ( ) ) ,
173
+ Some ( data_record) => data_record. document . clone ( ) ,
174
174
None => None ,
175
175
} ,
176
176
} ;
@@ -185,10 +185,7 @@ impl<'referred_data> MaterializedLogRecord<'referred_data> {
185
185
return match & self . final_document {
186
186
Some ( doc) => Some ( doc) ,
187
187
None => match self . data_record . as_ref ( ) {
188
- Some ( data_record) => match data_record. document {
189
- Some ( doc) => Some ( doc) ,
190
- None => None ,
191
- } ,
188
+ Some ( data_record) => data_record. document . as_deref ( ) ,
192
189
None => None ,
193
190
} ,
194
191
} ;
@@ -211,7 +208,7 @@ impl<'referred_data> MaterializedLogRecord<'referred_data> {
211
208
match & self . user_id {
212
209
Some ( id) => id. as_str ( ) ,
213
210
None => match & self . data_record {
214
- Some ( data_record) => data_record. id ,
211
+ Some ( data_record) => & data_record. id ,
215
212
None => panic ! ( "Expected at least one user id to be set" ) ,
216
213
} ,
217
214
}
@@ -247,7 +244,7 @@ impl<'referred_data> MaterializedLogRecord<'referred_data> {
247
244
final_metadata
248
245
}
249
246
250
- pub ( crate ) fn metadata_delta ( & ' referred_data self ) -> MetadataDelta < ' referred_data > {
247
+ pub ( crate ) fn metadata_delta ( & self ) -> MetadataDelta < ' _ > {
251
248
let mut metadata_delta = MetadataDelta :: new ( ) ;
252
249
let mut base_metadata: HashMap < & str , & MetadataValue > = HashMap :: new ( ) ;
253
250
if let Some ( data_record) = & self . data_record {
@@ -327,21 +324,19 @@ impl<'referred_data> MaterializedLogRecord<'referred_data> {
327
324
return match & self . final_embedding {
328
325
Some ( embed) => embed,
329
326
None => match self . data_record . as_ref ( ) {
330
- Some ( data_record) => data_record. embedding ,
327
+ Some ( data_record) => & data_record. embedding ,
331
328
None => panic ! ( "Expected at least one source of embedding" ) ,
332
329
} ,
333
330
} ;
334
331
}
335
332
}
336
333
337
- impl < ' referred_data > From < ( DataRecord < ' referred_data > , u32 ) >
338
- for MaterializedLogRecord < ' referred_data >
339
- {
334
+ impl < ' referred_data > From < ( DataRecord < ' referred_data > , u32 ) > for MaterializedLogRecord {
340
335
fn from ( data_record_info : ( DataRecord < ' referred_data > , u32 ) ) -> Self {
341
336
let data_record = data_record_info. 0 ;
342
337
let offset_id = data_record_info. 1 ;
343
338
Self {
344
- data_record : Some ( data_record) ,
339
+ data_record : Some ( data_record. to_owned ( ) ) ,
345
340
offset_id,
346
341
user_id : None ,
347
342
final_operation : MaterializedLogOperation :: Initial ,
@@ -357,7 +352,7 @@ impl<'referred_data> From<(DataRecord<'referred_data>, u32)>
357
352
// in the log (OperationRecord), offset id in storage where it will be stored (u32)
358
353
// and user id (str).
359
354
impl < ' referred_data > TryFrom < ( & ' referred_data OperationRecord , u32 , & ' referred_data str ) >
360
- for MaterializedLogRecord < ' referred_data >
355
+ for MaterializedLogRecord
361
356
{
362
357
type Error = LogMaterializerError ;
363
358
@@ -414,7 +409,7 @@ pub async fn materialize_logs<'me>(
414
409
// for materializing. Writers pass this value to the materializer
415
410
// because they need to share this across all log partitions.
416
411
next_offset_id : Option < Arc < AtomicU32 > > ,
417
- ) -> Result < Chunk < MaterializedLogRecord < ' me > > , LogMaterializerError > {
412
+ ) -> Result < Chunk < MaterializedLogRecord > , LogMaterializerError > {
418
413
// Trace the total_len since len() iterates over the entire chunk
419
414
// and we don't want to do that just to trace the length.
420
415
tracing:: info!( "Total length of logs in materializer: {}" , logs. total_len( ) ) ;
@@ -748,10 +743,10 @@ pub async fn materialize_logs<'me>(
748
743
749
744
// This needs to be public for testing
750
745
#[ allow( async_fn_in_trait) ]
751
- pub trait SegmentWriter < ' a > {
746
+ pub trait SegmentWriter {
752
747
async fn apply_materialized_log_chunk (
753
748
& self ,
754
- records : Chunk < MaterializedLogRecord < ' a > > ,
749
+ records : Chunk < MaterializedLogRecord > ,
755
750
) -> Result < ( ) , ApplyMaterializedLogError > ;
756
751
async fn commit ( self ) -> Result < impl SegmentFlusher , Box < dyn ChromaError > > ;
757
752
}
@@ -1880,7 +1875,10 @@ mod tests {
1880
1875
assert_eq ! ( hello_found, 1 ) ;
1881
1876
assert_eq ! ( hello_again_found, 1 ) ;
1882
1877
assert ! ( log. data_record. is_some( ) ) ;
1883
- assert_eq ! ( log. data_record. as_ref( ) . unwrap( ) . document, Some ( "doc1" ) ) ;
1878
+ assert_eq ! (
1879
+ log. data_record. as_ref( ) . unwrap( ) . document,
1880
+ Some ( "doc1" . to_string( ) )
1881
+ ) ;
1884
1882
assert_eq ! (
1885
1883
log. data_record. as_ref( ) . unwrap( ) . embedding,
1886
1884
vec![ 1.0 , 2.0 , 3.0 ] . as_slice( )
0 commit comments