Skip to content

Commit

Permalink
Do not raise an exception on an empty DataFrame
Browse files (browse the repository at this point in the history)
  • Loading branch information
LoannPeurey committed Aug 1, 2024
1 parent a2d151f commit d522cba
Showing 1 changed file with 6 additions and 5 deletions.
11 changes: 6 additions & 5 deletions ChildProject/pipelines/conversations.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import datetime
import multiprocessing as mp
import logging
import functools

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -223,15 +224,14 @@ def _process_conversation(self, conversation, rec): #process recording line

return result

def _process_recording(self, recording):
def _process_recording(self, recording, grouper):
"""for one recording, get the segments required, group by conversation and launch computation for each block
:param recording: recording_filename of the recording to process
:type recording: str
:return: list of dicts containing the computed features for each conversation of that recording
:rtype: list[dict]
"""
grouper = 'conv_count'
segments = self.retrieve_segments(recording)
segments['voc_duration'] = segments['segment_offset'] - segments['segment_onset']

Expand Down Expand Up @@ -260,16 +260,17 @@ def extract(self):
:return: DataFrame of computed features
:rtype: pandas.DataFrame
"""
grouper = 'conv_count'
if self.threads == 1:

results = list(itertools.chain.from_iterable(map(self._process_recording, self.recordings)))
results = list(itertools.chain.from_iterable(map(functools.partial(self._process_recording, grouper=grouper), self.recordings)))
else:
with mp.Pool(
processes=self.threads if self.threads >= 1 else mp.cpu_count()
) as pool:
results = list(itertools.chain.from_iterable(pool.map(self._process_recording, self.recordings)))
results = list(itertools.chain.from_iterable(pool.map(functools.partial(self._process_recording, grouper=grouper), self.recordings)))

self.conversations = pd.DataFrame(results) if len(results) else pd.DataFrame(columns=grouper)
self.conversations = pd.DataFrame(results) if len(results) else pd.DataFrame(columns=[grouper])

# now add the rec_cols and child_cols in the result
if self.rec_cols:
Expand Down

0 comments on commit d522cba

Please sign in to comment.