Skip to content

Commit

Permalink
feat: detect bouts of activity and calculate bout metrics.
Browse files Browse the repository at this point in the history
Result is a dataframe (each row is a bout) with the following columns:

- StartTime: Bout start time.
- EndTime: Bout end time.
- Duration(mins): Bout duration (in minutes).
- TimeSinceLast(mins): Time since end of last bout (in minutes).
- Steps: Number of steps in the bout.
- Cadence(steps/min): Bout cadence (steps per minute).
- CadenceSD(steps/min): Bout cadence standard deviation (steps per minute).
- Cadence25th(steps/min): 25th percentile cadence (steps per minute) in the bout.
- Cadence50th(steps/min): Median cadence (steps per minute) in the bout.
- Cadence75th(steps/min): 75th percentile cadence (steps per minute) in the bout.
- ENMO(mg): Bout mean ENMO (milli-g).
- ENMOMed(mg): Bout median ENMO (milli-g).

Output file: <input-file>-Bouts.csv.gz
  • Loading branch information
chanshing committed Sep 6, 2024
1 parent 08acd43 commit e67b300
Showing 1 changed file with 155 additions and 0 deletions.
155 changes: 155 additions & 0 deletions src/stepcount/stepcount.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,11 @@
import argparse
import json
import re
from collections import defaultdict
import numpy as np
import pandas as pd
import joblib
from numba import njit

from stepcount import utils
from stepcount import __version__
Expand Down Expand Up @@ -258,6 +260,11 @@ def main():
info['CadencePeak30Adjusted(steps/min)_Weekday'] = cadence_summary_adj['weekday_cadence_peak30']
info['Cadence95thAdjusted(steps/min)_Weekday'] = cadence_summary_adj['weekday_cadence_p95']

# Bouts summary
bouts_summary = summarize_bouts(Y, W, data)
bouts_summary = pd.DataFrame(bouts_summary)
bouts_summary.to_csv(f"{outdir}/{basename}-Bouts.csv.gz", index=False)

# Save Info.json
with open(f"{outdir}/{basename}-Info.json", 'w') as f:
json.dump(info, f, indent=4, cls=utils.NpEncoder)
Expand Down Expand Up @@ -899,6 +906,154 @@ def _cadence_p95(x, steptol, walktol=30):
}


def summarize_bouts(
Y: pd.Series,
W: pd.Series,
data: pd.DataFrame
):
"""
Summarize bouts of walking activity. For each detected bout, it calculates
start and end times, duration, total steps, ENMO and cadence metrics.
Parameters:
- Y (pd.Series): A pandas Series of step counts.
- W (pd.Series): A pandas Series indicating walking (1) and non-walking (0) windows, aligned with Y.
- data (pd.DataFrame): A pandas DataFrame containing raw acceleration data.
Returns:
- dict: A dictionary containing summary information for each detected bout, with the following keys:
- 'StartTime': List of start times for each bout.
- 'EndTime': List of end times for each bout.
- 'Duration(mins)': List of durations (in minutes) for each bout.
- 'TimeSinceLast(mins)': List of time since the last bout (in minutes) for each bout.
- 'Steps': List of total steps for each bout.
- 'Cadence(steps/min)': List of average cadence (steps per minute) for each bout.
- 'CadenceSD(steps/min)': List of standard deviations of cadence (steps per minute) for each bout.
- 'Cadence25th(steps/min)': List of 25th percentile cadence (steps per minute) for each bout.
- 'Cadence50th(steps/min)': List of median cadence (steps per minute) for each bout.
- 'Cadence75th(steps/min)': List of 75th percentile cadence (steps per minute) for each bout.
- 'ENMO(mg)': Mean ENMO for each bout.
- 'ENMOMed(mg)': Median ENMO for each bout.
"""

bouts = numba_detect_bouts(W.to_numpy())

bouts_summary = defaultdict(list)

dt = utils.infer_freq(Y.index)
one_min = pd.Timedelta('1min')

# Truncated ENMO: Euclidean norm minus one and clipped at zero
v = np.sqrt(data['x'] ** 2 + data['y'] ** 2 + data['z'] ** 2)
v = np.clip(v - 1, a_min=0, a_max=None)
v *= 1000 # convert to mg
# resample to match Y
v = v.resample(dt).mean().reindex(Y.index, method='nearest', tolerance=dt)

tlast = None
for i, n in bouts:
y = Y.iloc[i:i + n]
bout_steps = y.sum()
bout_duration = n * dt / one_min # in minutes
bout_cadence = bout_steps / bout_duration # steps per minute
# rescale to steps per minute
y *= one_min / dt
bout_cadence_sd = y.std()
bout_cadence_25th = y.quantile(0.25)
bout_cadence_50th = y.quantile(0.50)
bout_cadence_75th = y.quantile(0.75)
tstart, tend = y.index[0], y.index[-1]
if tlast is not None:
tsince = (tstart - tlast).total_seconds() / 60
else:
tsince = np.nan
tlast = y.index[-1]
bout_enmo = v.loc[tstart:tend].mean()
bout_enmo_med = v.loc[tstart:tend].median()
bouts_summary['StartTime'].append(tstart.strftime('%Y-%m-%d %H:%M:%S'))
bouts_summary['EndTime'].append(tend.strftime('%Y-%m-%d %H:%M:%S'))
bouts_summary['Duration(mins)'].append(bout_duration)
bouts_summary['TimeSinceLast(mins)'].append(tsince)
bouts_summary['Steps'].append(bout_steps)
bouts_summary['Cadence(steps/min)'].append(bout_cadence)
bouts_summary['CadenceSD(steps/min)'].append(bout_cadence_sd)
bouts_summary['Cadence25th(steps/min)'].append(bout_cadence_25th)
bouts_summary['Cadence50th(steps/min)'].append(bout_cadence_50th)
bouts_summary['Cadence75th(steps/min)'].append(bout_cadence_75th)
bouts_summary['ENMO(mg)'].append(bout_enmo)
bouts_summary['ENMOMed(mg)'].append(bout_enmo_med)

return bouts_summary


@njit
def numba_detect_bouts(
arr: np.ndarray,
min_percent_ones: float = 0.8,
max_trailing_zeros: int = 3
):
"""
For a series of 0s and 1s, find the start and duration of each bout.
A bout is a series of 0s and 1s where any expanding average is at least
`min_percent_ones`. If a bout has more than `max_trailing_zeros` trailing
0s, the bout ends. Trailing 0s are not counted in the bout length.
Parameters:
- arr (np.ndarray): An array of 0s and 1s representing activity data.
- min_percent_ones (float, optional): The minimum proportion of 1s required for a sequence
to be considered a bout. Default is 0.8.
- max_trailing_zeros (int, optional): The maximum number of trailing 0s allowed in a bout
before it is considered to have ended. Default is 3.
Returns:
- list of tuple: A list of tuples where each tuple represents a detected bout. Each tuple
contains the start index and the length of the bout.
Example:
arr: [0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0]
min_percent_ones: 0.5
max_trailing_zeros: 3
Output: [(1, 1), (4, 2), (9, 10)]
min_percent_ones: 0.5
max_trailing_zeros: 2
Output: [(1, 1), (4, 2), (9, 3), (15, 4)]
"""

bouts = []
bout_start = None
bout_length = 0
bout_sum = 0
trailing_zeros = 0

for i, a in enumerate(arr):
if a == 1:
if bout_start is None: # start new bout
bout_start = i
bout_sum += 1
bout_length += 1
trailing_zeros = 0
else:
if bout_start is None:
continue # skip if no bout is ongoing
bout_length += 1
trailing_zeros += 1
# if too many trailing zeros or not enough ones, end the bout
if trailing_zeros > max_trailing_zeros or bout_sum / bout_length < min_percent_ones:
bouts.append((bout_start, bout_length - trailing_zeros))
bout_start = None
bout_length = 0
bout_sum = 0
trailing_zeros = 0

# if the last bout is ongoing, add it to the list
if bout_start is not None:
bouts.append((bout_start, bout_length - trailing_zeros))

return bouts



if __name__ == '__main__':
main()

0 comments on commit e67b300

Please sign in to comment.