Skip to content

Commit

Permalink
update script to generate data for ourdna dashboard
Browse files Browse the repository at this point in the history
  • Loading branch information
dancoates committed Jan 30, 2025
1 parent d741d18 commit 405c74e
Show file tree
Hide file tree
Showing 2 changed files with 216 additions and 90 deletions.
305 changes: 215 additions & 90 deletions test/data/generate_ourdna_data.py
Original file line number Diff line number Diff line change
@@ -1,118 +1,242 @@
#!/usr/bin/env python3
"""
This is a simple script to generate 3 participants & its samples in the ourdna project
This is a simple script to generate some participants and samples for testing ourdna
Local Backend API needs to be running prior to executing this script
NOTE: This is WIP and will be updated with more features
If you want to regenerate the data you would need to
delete records from table sample and participant first
"""

import argparse
import asyncio
import datetime
import random
import uuid
from typing import Sequence, Union

from metamist.apis import ParticipantApi
from metamist.models import ParticipantUpsert, SampleUpsert

PRIMARY_EXTERNAL_ORG = ''

PARTICIPANTS = [
ParticipantUpsert(
external_ids={PRIMARY_EXTERNAL_ORG: 'EX01'},
reported_sex=2,
karyotype='XX',
meta={'consent': True, 'field': 1},
samples=[

# Ancestry labels sampled by create_participant() for the participant, mother
# and father ancestry meta fields.
# Order matters: the lists are drawn with weight_by_index=True, so entries
# nearer the top are picked more often than entries nearer the bottom.
ANCESTRIES = [
'Vietnamese',
'Filipino',
'Australian',
'Spanish',
'Acehnese',
'Afghan',
'African American',
'American',
'Amhara',
'British',
'Chinese',
'English',
'German',
'Greek',
'Indian',
'Irish',
'Italian',
'Japanese',
'Malay',
'Norwegian',
'Scottish',
'Venezuelan',
]

# Birthplace answers sampled by create_participant() for the mother and father
# birthplace meta fields. Includes the two non-answers as literal values.
# Order matters: drawn with weight_by_index=True, so earlier entries are
# picked more often.
BIRTHPLACES = [
'Philippines',
'Vietnam',
'Cambodia',
'Australia',
"I don't know",
"I'd prefer not to say",
'Thailand',
]

# Language labels sampled by create_participant() for the
# 'ancestry-language-other-than-english' meta field.
# Order matters: drawn with weight_by_index=True, so earlier entries are
# picked more often.
LANGUAGES = [
'Vietnamese',
'Filipino',
'Tagalog',
'Cebuano',
'English',
'Bisaya',
'Ilonggo (Hiligaynon)',
'Cantonese',
'Other Southern Asian Languages',
'Spanish',
'Ilokano',
'Bikol',
'American Languages',
# NOTE(review): spelled with two capital I's, unlike 'Ilokano' above - confirm
# whether this is a deliberate dirty-data value or a typo.
'IIokano',
'Hawaiian English',
'Armenian',
'Khmer',
'Acehnese',
'Other Southeast Asian Languages',
'Urdu',
'French',
'Japanese',
'Thai',
'Italian',
'Croatian',
'Chin Haka',
'Arabic',
]


# Values create_samples() draws from for the 'collection-event-type' sample meta field.
event_type = ['OSS', 'Walk in']
# Values create_samples() draws from for the 'processing-site' sample meta field.
processing_site = ['bbv', 'Westmead']


def random_date_range():
    """Generate a random (start, end) pair of timestamp strings.

    The start is between 1 and 365 days before the current local time and the
    end is between 1 and 150 hours after the start. Both are formatted as
    ``'%Y-%m-%d %H:%M:%S'``.

    Returns:
        A 2-tuple of strings ``(start, end)``. About one call in eleven
        returns ``('N/A', 'N/A')`` instead of real timestamps.
    """
    # Throw in the occasional invalid date to simulate the current state of the data
    # this should be removed once the data is cleaned up
    if random.randint(0, 10) == 0:
        return 'N/A', 'N/A'

    timestamp_format = '%Y-%m-%d %H:%M:%S'
    start_date = datetime.datetime.now() - datetime.timedelta(
        days=random.randint(1, 365)
    )
    end_date = start_date + datetime.timedelta(hours=random.randint(1, 150))
    return (
        start_date.strftime(timestamp_format),
        end_date.strftime(timestamp_format),
    )


def random_choice(
    choices: Sequence[Union[str, bool, int]], weight_by_index: bool = False
):
    """Pick one random element from ``choices``.

    Args:
        choices: the candidate values.
        weight_by_index: when True, earlier elements are favoured. The element
            at index ``i`` of ``n`` appears ``1 + (n - i)`` times in the pool
            that is sampled, against once for every element when False.

    Returns:
        One element of ``choices``.

    Raises:
        ValueError: if ``choices`` is empty.
    """
    weighted_choices = list(choices)
    if not weighted_choices:
        # Fail with a clear message rather than randint's "empty range" error.
        raise ValueError('Cannot pick a random choice from an empty sequence')

    if weight_by_index:
        total = len(weighted_choices)
        for i, choice in enumerate(choices):
            weighted_choices.extend([choice] * (total - i))

    return random.choice(weighted_choices)

Check warning on line 114 in test/data/generate_ourdna_data.py

View check run for this annotation

Codecov / codecov/patch

test/data/generate_ourdna_data.py#L110-L114

Added lines #L110 - L114 were not covered by tests


def random_list(
    choices: Sequence[Union[str, bool, int]],
    weight_by_index: bool = False,
    min_len: int = 1,
    max_len: int = 5,
):
    """Generate a random list of distinct values drawn from ``choices``.

    Args:
        choices: the candidate values.
        weight_by_index: forwarded to ``random_choice`` for every draw.
        min_len: smallest list length that may be produced (inclusive).
        max_len: largest list length that may be produced (inclusive).

    Returns:
        A list with no repeated values, whose length is drawn uniformly from
        ``min_len``..``max_len``.

    Raises:
        ValueError: if the drawn length exceeds the number of distinct values
            in ``choices``.
    """
    result: list[Union[str, bool, int]] = []
    desired_len = random.randint(min_len, max_len)

    # Count distinct values, not raw entries: the loop below only appends
    # values it has not seen yet, so a duplicate-heavy ``choices`` could never
    # reach the desired length and would spin forever.
    distinct_available = len(set(choices))
    if desired_len > distinct_available:
        raise ValueError(
            f'Desired length {desired_len} is greater than the number of choices {distinct_available}'
        )

    while len(result) < desired_len:
        choice = random_choice(choices, weight_by_index)
        if choice not in result:
            result.append(choice)

    return result

Check warning on line 135 in test/data/generate_ourdna_data.py

View check run for this annotation

Codecov / codecov/patch

test/data/generate_ourdna_data.py#L135

Added line #L135 was not covered by tests


def create_samples():
    """Create a sample with nested samples.

    Builds one top-level ``blood`` sample with four nested samples
    (``guthrie-card``, ``plasma``, ``buffy-coat`` and ``pbmc``). Every sample
    gets a fresh UUID as its external id and is marked active.

    Returns:
        The top-level ``SampleUpsert``.
    """
    start_date, end_date = random_date_range()

    # The same meta dict object is passed to the parent and to every nested sample.
    meta = {
        'collection-time': start_date,
        'process-end-time': end_date,
        'collection-event-type': random_choice(event_type),
        'processing-site': random_choice(processing_site),
    }

    nested_sample_types = ('guthrie-card', 'plasma', 'buffy-coat', 'pbmc')

    sample = SampleUpsert(
        external_ids={PRIMARY_EXTERNAL_ORG: str(uuid.uuid4())},
        type='blood',
        active=True,
        nested_samples=[
            SampleUpsert(
                external_ids={PRIMARY_EXTERNAL_ORG: str(uuid.uuid4())},
                type=nested_type,
                active=True,
                meta=meta,
            )
            for nested_type in nested_sample_types
        ],
        meta=meta,
    )

    return sample

Check warning on line 182 in test/data/generate_ourdna_data.py

View check run for this annotation

Codecov / codecov/patch

test/data/generate_ourdna_data.py#L182

Added line #L182 was not covered by tests


def create_participant():
    """Create a participant with nested samples.

    The participant gets a fresh UUID as its external id, a random reported
    sex (1 or 2), randomised ancestry and consent metadata, and one sample
    tree from ``create_samples``.

    Returns:
        The populated ``ParticipantUpsert``.
    """
    yes_no = ['yes', 'no']

    def pick_weighted(options):
        # One or two distinct values, favouring entries nearer the top of the list.
        return random_list(options, weight_by_index=True, min_len=1, max_len=2)

    participant = ParticipantUpsert(
        external_ids={PRIMARY_EXTERNAL_ORG: str(uuid.uuid4())},
        reported_sex=random_choice([1, 2]),
        meta={
            'ancestry-participant-ancestry': pick_weighted(ANCESTRIES),
            'ancestry-mother-ancestry': pick_weighted(ANCESTRIES),
            'ancestry-father-ancestry': pick_weighted(ANCESTRIES),
            'ancestry-mother-birthplace': pick_weighted(BIRTHPLACES),
            'ancestry-father-birthplace': pick_weighted(BIRTHPLACES),
            'ancestry-language-other-than-english': pick_weighted(LANGUAGES),
            'birth-year': random.randint(1900, 2010),
            'blood-consent': random_choice(yes_no),
            'informed-consent': random_choice(yes_no),
            'choice-receive-genetic-info': random_choice(yes_no),
            'choice-family-receive-genetic-info': random_choice(yes_no),
            'choice-recontact': random_choice(yes_no),
            'choice-general-updates': random_choice(yes_no),
            'choice-use-of-cells-in-future-research-consent': random_choice(yes_no),
            # Unweighted: either option, or both.
            'choice-use-of-cells-in-future-research-understanding': random_list(
                [
                    'grown_indefinitely',
                    'used_by_approved_researchers',
                ],
                min_len=1,
                max_len=2,
            ),
        },
        samples=[create_samples()],
    )

    return participant

Check warning on line 231 in test/data/generate_ourdna_data.py

View check run for this annotation

Codecov / codecov/patch

test/data/generate_ourdna_data.py#L231

Added line #L231 was not covered by tests

async def main(project='ourdna', num_participants=10):
    """Generate random participants and upsert them into ``project``.

    Args:
        project: project name passed to ``ParticipantApi.upsert_participants``.
        num_participants: how many participants to generate. Numeric strings
            are accepted because the value may arrive straight from the
            command line.
    """
    participant_api = ParticipantApi()

    # Coerce before range(): a string here would raise TypeError.
    participants = [create_participant() for _ in range(int(num_participants))]
    participants_rec = participant_api.upsert_participants(project, participants)
    print('inserted participants:', participants_rec)


Expand All @@ -121,5 +245,6 @@ async def main(project='ourdna'):
description='Script for generating data in the ourdna test project'
)
parser.add_argument('--project', type=str, default='ourdna')
# Parse as int: main() passes this value to range(), which rejects strings.
parser.add_argument('--num-participants', type=int, default=10)

Check warning on line 248 in test/data/generate_ourdna_data.py

View check run for this annotation

Codecov / codecov/patch

test/data/generate_ourdna_data.py#L248

Added line #L248 was not covered by tests
args = vars(parser.parse_args())
asyncio.new_event_loop().run_until_complete(main(**args))
1 change: 1 addition & 0 deletions web/src/pages/report/SqlQueryUI.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -425,6 +425,7 @@ export default function SqlQueryUi() {
{projectName && (selectedTableQuery || tableQueryValue) && (
<TableFromQuery
project={projectName}
showToolbar
query={selectedTableQuery || tableQueryValue}
/>
)}
Expand Down

0 comments on commit 405c74e

Please sign in to comment.