From f10be20c224f36a31c72f1022912297857852ba5 Mon Sep 17 00:00:00 2001 From: Stephen Privitera Date: Thu, 14 Nov 2024 15:03:40 +0100 Subject: [PATCH] docs --- docs/index.md | 2 +- docs/installation.md | 2 +- phenex/sim.py | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/index.md b/docs/index.md index 0108fd0..e6f4dee 100644 --- a/docs/index.md +++ b/docs/index.md @@ -9,7 +9,7 @@ Implementing observational studies using real-world data (RWD) is challenging, r PhenEx (Automated Phenotype Extraction) fills this gap. PhenEx is a Python-based software package that provides reusuable and end-to-end tested implementations of commonly performed operations in the implementation of observational studies. The main advantages of PhenEx are: - **Arbitrarily complex medical definitions**: Build medical definitions that depend on diagnoses, labs, procedures, and encounter context, as well as on other medical definitions -- **Data-model agnostic**: Work with almost any RWD dataset with only extremely minimal mappings required. Only map the data needed for the study execution. +- **Data-model agnostic**: Work with almost any RWD dataset with only extremely minimal mappings. Only map the data needed for the study execution. Use the ontologies native to your dataset. - **Portable**: Built on top of [ibis](https://ibis-project.org/), PhenEx works with any backend that ibis supports, including snowflake, PySpark and many more! - **Intuitive interface**: Study specification in PhenEx mirrors plain language description of the study. - **High test coverage**: Full confidence answer is correct. diff --git a/docs/installation.md b/docs/installation.md index ee03308..070dbd7 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -16,7 +16,7 @@ Coming soon! To install from source, run the following from within your virtual environment: ``` -git clone git@github.com:Bayer-Group/PhenEx.git && \ +git clone https://github.com/Bayer-Group/PhenEx.git && \ cd PhenEx && \ pip install -r requirements.txt && \ pip install . diff --git a/phenex/sim.py b/phenex/sim.py index 101c2c8..7de3456 100644 --- a/phenex/sim.py +++ b/phenex/sim.py @@ -5,7 +5,7 @@ from dataclasses import asdict -def generate_fake_data(n_patients: int, domains: DomainsDictionary) -> Dict[str, pd.DataFrame]: +def generate_mock_mapped_tables(n_patients: int, domains: DomainsDictionary) -> Dict[str, pd.DataFrame]: """ Generate fake data for N patients based on the given domains. @@ -24,11 +24,9 @@ def generate_fake_data(n_patients: int, domains: DomainsDictionary) -> Dict[str, if "DATE" in col: start_date = pd.to_datetime('2000-01-01') end_date = pd.to_datetime('2020-12-31') - data[col] = pd.to_datetime(np.random.randint(start_date.value, end_date.value, n_patients)) + data[col] = pd.to_datetime(np.random.randint(start_date.value, end_date.value, n_patients)).date elif "ID" in col: data[col] = np.arange(1, n_patients + 1) - elif "CODE" in col: - data[col] = np.random.choice(['A', 'B', 'C', 'D'], n_patients) elif "VALUE" in col: data[col] = np.random.uniform(0, 100, n_patients) elif "CODE_TYPE" in col: @@ -40,6 +38,8 @@ def generate_fake_data(n_patients: int, domains: DomainsDictionary) -> Dict[str, data[col] = np.random.choice(['CPT', 'HCPCS'], n_patients) else: data[col] = np.random.choice(['TYPE1', 'TYPE2'], n_patients) + elif "CODE" in col: + data[col] = np.random.choice(['A', 'B', 'C', 'D', 'E', 'F', 'G'], n_patients) else: data[col] = np.random.choice(range(1000), n_patients) fake_data[domain] = pd.DataFrame(data)