Skip to content

Commit

Permalink
Merge pull request #3 from unmtransinfo/kekule
Browse files Browse the repository at this point in the history
Finalize badapple_classic db
  • Loading branch information
Jack-42 authored Oct 31, 2024
2 parents f4d65f5 + 1b95d53 commit 64ba003
Show file tree
Hide file tree
Showing 11 changed files with 923 additions and 952 deletions.
6 changes: 2 additions & 4 deletions badapple1_comparison/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,10 +69,8 @@ Run `bash badapple1_comparison/sh_scripts/run_generate_scaffolds.sh`. This will
* `o_mol2scaf`: TSV file mapping compound CID to scaffold ID(s)

### (4) Initializing the DB
1. Install postgresql with the RDKit cartridge (requires sudo):
`apt install postgresql-14-rdkit`
2. Run `bash badapple1_comparison/sh_scripts/create_and_load_db.sh`
3. (Optional) Drop the activity table to save storage:
1. Run `bash badapple1_comparison/sh_scripts/create_and_load_db.sh`
2. (Optional) Empty the activity table (delete all of its rows; the table itself is kept) to save storage:

`psql -d badapple_classic -c "DELETE FROM activity"`

Expand Down
14 changes: 0 additions & 14 deletions badapple1_comparison/sh_scripts/db/annotate_assaystats.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,6 @@ cd $REPO_DIR
# Step 1) Generate compound activity statistics. Populate/annotate
# compound table with calculated assay stats.
# (sTotal,sTested,sActive,aTested,aActive,wTested,wActive)
psql -d $DB_NAME -c "ALTER TABLE $SCHEMA.compound ADD COLUMN nsub_total INTEGER"
psql -d $DB_NAME -c "ALTER TABLE $SCHEMA.compound ADD COLUMN nsub_tested INTEGER"
psql -d $DB_NAME -c "ALTER TABLE $SCHEMA.compound ADD COLUMN nsub_active INTEGER"
psql -d $DB_NAME -c "UPDATE $SCHEMA.compound SET (nsub_total, nsub_tested, nsub_active) = (NULL, NULL, NULL)"
python src/annotate_db_assaystats.py \
--annotate_compounds \
Expand All @@ -44,21 +41,10 @@ echo "Done annotating compounds."
# Step 2) Generate scaf activity statistics. Populate/annotate scaffold table with calculated assay stats.
# Scaffold table must be ALTERed to contain activity statistics.
# (cTotal,cTested,cActive,sTotal,sTested,sActive,aTested,aActive,wTested,wActive)
psql -d $DB_NAME -c "ALTER TABLE $SCHEMA.scaffold ADD COLUMN ncpd_total INTEGER"
psql -d $DB_NAME -c "ALTER TABLE $SCHEMA.scaffold ADD COLUMN ncpd_tested INTEGER"
psql -d $DB_NAME -c "ALTER TABLE $SCHEMA.scaffold ADD COLUMN ncpd_active INTEGER"
psql -d $DB_NAME -c "UPDATE $SCHEMA.scaffold SET (ncpd_total, ncpd_tested, ncpd_active) = (NULL, NULL, NULL)"
psql -d $DB_NAME -c "ALTER TABLE $SCHEMA.scaffold ADD COLUMN nsub_total INTEGER"
psql -d $DB_NAME -c "ALTER TABLE $SCHEMA.scaffold ADD COLUMN nsub_tested INTEGER"
psql -d $DB_NAME -c "ALTER TABLE $SCHEMA.scaffold ADD COLUMN nsub_active INTEGER"
psql -d $DB_NAME -c "UPDATE $SCHEMA.scaffold SET (nsub_total, nsub_tested, nsub_active) = (NULL, NULL, NULL)"
psql -d $DB_NAME -c "ALTER TABLE $SCHEMA.scaffold ADD COLUMN nass_tested INTEGER"
psql -d $DB_NAME -c "ALTER TABLE $SCHEMA.scaffold ADD COLUMN nass_active INTEGER"
psql -d $DB_NAME -c "UPDATE $SCHEMA.scaffold SET (nass_tested, nass_active) = (NULL, NULL)"
psql -d $DB_NAME -c "ALTER TABLE $SCHEMA.scaffold ADD COLUMN nsam_tested INTEGER"
psql -d $DB_NAME -c "ALTER TABLE $SCHEMA.scaffold ADD COLUMN nsam_active INTEGER"
psql -d $DB_NAME -c "UPDATE $SCHEMA.scaffold SET (nsam_tested, nsam_active) = (NULL, NULL)"
psql -d $DB_NAME -c "ALTER TABLE $SCHEMA.scaffold ADD COLUMN in_drug BOOLEAN"
psql -d $DB_NAME -c "UPDATE $SCHEMA.scaffold SET in_drug = NULL"


Expand Down
3 changes: 2 additions & 1 deletion badapple1_comparison/sh_scripts/db/create_db.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ psql -h $DB_HOST -d $DB_NAME -c "COMMENT ON DATABASE ${DB_NAME} IS '$COMMENT'"
psql -h $DB_HOST -d $DB_NAME <<EOF
CREATE TABLE IF NOT EXISTS $SCHEMA.scaffold (
id INTEGER PRIMARY KEY,
scafsmi VARCHAR(512) NOT NULL,
scafsmi VARCHAR(512) NOT NULL UNIQUE,
kekule_scafsmi VARCHAR(512) NOT NULL UNIQUE,
scaftree VARCHAR(2048),
ncpd_total INTEGER,
ncpd_tested INTEGER,
Expand Down
9 changes: 5 additions & 4 deletions badapple1_comparison/sh_scripts/db/load_csv_files.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,19 +21,20 @@ DATA_DIR=$5
HIERS_SCRIPT="generate_scaffolds.py"

# Step 1) Load scafs with scafids + scaf2scaf relationship
# expects SCAF_TSV_FILE to have header: scaffold_id smiles hierarchy scaf2scaf (TSV-separated)
# expects SCAF_TSV_FILE to have header: scaffold_id canon_smiles kekule_smiles hierarchy scaf2scaf (tab-separated)
# (output from ${HIERS_SCRIPT})
SCAF_TSV_FILE="scafs.tsv"
SCAF_TSV_PATH="$DATA_DIR/$SCAF_TSV_FILE"
psql -h $DB_HOST -d $DB_NAME <<EOF
CREATE TEMP TABLE temp_scaf (
scaffold_id INTEGER PRIMARY KEY,
smiles VARCHAR(512) NOT NULL,
canon_smiles VARCHAR(512) NOT NULL UNIQUE,
kekule_smiles VARCHAR(512) NOT NULL UNIQUE,
hierarchy INTEGER,
scaf2scaf VARCHAR(2048)
);
\COPY temp_scaf (scaffold_id, smiles, hierarchy, scaf2scaf) FROM '$SCAF_TSV_PATH' WITH (FORMAT CSV, DELIMITER E'\t', HEADER true);
INSERT INTO ${SCHEMA}.scaffold (id, scafsmi, scaftree) SELECT scaffold_id, smiles, scaf2scaf FROM temp_scaf;
\COPY temp_scaf (scaffold_id, canon_smiles, kekule_smiles, hierarchy, scaf2scaf) FROM '$SCAF_TSV_PATH' WITH (FORMAT CSV, DELIMITER E'\t', HEADER true);
INSERT INTO ${SCHEMA}.scaffold (id, scafsmi, kekule_scafsmi, scaftree) SELECT scaffold_id, canon_smiles, kekule_smiles, scaf2scaf FROM temp_scaf;
DROP TABLE temp_scaf;
EOF
psql -h $DB_HOST -d $DB_NAME -c "COMMENT ON TABLE ${SCHEMA}.scaffold IS 'Scaffold definitions from HierS, see ${HIERS_SCRIPT}. Input file is ${SCAF_TSV_PATH}'"
Expand Down
225 changes: 0 additions & 225 deletions badapple1_comparison/src/notebooks/api_demo.ipynb

This file was deleted.

26 changes: 13 additions & 13 deletions badapple1_comparison/src/notebooks/assay_comparison.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
},
{
"cell_type": "code",
"execution_count": 51,
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -35,7 +35,7 @@
},
{
"cell_type": "code",
"execution_count": 52,
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -55,7 +55,7 @@
},
{
"cell_type": "code",
"execution_count": 53,
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -78,7 +78,7 @@
},
{
"cell_type": "code",
"execution_count": 54,
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -88,7 +88,7 @@
},
{
"cell_type": "code",
"execution_count": 55,
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -105,7 +105,7 @@
},
{
"cell_type": "code",
"execution_count": 56,
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -123,7 +123,7 @@
},
{
"cell_type": "code",
"execution_count": 57,
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -133,7 +133,7 @@
},
{
"cell_type": "code",
"execution_count": 58,
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -144,7 +144,7 @@
},
{
"cell_type": "code",
"execution_count": 59,
"execution_count": 25,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -196,7 +196,7 @@
"0 774 823 "
]
},
"execution_count": 59,
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -207,7 +207,7 @@
},
{
"cell_type": "code",
"execution_count": 60,
"execution_count": 26,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -259,7 +259,7 @@
"0 773 823 "
]
},
"execution_count": 60,
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -277,7 +277,7 @@
},
{
"cell_type": "code",
"execution_count": 61,
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
Expand Down
Loading

0 comments on commit 64ba003

Please sign in to comment.