diff --git a/dataload/01_ingest/grebi_ingest_reactome/src/main.rs b/dataload/01_ingest/grebi_ingest_reactome/src/main.rs index 8ef0d90..71c3517 100644 --- a/dataload/01_ingest/grebi_ingest_reactome/src/main.rs +++ b/dataload/01_ingest/grebi_ingest_reactome/src/main.rs @@ -22,7 +22,7 @@ fn main() { let normalise:PrefixMap = { - let rdr = BufReader::new( std::fs::File::open(env::var("GREBI_HOME").unwrap().to_owned() + "/prefix_maps/prefix_map_normalise.json").unwrap() ); + let rdr = BufReader::new( std::fs::File::open(env::var("GREBI_DATALOAD_HOME").unwrap().to_owned() + "/prefix_maps/prefix_map_normalise.json").unwrap() ); let mut builder = PrefixMapBuilder::new(); serde_json::from_reader::<_, HashMap>(rdr).unwrap().into_iter().for_each(|(k, v)| { builder.add_mapping(k, v); diff --git a/dataload/07_create_db/neo4j/neo4j_import.slurm.py b/dataload/07_create_db/neo4j/neo4j_import.slurm.py index b8611f9..656e3bd 100644 --- a/dataload/07_create_db/neo4j/neo4j_import.slurm.py +++ b/dataload/07_create_db/neo4j/neo4j_import.slurm.py @@ -34,8 +34,8 @@ def main(): '--bind ' + os.path.abspath(".") + ':/mnt', '--bind ' + shlex.quote(neo_data_path) + ':/data', '--bind ' + shlex.quote(neo_logs_path) + ':/logs', - '--bind ' + os.path.abspath(os.path.join(os.environ['GREBI_HOME'], '07_create_db/neo4j/neo4j_import.dockersh')) + ':/import.sh', - '--bind ' + os.path.abspath(os.path.join(os.environ['GREBI_HOME'], '07_create_db/neo4j/cypher')) + ':/cypher', + '--bind ' + os.path.abspath(os.path.join(os.environ['GREBI_DATALOAD_HOME'], '07_create_db/neo4j/neo4j_import.dockersh')) + ':/import.sh', + '--bind ' + os.path.abspath(os.path.join(os.environ['GREBI_DATALOAD_HOME'], '07_create_db/neo4j/cypher')) + ':/cypher', '--writable-tmpfs', '--network=none', '--env NEO4J_AUTH=none', @@ -49,8 +49,8 @@ def main(): ] + list(map(lambda f: "-v " + os.path.abspath(f) + ":/mnt/" + os.path.basename(f), glob.glob(args.in_csv_path + "/neo_*"))) + [ '-v ' + shlex.quote(neo_data_path) + ':/data', '-v ' + shlex.quote(neo_logs_path) + ':/logs', - '-v ' + os.path.abspath(os.path.join(os.environ['GREBI_HOME'], '07_create_db/neo4j/neo4j_import.dockersh')) + ':/import.sh', - '-v ' + os.path.abspath(os.path.join(os.environ['GREBI_HOME'], '07_create_db/neo4j/cypher')) + ':/cypher', + '-v ' + os.path.abspath(os.path.join(os.environ['GREBI_DATALOAD_HOME'], '07_create_db/neo4j/neo4j_import.dockersh')) + ':/import.sh', + '-v ' + os.path.abspath(os.path.join(os.environ['GREBI_DATALOAD_HOME'], '07_create_db/neo4j/cypher')) + ':/cypher', '-e NEO4J_AUTH=none', 'neo4j:5.18.0', 'bash /import.sh' diff --git a/dataload/07_create_db/solr/solr_import.slurm.py b/dataload/07_create_db/solr/solr_import.slurm.py index ac13f79..bd94213 100644 --- a/dataload/07_create_db/solr/solr_import.slurm.py +++ b/dataload/07_create_db/solr/solr_import.slurm.py @@ -37,7 +37,7 @@ def main(): ('--bind ' + os.path.abspath(args.in_names_txt) + ':/names.txt') if args.in_names_txt != None else '', '--bind ' + os.path.abspath(args.solr_config) + ':/config', '--bind ' + os.path.abspath(args.out_path) + ':/var/solr', - '--bind ' + os.path.abspath(os.path.join(os.environ['GREBI_HOME'], '07_create_db/solr/solr_import.dockerpy')) + ':/import.py', + '--bind ' + os.path.abspath(os.path.join(os.environ['GREBI_DATALOAD_HOME'], '07_create_db/solr/solr_import.dockerpy')) + ':/import.py', #'--writable-tmpfs', '--net --network=none', 'docker://ghcr.io/ebispot/grebi_solr_with_python:9.5.0', @@ -55,7 +55,7 @@ def main(): ('-v ' + os.path.abspath(args.in_names_txt) + ':/names.txt') if args.in_names_txt != None else '', '-v ' + os.path.abspath(args.solr_config) + ':/config', '-v ' + os.path.abspath(args.out_path) + ':/var/solr', - '-v ' + os.path.abspath(os.path.join(os.environ['GREBI_HOME'], '07_create_db/solr/solr_import.dockerpy')) + ':/import.py', + '-v ' + os.path.abspath(os.path.join(os.environ['GREBI_DATALOAD_HOME'], '07_create_db/solr/solr_import.dockerpy')) + ':/import.py', 'ghcr.io/ebispot/grebi_solr_with_python:9.5.0', 'python3 /import.py', args.core, args.port, args.mem ]) diff --git a/dataload/configs/datasource_configs/hett_pesticides_appril.yaml b/dataload/configs/datasource_configs/hett_pesticides_appril.yaml index 46f3e8f..93c1d71 100644 --- a/dataload/configs/datasource_configs/hett_pesticides_appril.yaml +++ b/dataload/configs/datasource_configs/hett_pesticides_appril.yaml @@ -2,4 +2,4 @@ name: HETT_Pesticides.APPRIL enabled: true ingests: - globs: ["./00_fetch_data/hett_pesticides/apprildatadump_public.xlsx"] - command: $GREBI_HOME/01_ingest/hett_pesticides_appril.py --datasource-name $GREBI_INGEST_DATASOURCE_NAME + command: $GREBI_DATALOAD_HOME/01_ingest/hett_pesticides_appril.py --datasource-name $GREBI_INGEST_DATASOURCE_NAME diff --git a/dataload/configs/datasource_configs/hett_pesticides_eu.yaml b/dataload/configs/datasource_configs/hett_pesticides_eu.yaml index a841e8f..969c964 100644 --- a/dataload/configs/datasource_configs/hett_pesticides_eu.yaml +++ b/dataload/configs/datasource_configs/hett_pesticides_eu.yaml @@ -2,4 +2,4 @@ name: HETT_Pesticides.EU enabled: true ingests: - globs: ["./00_fetch_data/hett_pesticides/ActiveSubstanceExport*"] - command: $GREBI_HOME/01_ingest/hett_pesticides_eu.py --datasource-name $GREBI_INGEST_DATASOURCE_NAME + command: $GREBI_DATALOAD_HOME/01_ingest/hett_pesticides_eu.py --datasource-name $GREBI_INGEST_DATASOURCE_NAME diff --git a/dataload/configs/datasource_configs/hett_pesticides_gb.yaml b/dataload/configs/datasource_configs/hett_pesticides_gb.yaml index 6e30dee..b38dd9f 100644 --- a/dataload/configs/datasource_configs/hett_pesticides_gb.yaml +++ b/dataload/configs/datasource_configs/hett_pesticides_gb.yaml @@ -2,4 +2,4 @@ name: HETT_Pesticides.GB enabled: true ingests: - globs: ["./00_fetch_data/hett_pesticides/active-substance-register.xlsx"] - command: $GREBI_HOME/01_ingest/hett_pesticides_gb.py --datasource-name $GREBI_INGEST_DATASOURCE_NAME + command: $GREBI_DATALOAD_HOME/01_ingest/hett_pesticides_gb.py --datasource-name $GREBI_INGEST_DATASOURCE_NAME diff --git a/dataload/configs/datasource_configs/metabolights.yaml b/dataload/configs/datasource_configs/metabolights.yaml index ce8cd3f..cf2f2a0 100644 --- a/dataload/configs/datasource_configs/metabolights.yaml +++ b/dataload/configs/datasource_configs/metabolights.yaml @@ -2,4 +2,4 @@ name: Metabolights enabled: true ingests: - globs: ["./eb-eye_metabolights_complete.xml"] - command: $GREBI_HOME/01_ingest/grebi_ingest_metabolights/ingest.py + command: $GREBI_DATALOAD_HOME/01_ingest/grebi_ingest_metabolights/ingest.py diff --git a/dataload/configs/datasource_configs/mondo_efo.yaml b/dataload/configs/datasource_configs/mondo_efo.yaml index 2aaac5d..bec36c4 100644 --- a/dataload/configs/datasource_configs/mondo_efo.yaml +++ b/dataload/configs/datasource_configs/mondo_efo.yaml @@ -2,4 +2,4 @@ name: EFO.mappings enabled: true ingests: - globs: ["./00_fetch_data/mondo_efo/mondo_efo_mappings.tsv"] - command: $GREBI_HOME/01_ingest/grebi_ingest_mondo_efo_mappings/ingest.py --datasource-name $GREBI_INGEST_DATASOURCE_NAME + command: $GREBI_DATALOAD_HOME/01_ingest/grebi_ingest_mondo_efo_mappings/ingest.py --datasource-name $GREBI_INGEST_DATASOURCE_NAME diff --git a/dataload/nextflow/01_create_subgraph.nf b/dataload/nextflow/01_create_subgraph.nf index 4efa67c..aa96e04 100644 --- a/dataload/nextflow/01_create_subgraph.nf +++ b/dataload/nextflow/01_create_subgraph.nf @@ -5,7 +5,7 @@ import groovy.json.JsonSlurper jsonSlurper = new JsonSlurper() params.tmp = "$GREBI_TMP" -params.home = "$GREBI_HOME" +params.home = "$GREBI_DATALOAD_HOME" params.config = "$GREBI_CONFIG" params.subgraph = "$GREBI_SUBGRAPH" params.timestamp = "$GREBI_TIMESTAMP" diff --git a/dataload/nextflow/02_create_dbs.nf b/dataload/nextflow/02_create_dbs.nf index 45d3598..2132344 100644 --- a/dataload/nextflow/02_create_dbs.nf +++ b/dataload/nextflow/02_create_dbs.nf @@ -5,7 +5,7 @@ import groovy.json.JsonSlurper jsonSlurper = new JsonSlurper() params.tmp = "$GREBI_TMP" -params.home = "$GREBI_HOME" +params.home = "$GREBI_DATALOAD_HOME" params.config = "$GREBI_CONFIG" params.timestamp = "$GREBI_TIMESTAMP" params.is_ebi = "$GREBI_IS_EBI" diff --git a/dataload/scripts/dataload.py b/dataload/scripts/dataload.py index c35bc7c..36457dc 100644 --- a/dataload/scripts/dataload.py +++ b/dataload/scripts/dataload.py @@ -4,21 +4,21 @@ import os import subprocess -GREBI_HOME = os.environ['GREBI_HOME'] +GREBI_DATALOAD_HOME = os.environ['GREBI_DATALOAD_HOME'] GREBI_CONFIG = os.environ['GREBI_CONFIG'] GREBI_NEXTFLOW_CONFIG = os.environ['GREBI_NEXTFLOW_CONFIG'] -config = json.load(open(f'{GREBI_HOME}/configs/pipeline_configs/{GREBI_CONFIG}.json')) +config = json.load(open(f'{GREBI_DATALOAD_HOME}/configs/pipeline_configs/{GREBI_CONFIG}.json')) for subgraph in config['subgraphs']: print(f"===== LOADING SUBGRAPH: {subgraph} =====") os.environ['GREBI_SUBGRAPH'] = subgraph - res = os.system(f'NXF_WORK=work_{subgraph} nextflow {GREBI_HOME}/nextflow/01_create_subgraph.nf -c {GREBI_NEXTFLOW_CONFIG} -resume') + res = os.system(f'NXF_WORK=work_{subgraph} nextflow {GREBI_DATALOAD_HOME}/nextflow/01_create_subgraph.nf -c {GREBI_NEXTFLOW_CONFIG} -resume') if res != 0: exit(res) print(f"===== FINISHED LOADING SUBGRAPH: {subgraph} =====") -res = os.system(f'NXF_WORK=work_combined nextflow {GREBI_HOME}/nextflow/02_create_dbs.nf -c {GREBI_NEXTFLOW_CONFIG} -resume') +res = os.system(f'NXF_WORK=work_combined nextflow {GREBI_DATALOAD_HOME}/nextflow/02_create_dbs.nf -c {GREBI_NEXTFLOW_CONFIG} -resume') if res != 0: exit(res) diff --git a/dataload/scripts/dataload_00_prepare.py b/dataload/scripts/dataload_00_prepare.py index 2bf2358..783d9d4 100644 --- a/dataload/scripts/dataload_00_prepare.py +++ b/dataload/scripts/dataload_00_prepare.py @@ -9,12 +9,12 @@ def main(): print("subgraph is " + os.environ['GREBI_SUBGRAPH']) - config_filename = os.path.abspath(os.path.join(os.environ['GREBI_HOME'], 'configs/subgraph_configs/', os.environ['GREBI_SUBGRAPH'] + '.json')) + config_filename = os.path.abspath(os.path.join(os.environ['GREBI_DATALOAD_HOME'], 'configs/subgraph_configs/', os.environ['GREBI_SUBGRAPH'] + '.json')) with open(config_filename, 'r') as f: config = json.load(f) - datasources = map(lambda x: yaml.load(open(os.path.join(os.environ['GREBI_HOME'], x), 'r'), Loader=yaml.FullLoader), config['datasource_configs']) + datasources = map(lambda x: yaml.load(open(os.path.join(os.environ['GREBI_DATALOAD_HOME'], x), 'r'), Loader=yaml.FullLoader), config['datasource_configs']) datasource_files = [] for datasource in datasources: @@ -23,7 +23,7 @@ def main(): else: for ingest in datasource['ingests']: for g in ingest['globs']: - files = glob.glob(os.path.join(os.environ['GREBI_HOME'], g)) + files = glob.glob(os.path.join(os.environ['GREBI_DATALOAD_HOME'], g)) for file in files: filename = os.path.abspath(file) datasource_files.append(json.dumps({ diff --git a/dataload/scripts/dataload_codon.sh b/dataload/scripts/dataload_codon.sh index 19d5754..4c6f495 100755 --- a/dataload/scripts/dataload_codon.sh +++ b/dataload/scripts/dataload_codon.sh @@ -1,18 +1,18 @@ #!/bin/bash -export GREBI_HOME=/nfs/production/parkinso/spot/grebi +export GREBI_DATALOAD_HOME=/nfs/production/parkinso/spot/grebi/dataload export GREBI_TMP=/hps/nobackup/parkinso/spot/grebi/tmp export GREBI_CONFIG=ebi export GREBI_IS_EBI=true export GREBI_TIMESTAMP=$(date +%Y_%m_%d__%H_%M) export GREBI_MAX_ENTITIES=1000000000 -export GREBI_NEXTFLOW_CONFIG=$GREBI_HOME/nextflow/codon_nextflow.config +export GREBI_NEXTFLOW_CONFIG=$GREBI_DATALOAD_HOME/nextflow/codon_nextflow.config module load nextflow-22.10.1-gcc-11.2.0-ju5saqw module load python export PYTHONPATH="/homes/spotbot/.local/lib/python3.6/site-packages:$PYTHONPATH" cd /hps/nobackup/parkinso/spot/grebi/ export PYTHONUNBUFFERED=true srun -p datamover --time 1:0:0 --mem 8g bash -c "rm -rf /nfs/public/rw/ontoapps/grebi/staging && mkdir /nfs/public/rw/ontoapps/grebi/staging" -srun --time 3-0:0:0 --mem 8g bash -c "rm -rf work* tmp && python3 ${GREBI_HOME}/scripts/dataload.py" -#srun --time 23:0:0 --mem 8g bash -c "python3 ${GREBI_HOME}/scripts/dataload_codon.py" +srun --time 3-0:0:0 --mem 8g bash -c "rm -rf work* tmp && python3 ${GREBI_DATALOAD_HOME}/scripts/dataload.py" +#srun --time 23:0:0 --mem 8g bash -c "python3 ${GREBI_DATALOAD_HOME}/scripts/dataload_codon.py" diff --git a/dataload/scripts/dataload_local.sh b/dataload/scripts/dataload_local.sh index 18d5bea..da72321 100755 --- a/dataload/scripts/dataload_local.sh +++ b/dataload/scripts/dataload_local.sh @@ -1,15 +1,15 @@ #!/bin/bash -export GREBI_HOME=~/grebi +export GREBI_DATALOAD_HOME=~/grebi/dataload export GREBI_TMP=$(pwd) export GREBI_CONFIG=ebi export GREBI_IS_EBI=false export GREBI_TIMESTAMP=$(date +%Y_%m_%d__%H_%M) export RUST_BACKTRACE=full -export GREBI_NEXTFLOW_CONFIG=$GREBI_HOME/nextflow/local_nextflow.config +export GREBI_NEXTFLOW_CONFIG=$GREBI_DATALOAD_HOME/nextflow/local_nextflow.config cd $GREBI_TMP export PYTHONUNBUFFERED=true rm -rf work tmp -python3 ${GREBI_HOME}/scripts/dataload.py +python3 ${GREBI_DATALOAD_HOME}/scripts/dataload.py diff --git a/dataload/scripts/dataload_saturos.sh b/dataload/scripts/dataload_saturos.sh index 039f6f6..f78d6b6 100755 --- a/dataload/scripts/dataload_saturos.sh +++ b/dataload/scripts/dataload_saturos.sh @@ -1,15 +1,15 @@ #!/bin/bash -export GREBI_HOME=/home/james/grebi +export GREBI_DATALOAD_HOME=/home/james/grebi/dataload export GREBI_TMP=/data/grebi_tmp export GREBI_CONFIG=ebi export GREBI_IS_EBI=false export GREBI_TIMESTAMP=$(date +%Y_%m_%d__%H_%M) export RUST_BACKTRACE=full -export GREBI_NEXTFLOW_CONFIG=$GREBI_HOME/nextflow/saturos_nextflow.config +export GREBI_NEXTFLOW_CONFIG=$GREBI_DATALOAD_HOME/nextflow/saturos_nextflow.config cd $GREBI_TMP export PYTHONUNBUFFERED=true rm -rf work tmp -python3 ${GREBI_HOME}/scripts/dataload.py +python3 ${GREBI_DATALOAD_HOME}/scripts/dataload.py diff --git a/dataload/scripts/start_local_neo.py b/dataload/scripts/start_local_neo.py index 44d1e32..e7bf544 100755 --- a/dataload/scripts/start_local_neo.py +++ b/dataload/scripts/start_local_neo.py @@ -8,7 +8,7 @@ import glob from subprocess import Popen, PIPE, STDOUT -GREBI_HOME = os.environ['GREBI_HOME'] +GREBI_DATALOAD_HOME = os.environ['GREBI_DATALOAD_HOME'] GREBI_CONFIG = os.environ['GREBI_CONFIG'] GREBI_TMP = os.environ['GREBI_TMP']