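# Copied from the GitHub Actions web view (page chrome such as "Skip to content" kept only as this note).
# Workflow: new_release
# Run: new_release #423
# Workflow file for this run: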
# Builds a date-stamped release: refreshes source data on a release branch,
# indexes it into Elasticsearch with genomehubs, snapshots the indices and
# restores the snapshots on the production host.
name: new_release
on:
  # Allows the workflow to be started by hand.
  workflow_dispatch:
  schedule:
    # 00:05 on weekdays 1-6 (Monday to Saturday); no scheduled run on Sunday.
    - cron: '5 0 * * 1,2,3,4,5,6'
env:
  # Image tag of genomehubs/genomehubs used by the docker steps.
  DOCKERVERSION: latest
  # Root taxon ID handed to the NCBI/ENA downloads and to genomehubs init/fill
  # (the steps below label this taxon as eukaryota).
  TAXROOT: '2759'
jobs:
# Job get_dockers (child of `jobs:`): refreshes the genomehubs image on runner1.
# Every other job depends on it directly or through its own `needs`.
  get_dockers:
    runs-on: [self-hosted, runner1]
    steps:
      - name: Get dockers
        run: |
          docker pull "genomehubs/genomehubs:$DOCKERVERSION"
          # Print the version baked into the image that was just pulled. The
          # tag is given explicitly so the report is about $DOCKERVERSION and
          # not about whatever the implicit ":latest" tag points to.
          docker run --rm "genomehubs/genomehubs:$DOCKERVERSION" sh -c 'echo $CONTAINER_VERSION'
# Job update_ncbi_ena_tolids (child of `jobs:`): creates the release branch,
# downloads NCBI assembly metadata, the NCBI taxdump, extra ENA taxonomy nodes
# and the ToLID list, stamps the release date into sources/goat.yaml, then
# commits and pushes the branch.
  update_ncbi_ena_tolids:
    needs:
      - get_dockers
    runs-on: [self-hosted, runner1]
    steps:
      - name: Cleanup build folder
        run: |
          # Remove regular and dot-prefixed entries from the working directory;
          # errors are deliberately ignored.
          rm -rf ./* || true
          rm -rf ./.??* || true
      - name: Get release name from date
        # RELEASE (YYYY.MM.DD) is exported to later steps through GITHUB_ENV and
        # also stored in /tmp/CURRENT_RELEASE, which the later jobs read back.
        run: echo "RELEASE=$(date +'%Y.%m.%d')" | tee /tmp/CURRENT_RELEASE >> "$GITHUB_ENV"
      - name: checkout first
        uses: actions/checkout@v3
        with:
          # otherwise, the token used is the GITHUB_TOKEN, instead of your personal access token
          persist-credentials: false
          clean: true
      - name: checkout RELEASE, or create a new branch
        run: |
          git checkout -b "$RELEASE"
      - name: Download NCBI datasets executable
        run: |
          # --fail makes an HTTP error abort the step instead of saving the
          # error page as the "datasets" binary.
          curl -fsSL https://ftp.ncbi.nlm.nih.gov/pub/datasets/command-line/v2/linux-amd64/datasets -o datasets
          chmod a+x datasets
          sudo ip link set ens3 mtu 1500
      - name: Download NCBI TAXROOT datasets zip
        run: |
          mkdir -p sources/assembly-data
          ./datasets download genome taxon "$TAXROOT" --no-progressbar --dehydrated --filename "sources/assembly-data/$TAXROOT.zip"
      - name: Unzip NCBI eukaryota datasets zip
        # Only the assembly data report is extracted from the archive.
        run: unzip -o -d sources/assembly-data "sources/assembly-data/$TAXROOT.zip" ncbi_dataset/data/assembly_data_report.jsonl
      - name: genomehubs parse --ncbi-datasets-genome
        run: |
          docker run --rm --network=host \
            -v `pwd`/sources:/genomehubs/sources \
            genomehubs/genomehubs:$DOCKERVERSION bash -c \
              "genomehubs parse --ncbi-datasets-genome sources/assembly-data --outfile sources/assembly-data/ncbi_datasets_eukaryota.tsv.gz"
          # The same table is also placed in the taxon-level source directory;
          # create that directory first, as the sibling steps do for theirs.
          mkdir -p sources/assembly-data-taxon
          cp sources/assembly-data/ncbi_datasets_eukaryota.tsv.gz sources/assembly-data-taxon/ncbi_datasets_eukaryota.tsv.gz
      - name: genomehubs parse --ncbi-datasets-sample
        run: |
          mkdir -p sources/assembly-data-sample
          docker run --rm --network=host \
            -v `pwd`/sources:/genomehubs/sources \
            genomehubs/genomehubs:$DOCKERVERSION bash -c \
              "genomehubs parse --ncbi-datasets-sample sources/assembly-data --outfile sources/assembly-data-sample/ncbi_datasets_eukaryota_sample.tsv.gz"
      - name: Clean up expanded ncbi datasets zip
        run: rm -rf sources/assembly-data/ncbi_dataset datasets
      - name: genomehubs parse --refseq-organelles
        run: |
          docker run --rm --network=host \
            -v `pwd`/sources:/genomehubs/sources \
            genomehubs/genomehubs:$DOCKERVERSION bash -c \
              "genomehubs parse --refseq-organelles --outfile sources/assembly-data/refseq_organelles.tsv.gz"
      - name: Get latest NCBI taxdump
        run: |
          # pipefail + --fail: a failed download must fail the step rather than
          # feed an empty or error stream to tar.
          set -eo pipefail
          mkdir -p /tmp/new_taxdump/ncbi
          curl -fsS https://ftp.ncbi.nlm.nih.gov/pub/taxonomy/new_taxdump/new_taxdump.tar.gz \
            | tar xzf - nodes.dmp names.dmp
          mv nodes.dmp names.dmp /tmp/new_taxdump/ncbi
      - name: Get extra ENA taxonomy nodes
        run: |
          mkdir -p sources/ena-taxonomy
          cd sources/ena-taxonomy
          bash ../../scripts/get_ena_taxonomy_extra.bash "$TAXROOT"
          # Drop the listed intermediate files; anything else the script wrote
          # stays in sources/ena-taxonomy.
          rm -f ena-taxonomy.extra.jsonl ena-taxonomy.xml.taxids \
            resulttaxon.tax_tree$TAXROOT.taxids ena-taxonomy.extra.prev.jsonl \
            ena-taxonomy.extra.prev.taxids resulttaxon.tax_tree$TAXROOT.extra.curr.taxids \
            ena-taxonomy.extra.curr.jsonl
          cd ../../
      - name: Get latest tolids
        run: |
          # pipefail + --fail: without them a failed download would still leave
          # a valid-looking (empty or error-page) gzip file to be committed.
          set -eo pipefail
          mkdir -p sources/tolids
          curl -fsS https://gitlab.com/wtsi-grit/darwin-tree-of-life-sample-naming/-/raw/master/tolids.txt \
            | gzip -c > sources/tolids/tolids.tsv.gz
      - name: Update sources/goat.yaml
        run: |
          # Replace every 20YY.MM.DD date with the current release name. Both
          # dots are escaped so only literal dots separate the date fields.
          perl -i -plne 's/20\d\d\.\d\d\.\d\d/'"$RELEASE"'/g' sources/goat.yaml
      - name: Commit files in branch
        run: |
          git config --local user.email "[email protected]"
          git config --local user.name "sujaikumar"
          git add -A
          git commit -m "Add changes from workflow running $RELEASE"
      - name: Push changes
        # NOTE(review): the action is referenced by branch (@master); consider
        # pinning it to a release tag or commit SHA.
        uses: ad-m/github-push-action@master
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
          branch: ${{ env.RELEASE }}
# Job update_api_googlesheets_data (child of `jobs:`): on the release branch,
# pulls status lists from external APIs and Google Sheets, post-processes the
# NHM and VGP tables, stamps source_date into the listed *.types.yaml files,
# then commits and pushes.
  update_api_googlesheets_data:
    needs:
      - get_dockers
      - update_ncbi_ena_tolids
    runs-on: [self-hosted, runner2]
    steps:
      - name: Install dependencies
        run: |
          python3 -m pip install --upgrade pip
          pip install requests pyyaml pandas numpy
      - name: Get bash release name
        # /tmp/CURRENT_RELEASE holds a "RELEASE=YYYY.MM.DD" line written by
        # update_ncbi_ena_tolids; appending it to GITHUB_ENV restores $RELEASE.
        run: cat /tmp/CURRENT_RELEASE >> "$GITHUB_ENV"
      - uses: actions/checkout@v3
        with:
          # otherwise, the token used is the GITHUB_TOKEN, instead of your personal access token.
          persist-credentials: false
          # otherwise, there would be errors pushing refs to the destination repository.
          fetch-depth: 0
          ref: ${{ env.RELEASE }}
      - name: Get from nhm and vgl and STS APIs
        # Secrets are handed over as environment variables and quoted, instead
        # of being pasted into the script text by ${{ }} expansion, so their
        # content can neither be word-split nor interpreted by the shell.
        env:
          STS_AUTHORIZATION_KEY: ${{ secrets.STS_AUTHORIZATION_KEY }}
          JGI_OFFLINE_TOKEN: ${{ secrets.JGI_OFFLINE_TOKEN }}
        run: |
          python3 ./scripts/api_to_tsv.py "$STS_AUTHORIZATION_KEY" "$JGI_OFFLINE_TOKEN"
      - name: Get Schema 2.5 status files
        env:
          ORIGINAL2_5_SCHEMA: ${{ secrets.ORIGINAL2_5_SCHEMA }}
        run: |
          python3 ./scripts/import_status.py "$ORIGINAL2_5_SCHEMA"
      - name: process nhm tsv
        run: |
          # Header line (contains "institutionCode"): append three column names.
          # Every other line: append the three constant values.
          perl -i -plne '
            s/(.*institutionCode.*)/$1\tsequencing_status\tsample_collected\tsample_collected_by/ and next;
            s/$/\tsample_collected\tDTOL\tNHM/;
          ' ./sources/status_lists/nhm.tsv
      - name: process vgp tsv
        run: |
          # Header line (contains "taxon_id"): append seven status column names.
          # Data lines: fill as many of those columns with "VGP" as the status
          # code found on the line (0-…, 1-… to 7-…, 8-NCBI) calls for.
          perl -i -plne '
            s/(.*taxon_id.*)$/$1\tlong_list\tsample_collected\tsample_acquired\tin_progress\tinsdc_submitted\tinsdc_open\tpublished/;
            s/(.*0-data_queued.*)$/$1\tVGP\tVGP\tVGP\t\t\t\t/;
            s/(.(1-DNA_isolation|2-data_progress|3-data_done|4-asm_progress|5-asm_done|6-asm_curation|7-asm_curated).*)$/$1\tVGP\tVGP\tVGP\tVGP\t\t\t/;
            s/(.*8-NCBI.*)$/$1\tVGP\tVGP\tVGP\tVGP\tVGP\tVGP\t/;
          ' ./sources/status_lists/vgp.tsv
      - name: Get from jgi api
        env:
          JGI_OFFLINE_TOKEN: ${{ secrets.JGI_OFFLINE_TOKEN }}
        run: python3 ./scripts/jgi_to_tsv.py "$JGI_OFFLINE_TOKEN" > ./sources/status_lists/jgi_1kfg.tsv
      - name: Get from googlesheets (Kew, DTOL Assembly Informatics, Aus Genome Status Bioplatforms)
        run: |
          docker pull genomehubs/r-ver:latest
          # Run as the runner's user so files written into the mounted
          # workspace are not owned by root.
          docker run --rm --user $UID:$GROUPS \
            -v $GITHUB_WORKSPACE:/goat-data \
            -w /goat-data \
            genomehubs/r-ver:latest \
            Rscript ./scripts/get_googlesheets.R
          cp sources/assembly-data/cngb.tsv sources/assembly-data-taxon/cngb.tsv
      - name: Update source_date in yamls
        run: |
          # source_date uses dashes: 2024.01.31 -> 2024-01-31.
          RELEASEDATE=${RELEASE//./-}
          # Same substitution for every file; one perl call per file, in the
          # original order.
          for types_yaml in \
            sources/genomesize_karyotype/FILE_DTOL_assembly_informatics_status_kmer_draft.types.yaml \
            sources/genomesize_karyotype/FILE_DTOL_Plant_Genome_Size_Estimates.types.yaml \
            sources/status_lists/FILE_DTOL_nhm.types.yaml \
            sources/status_lists/FILE_DTOL_sts.types.yaml \
            sources/status_lists/FILE_EBP_Nor_long_list.types.yaml \
            sources/status_lists/FILE_eurofish.types.yaml \
            sources/status_lists/FILE_jgi1kfg_status.types.yaml \
            sources/status_lists/FILE_vgp_table_tracker.types.yaml
          do
            perl -i -plne 's/source_date:\s*202\d.\d\d.\d\d/source_date: '"$RELEASEDATE"'/g' "$types_yaml"
          done
      - name: Commit files in branch
        run: |
          git config --local user.email "[email protected]"
          git config --local user.name "sujaikumar"
          git add -A
          git commit -m "Add changes from workflow running $RELEASE"
      - name: Push changes
        uses: ad-m/github-push-action@master
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
          branch: ${{ env.RELEASE }}
# Job genomehubs_init (child of `jobs:`): prepares Elasticsearch on es1:9200
# (snapshot repositories, removal of older release indices, restore of the
# analysis/file indices under the new release name), runs `genomehubs init`,
# indexes assembly and sample data, and snapshots the result.
  genomehubs_init:
    needs:
      - get_dockers
      - update_ncbi_ena_tolids
    runs-on: [self-hosted, runner1]
    steps:
      - name: Get bash release name
        # Restores $RELEASE from the "RELEASE=…" line saved by the first job.
        run: cat /tmp/CURRENT_RELEASE >> "$GITHUB_ENV"
      - uses: actions/checkout@v3
        with:
          # otherwise, the token used is the GITHUB_TOKEN, instead of your personal access token.
          persist-credentials: false
          # otherwise, there would be errors pushing refs to the destination repository.
          fetch-depth: 0
          ref: ${{ env.RELEASE }}
      - name: Check es health
        run: curl -s "es1:9200/_cat/health"
      - name: Create dev snapshot repo if it doesn't exist
        run: |
          # Best effort: a failed request does not fail the step.
          curl -s -X PUT "es1:9200/_snapshot/dev" \
            -H 'Content-Type: application/json' \
            -d' { "type": "fs", "settings": { "location": "/usr/share/elasticsearch/snapshots/dev" } }' || true
      - name: Create production-RELEASE snapshot repo if it doesn't exist
        run: |
          # Best effort: a failed request does not fail the step.
          curl -s -X PUT "es1:9200/_snapshot/production-$RELEASE" \
            -H 'Content-Type: application/json' \
            -d' { "type": "fs", "settings": { "location": "/usr/share/elasticsearch/snapshots/production/production-'$RELEASE'" } }' || true
      - name: Delete indices and snapshots for that release, except dev/analysis_file snapshot
        run: |
          # The patterns match every index ending in "--20…" and every dev
          # snapshot whose name starts with "20", not just the current release.
          curl -s -X DELETE "es1:9200/*--20*"
          curl -s -X DELETE "es1:9200/_snapshot/dev/20*"
      - name: Restore and rename analysis_file snapshot to current RELEASE indices
        run: |
          curl -s -X POST "es1:9200/_snapshot/dev/analysis_file/_restore?wait_for_completion=true&pretty" \
            -H 'Content-Type: application/json' \
            -d' {
              "ignore_unavailable": true,
              "include_global_state": false,
              "include_aliases": false,
              "rename_pattern" : "--goat--202.*",
              "rename_replacement" : "--goat--'$RELEASE'"
            }'
      - name: genomehubs init
        run: |
          docker run --rm --network=host \
            -v `pwd`/sources:/genomehubs/sources \
            genomehubs/genomehubs:$DOCKERVERSION bash -c \
              "genomehubs init \
                --es-host es1:9200 \
                --taxonomy-source ncbi \
                --config-file sources/goat.yaml \
                --taxonomy-jsonl sources/ena-taxonomy/ena-taxonomy.extra.jsonl.gz \
                --taxonomy-ncbi-root $TAXROOT \
                --taxon-preload"
      - name: Add ES index.mapping.nested_objects.limit 1000000
        run: |
          curl -s -X PUT "es1:9200/taxon--ncbi--goat--${RELEASE}/_settings" \
            -H 'Content-Type: application/json' \
            -d '{ "index.mapping.nested_objects.limit" : 1000000 }'
      - name: genomehubs index assembly assembly-data
        run: |
          docker run --rm --network=host \
            -v `pwd`/sources:/genomehubs/sources \
            genomehubs/genomehubs:$DOCKERVERSION bash -c \
              "genomehubs index \
                --es-host es1:9200 \
                --taxonomy-source ncbi \
                --config-file sources/goat.yaml \
                --assembly-dir sources/assembly-data"
      - name: genomehubs index assembly assembly-data-sample
        run: |
          docker run --rm --network=host \
            -v `pwd`/sources:/genomehubs/sources \
            genomehubs/genomehubs:$DOCKERVERSION bash -c \
              "genomehubs index \
                --es-host es1:9200 \
                --taxonomy-source ncbi \
                --config-file sources/goat.yaml \
                --sample-dir sources/assembly-data-sample"
      - name: Snapshot RELEASE initassembly
        run: |
          # Drop any snapshot of the same name first. "|| true" (not
          # "|| exit 0") so that a failed DELETE cannot end the step with
          # success before the snapshot below has been requested.
          curl -s -X DELETE "es1:9200/_snapshot/dev/${RELEASE}_initassembly" || true
          curl -s -X PUT "es1:9200/_snapshot/dev/${RELEASE}_initassembly?wait_for_completion=true&pretty" \
            -H 'Content-Type: application/json' \
            -d' { "indices": "assembly*${{env.RELEASE}},attributes*${{env.RELEASE}},feature*${{env.RELEASE}},identifiers*${{env.RELEASE}},sample*${{env.RELEASE}},taxon*${{env.RELEASE}}", "include_global_state":false}'
      - name: Snapshot BACKUP initassembly
        run: |
          # Same content under a fixed, release-independent snapshot name.
          curl -s -X DELETE "es1:9200/_snapshot/dev/initassembly" || true
          curl -s -X PUT "es1:9200/_snapshot/dev/initassembly?wait_for_completion=true&pretty" \
            -H 'Content-Type: application/json' \
            -d' { "indices": "assembly*${{env.RELEASE}},attributes*${{env.RELEASE}},feature*${{env.RELEASE}},identifiers*${{env.RELEASE}},sample*${{env.RELEASE}},taxon*${{env.RELEASE}}", "include_global_state":false}'
# Job genomehubs_index_attributes (child of `jobs:`): indexes the taxon-level
# source directories and the BTK assemblies into the release indices, taking a
# dev snapshot after each stage, then commits and pushes the release branch.
  genomehubs_index_attributes:
    needs:
      - get_dockers
      - genomehubs_init
      - update_api_googlesheets_data
    runs-on: [self-hosted, runner1]
    steps:
      - name: Get bash release name
        # Restores $RELEASE from the "RELEASE=…" line saved by the first job.
        run: cat /tmp/CURRENT_RELEASE >> "$GITHUB_ENV"
      - uses: actions/checkout@v3
        with:
          # otherwise, the token used is the GITHUB_TOKEN, instead of your personal access token.
          persist-credentials: false
          # otherwise, there would be errors pushing refs to the destination repository.
          fetch-depth: 0
          ref: ${{ env.RELEASE }}
      - name: genomehubs index taxon ott3.3
        run: |
          docker run --rm --network=host \
            -v `pwd`/sources:/genomehubs/sources \
            genomehubs/genomehubs:$DOCKERVERSION bash -c \
              "genomehubs index \
                --es-host es1:9200 \
                --taxonomy-source ncbi \
                --config-file sources/goat.yaml \
                --taxon-dir sources/ott3.3"
      - name: genomehubs index taxon tolids
        run: |
          docker run --rm --network=host \
            -v `pwd`/sources:/genomehubs/sources \
            genomehubs/genomehubs:$DOCKERVERSION bash -c \
              "genomehubs index \
                --es-host es1:9200 \
                --taxonomy-source ncbi \
                --config-file sources/goat.yaml \
                --taxon-dir sources/tolids \
                --taxon-lookup any --taxon-spellcheck"
      - name: Snapshot initassembly_otttolids
        run: |
          # "|| true" (not "|| exit 0"): a failed DELETE must not end the step
          # with success before the snapshot below has been requested.
          curl -s -X DELETE "es1:9200/_snapshot/dev/${RELEASE}_initassembly_otttolids" || true
          curl -s -X PUT "es1:9200/_snapshot/dev/${RELEASE}_initassembly_otttolids?wait_for_completion=true&pretty" \
            -H 'Content-Type: application/json' \
            -d' { "indices": "assembly*${{env.RELEASE}},attributes*${{env.RELEASE}},feature*${{env.RELEASE}},identifiers*${{env.RELEASE}},sample*${{env.RELEASE}},taxon*${{env.RELEASE}}", "include_global_state":false}'
      - name: genomehubs index taxon genomesizekaryotype
        run: |
          docker run --rm --network=host \
            -v `pwd`/sources:/genomehubs/sources \
            genomehubs/genomehubs:$DOCKERVERSION bash -c \
              "genomehubs index \
                --es-host es1:9200 \
                --taxonomy-source ncbi \
                --config-file sources/goat.yaml \
                --taxon-dir sources/genomesize_karyotype \
                --taxon-lookup any --taxon-spellcheck"
      - name: Snapshot initassembly_otttolids_gensizekaryo
        run: |
          # See the first snapshot step for why this is "|| true".
          curl -s -X DELETE "es1:9200/_snapshot/dev/${RELEASE}_initassembly_otttolids_gensizekaryo" || true
          curl -s -X PUT "es1:9200/_snapshot/dev/${RELEASE}_initassembly_otttolids_gensizekaryo?wait_for_completion=true&pretty" \
            -H 'Content-Type: application/json' \
            -d' { "indices": "assembly*${{env.RELEASE}},attributes*${{env.RELEASE}},feature*${{env.RELEASE}},identifiers*${{env.RELEASE}},sample*${{env.RELEASE}},taxon*${{env.RELEASE}}", "include_global_state":false}'
      - name: genomehubs index taxon regional_lists
        run: |
          docker run --rm --network=host \
            -v `pwd`/sources:/genomehubs/sources \
            genomehubs/genomehubs:$DOCKERVERSION bash -c \
              "genomehubs index \
                --es-host es1:9200 \
                --taxonomy-source ncbi \
                --config-file sources/goat.yaml \
                --taxon-dir sources/regional_lists \
                --taxon-lookup any --taxon-spellcheck"
      - name: genomehubs index taxon uk_legislation
        run: |
          docker run --rm --network=host \
            -v `pwd`/sources:/genomehubs/sources \
            genomehubs/genomehubs:$DOCKERVERSION bash -c \
              "genomehubs index \
                --es-host es1:9200 \
                --taxonomy-source ncbi \
                --config-file sources/goat.yaml \
                --taxon-dir sources/uk_legislation \
                --taxon-lookup any --taxon-spellcheck"
      - name: genomehubs parse btk
        run: |
          docker run --rm --network=host \
            -v `pwd`/sources:/genomehubs/sources \
            genomehubs/genomehubs:$DOCKERVERSION bash -c \
              "genomehubs parse \
                --btk --btk-root Eukaryota --outfile sources/btk/btk.tsv.gz"
      - name: genomehubs index assembly btk assemblies
        run: |
          docker run --rm --network=host \
            -v `pwd`/sources:/genomehubs/sources \
            genomehubs/genomehubs:$DOCKERVERSION bash -c \
              "genomehubs index \
                --es-host es1:9200 \
                --taxonomy-source ncbi \
                --config-file sources/goat.yaml \
                --assembly-dir sources/btk"
      - name: Snapshot initassembly_otttolids_gensizekaryo_reglegbtkasm
        run: |
          # See the first snapshot step for why this is "|| true".
          curl -s -X DELETE "es1:9200/_snapshot/dev/${RELEASE}_initassembly_otttolids_gensizekaryo_reglegbtkasm" || true
          curl -s -X PUT "es1:9200/_snapshot/dev/${RELEASE}_initassembly_otttolids_gensizekaryo_reglegbtkasm?wait_for_completion=true&pretty" \
            -H 'Content-Type: application/json' \
            -d' { "indices": "assembly*${{env.RELEASE}},attributes*${{env.RELEASE}},feature*${{env.RELEASE}},identifiers*${{env.RELEASE}},sample*${{env.RELEASE}},taxon*${{env.RELEASE}}", "include_global_state":false}'
      - name: genomehubs index taxon lineages
        run: |
          docker run --rm --network=host \
            -v `pwd`/sources:/genomehubs/sources \
            genomehubs/genomehubs:$DOCKERVERSION bash -c \
              "genomehubs index \
                --es-host es1:9200 \
                --taxonomy-source ncbi \
                --config-file sources/goat.yaml \
                --taxon-dir sources/lineages"
      - name: genomehubs index taxon status_lists
        run: |
          docker run --rm --network=host \
            -v `pwd`/sources:/genomehubs/sources \
            genomehubs/genomehubs:$DOCKERVERSION bash -c \
              "genomehubs index \
                --es-host es1:9200 \
                --taxonomy-source ncbi \
                --config-file sources/goat.yaml \
                --taxon-dir sources/status_lists \
                --taxon-lookup any --taxon-spellcheck"
      - name: genomehubs index taxon assembly-data-taxon
        run: |
          docker run --rm --network=host \
            -v `pwd`/sources:/genomehubs/sources \
            genomehubs/genomehubs:$DOCKERVERSION bash -c \
              "genomehubs index \
                --es-host es1:9200 \
                --taxonomy-source ncbi \
                --config-file sources/goat.yaml \
                --taxon-dir sources/assembly-data-taxon"
      - name: Snapshot initassembly_otttolids_gensizekaryo_reglegbtkasm_status
        run: |
          # See the first snapshot step for why this is "|| true".
          curl -s -X DELETE "es1:9200/_snapshot/dev/${RELEASE}_initassembly_otttolids_gensizekaryo_reglegbtkasm_status" || true
          curl -s -X PUT "es1:9200/_snapshot/dev/${RELEASE}_initassembly_otttolids_gensizekaryo_reglegbtkasm_status?wait_for_completion=true&pretty" \
            -H 'Content-Type: application/json' \
            -d' { "indices": "assembly*${{env.RELEASE}},attributes*${{env.RELEASE}},feature*${{env.RELEASE}},identifiers*${{env.RELEASE}},sample*${{env.RELEASE}},taxon*${{env.RELEASE}}", "include_global_state":false}'
      - name: Commit files in branch
        run: |
          git config --local user.email "[email protected]"
          git config --local user.name "sujaikumar"
          git add -A
          git commit -m "Add changes from workflow running $RELEASE"
      - name: Push changes
        uses: ad-m/github-push-action@master
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
          branch: ${{ env.RELEASE }}
# Job genomehubs_index_files (child of `jobs:`): works out which entries of
# sources/btk/btk.files.yaml exist on the release branch but not on main,
# indexes only those, and snapshots the analysis/file indices to the dev and
# production repositories.
  genomehubs_index_files:
    needs:
      - get_dockers
      - genomehubs_index_attributes
    runs-on: [self-hosted, runner2]
    steps:
      - name: Get bash release name
        # Restores $RELEASE from the "RELEASE=…" line saved by the first job.
        run: cat /tmp/CURRENT_RELEASE >> "$GITHUB_ENV"
      - uses: actions/checkout@v3
        with:
          # otherwise, the token used is the GITHUB_TOKEN, instead of your personal access token.
          persist-credentials: false
          # otherwise, there would be errors pushing refs to the destination repository.
          fetch-depth: 0
          ref: ${{ env.RELEASE }}
      - name: Collect extra analysis and files compared to main branch
        run: |
          mkdir -p sources/btk_files_extra/
          git show origin/main:sources/btk/btk.files.yaml > sources/btk/btk.files.yaml.main
          git show origin/${{ env.RELEASE }}:sources/btk/btk.files.yaml > sources/btk/btk.files.yaml.${{ env.RELEASE }}
          # Join each run of 15 lines into one tab-separated record, diff the
          # two versions, keep the records present only on the release side
          # ("< " lines) and split them back into lines.
          # The inputs are read from sources/btk/, where the two files above
          # were written; bare file names would not be found in the working
          # directory and the result would always be empty.
          # diff exits 1 when the inputs differ, so pipefail is deliberately
          # not enabled here.
          diff \
            <(paste - - - - - - - - - - - - - - - < sources/btk/btk.files.yaml.${{ env.RELEASE }}) \
            <(paste - - - - - - - - - - - - - - - < sources/btk/btk.files.yaml.main) \
            | perl -lne 'if (/^< /) { s/^< //; s/\t/\n/g; print}' \
            > sources/btk_files_extra/btk.files.yaml
      - name: genomehubs index extra btk files
        run: |
          docker run --rm --network=host \
            -v `pwd`/sources:/genomehubs/sources \
            -v /volumes/docker/resources:/genomehubs/resources \
            genomehubs/genomehubs:$DOCKERVERSION bash -c \
              "genomehubs index \
                --es-host es1:9200 \
                --taxonomy-source ncbi \
                --config-file sources/goat.yaml \
                --file-metadata sources/btk_files_extra/btk.files.yaml"
      # Disabled alternative: index the complete btk.files.yaml instead of only
      # the entries that are new relative to main.
      # - name: genomehubs index all btk files
      #   run: |
      #     docker run --rm --network=host \
      #       -v `pwd`/sources:/genomehubs/sources \
      #       -v /volumes/docker/resources:/genomehubs/resources \
      #       genomehubs/genomehubs:$DOCKERVERSION bash -c \
      #         "genomehubs index \
      #           --es-host es1:9200 \
      #           --taxonomy-source ncbi \
      #           --config-file sources/goat.yaml \
      #           --file-metadata sources/btk/btk.files.yaml"
      - name: Snapshot RELEASE analysis_file
        run: |
          # "|| true" (not "|| exit 0"): a failed DELETE must not end the step
          # with success before the snapshot below has been requested.
          curl -s -X DELETE "es1:9200/_snapshot/dev/${RELEASE}_analysis_file" || true
          curl -s -X PUT "es1:9200/_snapshot/dev/${RELEASE}_analysis_file?wait_for_completion=true&pretty" \
            -H 'Content-Type: application/json' \
            -d' { "indices": "analysis*,file*", "include_global_state":false}'
      - name: Snapshot BACKUP analysis_file
        run: |
          # Fixed-name copy; genomehubs_init restores from dev/analysis_file.
          curl -s -X DELETE "es1:9200/_snapshot/dev/analysis_file" || true
          curl -s -X PUT "es1:9200/_snapshot/dev/analysis_file?wait_for_completion=true&pretty" \
            -H 'Content-Type: application/json' \
            -d' { "indices": "analysis*,file*", "include_global_state":false}'
      - name: Snapshot files to production-$RELEASE
        run: |
          curl -s -X DELETE "es1:9200/_snapshot/production-${RELEASE}/${RELEASE}_analysis_file" || true
          curl -s -X PUT "es1:9200/_snapshot/production-${RELEASE}/${RELEASE}_analysis_file?wait_for_completion=true&pretty" \
            -H 'Content-Type: application/json' \
            -d' { "indices": "analysis*,file*", "include_global_state":false}'
# Job genomehubs_fill (child of `jobs:`): runs `genomehubs fill` from the root
# taxon over the release indices and snapshots the result to the dev and
# production repositories.
  genomehubs_fill:
    needs:
      - get_dockers
      - genomehubs_index_attributes
    runs-on: [self-hosted, runner1]
    steps:
      - name: Get bash release name
        # Restores $RELEASE from the "RELEASE=…" line saved by the first job.
        run: cat /tmp/CURRENT_RELEASE >> "$GITHUB_ENV"
      - uses: actions/checkout@v3
        with:
          # otherwise, the token used is the GITHUB_TOKEN, instead of your personal access token.
          persist-credentials: false
          # otherwise, there would be errors pushing refs to the destination repository.
          fetch-depth: 0
          ref: ${{ env.RELEASE }}
      - name: genomehubs fill
        run: |
          docker run --rm --network=host \
            -v `pwd`/sources:/genomehubs/sources \
            genomehubs/genomehubs:$DOCKERVERSION bash -c \
              "genomehubs fill \
                --es-host es1:9200 \
                --taxonomy-source ncbi \
                --config-file sources/goat.yaml \
                --traverse-root $TAXROOT \
                --traverse-infer-both"
      - name: Snapshot fill
        run: |
          # "|| true" (not "|| exit 0"): a failed DELETE must not end the step
          # with success before the snapshot below has been requested.
          curl -s -X DELETE "es1:9200/_snapshot/dev/${RELEASE}_initassembly_otttolids_gensizekaryo_reglegbtkasm_status_fill" || true
          curl -s -X PUT "es1:9200/_snapshot/dev/${RELEASE}_initassembly_otttolids_gensizekaryo_reglegbtkasm_status_fill?wait_for_completion=true&pretty" \
            -H 'Content-Type: application/json' \
            -d' { "indices": "assembly*${{env.RELEASE}},attributes*${{env.RELEASE}},feature*${{env.RELEASE}},identifiers*${{env.RELEASE}},sample*${{env.RELEASE}},taxon*${{env.RELEASE}}", "include_global_state":false}'
      - name: Snapshot final fill to production-$RELEASE
        run: |
          # This is the snapshot that restore_on_prod restores by name.
          curl -s -X DELETE "es1:9200/_snapshot/production-${RELEASE}/${RELEASE}_initassembly_otttolids_gensizekaryo_reglegbtkasm_status_fill" || true
          curl -s -X PUT "es1:9200/_snapshot/production-${RELEASE}/${RELEASE}_initassembly_otttolids_gensizekaryo_reglegbtkasm_status_fill?wait_for_completion=true&pretty" \
            -H 'Content-Type: application/json' \
            -d' { "indices": "assembly*${{env.RELEASE}},attributes*${{env.RELEASE}},feature*${{env.RELEASE}},identifiers*${{env.RELEASE}},sample*${{env.RELEASE}},taxon*${{env.RELEASE}}", "include_global_state":false}'
# Job rsync_to_lustre (child of `jobs:`): once both the file indexing and the
# fill job are done, mirrors the production snapshot directory for this release
# and the resources/files directory to the farm5 host.
  rsync_to_lustre:
    needs: [genomehubs_index_files, genomehubs_fill]
    runs-on: [self-hosted, runner1]
    steps:
      - name: Get bash release name
        # Restores $RELEASE from the "RELEASE=…" line saved by the first job.
        run: cat /tmp/CURRENT_RELEASE >> $GITHUB_ENV
      - name: rsync production snapshot to sanger lustre
        run: |
          # No trailing slash on the source: the production-$RELEASE directory
          # itself is created under dev/.
          rsync -av --delete \
            /volumes/docker/snapshots/production/production-$RELEASE \
            farm5:/lustre/scratch123/tol/share/goat/dev/
          # Trailing slash on the source: the directory contents are mirrored
          # into dev/files/, and --delete removes remote files absent locally.
          rsync -av --delete \
            /volumes/docker/resources/files/ \
            farm5:/lustre/scratch123/tol/share/goat/dev/files/
# Job restore_on_prod (child of `jobs:`): over ssh on tol-goat-prod-run1,
# deletes the indices of this release, registers the production-$RELEASE
# snapshot repository and restores both release snapshots from it.
  restore_on_prod:
    needs: [genomehubs_fill, genomehubs_index_files]
    runs-on: [self-hosted, runner2]
    steps:
      - name: Get bash release name
        # Restores $RELEASE from the "RELEASE=…" line saved by the first job.
        run: cat /tmp/CURRENT_RELEASE >> $GITHUB_ENV
      - name: ssh prod and restore
        run: |
          # The whole remote script is one double-quoted argument: $RELEASE is
          # expanded on the runner before ssh sends it, the backslash-newline
          # pairs are line continuations, and \" yields a literal double quote
          # inside the JSON bodies.
          ssh tol-goat-prod-run1 "
            curl -s -X DELETE 'es1:9200/*--$RELEASE'
            curl -s -X PUT 'es1:9200/_snapshot/production-$RELEASE' \
              -H 'Content-Type: application/json' \
              -d' { \"type\": \"fs\", \"settings\": { \"location\": \"/usr/share/elasticsearch/snapshots/production/production-$RELEASE\" } }'
            curl -s -X POST 'es1:9200/_snapshot/production-${RELEASE}/${RELEASE}_analysis_file/_restore?wait_for_completion=true&pretty' \
              -H 'Content-Type: application/json' \
              -d' { \"indices\": \"*--$RELEASE\", \"ignore_unavailable\": true, \"include_global_state\": false, \"include_aliases\": false}'
            curl -s -X POST 'es1:9200/_snapshot/production-${RELEASE}/${RELEASE}_initassembly_otttolids_gensizekaryo_reglegbtkasm_status_fill/_restore?wait_for_completion=true&pretty' \
              -H 'Content-Type: application/json' \
              -d' { \"indices\": \"*--$RELEASE\", \"ignore_unavailable\": true, \"include_global_state\": false, \"include_aliases\": false}'
          "