-
Notifications
You must be signed in to change notification settings - Fork 68
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
chore(brainreports): formatting tweaks (#1118)
* chore(brainreports): formatting tweaks * fix(brain-reports): cache, collect from 2020-01-13 * chore(gen3 api): indexd-download-all helper
- Loading branch information
Showing
7 changed files
with
227 additions
and
30 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
s3://bhcprodv2-data-bucket/dg.7519/1090d55e-a04c-49f1-82af-98b1080031eb/real-pd.testing_data_updated.tar.bz2 | ||
s3://bhcprodv2-data-bucket/dg.7519/11283b92-583d-4bd8-b25e-de43c897941f/cis-pd.ancillary_data.tar.bz2 | ||
s3://bhcprodv2-data-bucket/dg.7519/2798041a-5165-472e-994b-c0ed419f9c49/cis-pd.testing_data.tar.bz2 | ||
s3://bhcprodv2-data-bucket/dg.7519/4ebe9c9e-20e0-4374-9837-e43f39f1858a/cis-pd.training_data.tar.bz2 | ||
s3://bhcprodv2-data-bucket/dg.7519/54aee383-5bd5-48b1-a4e7-00c404684f3f/BEAT-PD_Challenge_Data_Dictionary.csv | ||
s3://bhcprodv2-data-bucket/dg.7519/69df65dd-6550-4fe0-b037-333280c0c2db/cis-pd.data_labels.tar.bz2 | ||
s3://bhcprodv2-data-bucket/dg.7519/73ce9ceb-a774-4dd2-b011-fcbd5943f6dd/cis-pd.clinical_data.tar.bz2 | ||
s3://bhcprodv2-data-bucket/dg.7519/88ab7900-b0c1-4ee1-a710-fe909cf9b0fd/real-pd.data_labels.tar.bz2 | ||
s3://bhcprodv2-data-bucket/dg.7519/8c1a8185-7098-4f21-be9e-7b53c1d69737/real-pd.REAL-PD_Test_Data_IDs.csv | ||
s3://bhcprodv2-data-bucket/dg.7519/8f67f4b9-21ab-40e0-af4c-914ebdc2df17/real-pd.clinical_data.tar.bz2 | ||
s3://bhcprodv2-data-bucket/dg.7519/ce1b8a45-2504-4433-99be-2f373894d71b/real-pd.ancillary_data_updated.tar.bz2 | ||
s3://bhcprodv2-data-bucket/dg.7519/d244cea7-5e45-4185-ae2b-a7f6440b1d59/real-pd.training_data_updated.tar.bz2 | ||
s3://bhcprodv2-data-bucket/dg.7519/e5ef9c59-b44b-456d-b82d-e4e049c3eb27/cis-pd.CIS-PD_Test_Data_IDs.csv |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
# | ||
# Generate S3 access and Dream-challenger user login reports for | ||
# the brain commons, and publish to dashboard service | ||
# | ||
# Run as cron: | ||
# GEN3_HOME=/home/bhcprodv2/cloud-automation | ||
# PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin | ||
# 2 2 * * 1 (if [ -f $GEN3_HOME/files/scripts/braincommons/brain-custom-reports.sh ]; then bash $GEN3_HOME/files/scripts/braincommons/brain-custom-reports.sh go; else echo "no brain-custom-reports.sh"; fi) > $HOME/brain-custom-reports.log 2>&1 | ||
|
||
|
||
source "${GEN3_HOME}/gen3/gen3setup.sh" | ||
|
||
|
||
# lib ------------------------- | ||
|
||
# List of BEAT-PD object paths (one per line) used as the filter allow-list
BEATPD="${GEN3_HOME}/files/scripts/braincommons/beatpd-files.txt"

#
# Pass through only the stdin rows whose second whitespace-separated
# field (the object path) occurs in the $BEATPD list.
#
# Globals:   BEATPD (read) - path to the allow-list file
# Inputs:    rows on stdin; field 2 is looked up in $BEATPD
# Outputs:   matching rows on stdout; every rejected row is reported
#            through gen3_log_info
#
beatpdFilter() {
  local LINE
  local path
  # "|| [[ -n ... ]]" keeps a final row that lacks a trailing newline
  while read -r LINE || [[ -n "$LINE" ]]; do
    path="$(awk '{ print $2 }' <<<"$LINE")"
    # -F: the path is a literal string, not a regex ("." must not match
    # any character); the -n test rejects rows without a second field,
    # because an empty pattern would match every line of the list
    if [[ -n "$path" ]] && grep -qF -- "$path" "$BEATPD" 2> /dev/null; then
      echo -e "$LINE"
    else
      gen3_log_info "SKIPPING $LINE - not in beatpd"
    fi
  done
}
|
||
# main ------------------------ | ||
|
||
# The reporting window always opens on January 13, 2020
startDate="2020-01-13"
numDays=0

# Require the literal "go" as first argument so the script does not do
# any work when invoked bare; the argument is consumed on success.
case "${1:-}" in
  go)
    shift
    ;;
  *)
    gen3_log_err "Use: brain-custom-reports.sh go"
    exit 1
    ;;
esac

#startDate="$1"
# Number of whole days between startDate and today's 00:00 (both UTC)
startSecs="$(date -u -d"$startDate" +%s)"
endSecs="$(date -u -d"00:00" +%s)"
numDays="$(( (endSecs - startSecs) / 86400 ))"
gen3_log_info "$numDays days since $startDate"

# Hard stop: refuse to run once today is past the 2020-05-01 cut-off
dropDeadSecs="$(date -u -d2020-05-01 +%s)"
if (( endSecs > dropDeadSecs )); then
  gen3_log_err "This script will not process logs after 2020-05-01"
  exit 1
fi
|
||
# To simplify testing - optionally take an already existing workfolder
# (recognized by the raw.txt it contains) as the next argument.
if [[ $# -gt 0 && -f "$1/raw.txt" ]]; then
  # Resolve to an absolute path: the script cd's into the folder below,
  # and later checks on "$workFolder" must not depend on the cwd.
  workFolder="$(cd -- "$1" && pwd)"
  shift
  folderName="$(basename "$workFolder")"
else
  folderName="$(date -d"$numDays days ago" -u +%Y%m%d)-$(date -u +%Y%m%d_%H%M%S)"
  # Stop if the scratch directory cannot be created - with an empty
  # mktemp result workFolder would become "/$folderName" at the root.
  if ! tempRoot="$(mktemp -d -p "$XDG_RUNTIME_DIR" brainCustomReport_XXXXXX)"; then
    gen3_log_err "Failed to create a temporary folder under $XDG_RUNTIME_DIR"
    exit 1
  fi
  workFolder="$tempRoot/$folderName"
fi
# All report files below are written relative to the cwd, so do not
# continue unless we really are inside the work folder.
if ! mkdir -p "$workFolder" || ! cd "$workFolder"; then
  gen3_log_err "Failed to enter work folder $workFolder"
  exit 1
fi
gen3_log_info "working in $workFolder"
|
||
# Raw log data up to cacheDate is expected in a previously generated
# cache file; each run only fetches the logs since cacheDate and
# appends them to the cached part.
cacheDate="2020-03-05"
cacheFile="${XDG_DATA_HOME}/gen3/cache/brain-custom-report_2020-01-13_to_2020-03-05_raw.txt"
[[ -f "$cacheFile" ]] || {
  gen3_log_err "Please generate cache $cacheFile : gen3 logs s3 start=2020-01-13 end=2020-03-05 filter=raw prefix=s3://bhcprodv2-data-bucket-logs/log/bhcprodv2-data-bucket/ > brain-custom-report_2020-01-13_to_2020-03-05_raw.txt"
  exit 1
}

if [[ ! -f raw.txt ]]; then
  # fetch the increment, then build raw.txt as cache + increment
  incrementalRaw="raw-${cacheDate}.txt"
  gen3 logs s3 start="$cacheDate 00:00" end="00:00" filter=raw prefix=s3://bhcprodv2-data-bucket-logs/log/bhcprodv2-data-bucket/ > "$incrementalRaw"
  cat "$cacheFile" "$incrementalRaw" > "raw.txt"
else
  gen3_log_info "using existing raw.txt - probably testing something"
fi

# derive one <filter>Raw.tsv per report flavour from the raw log data
for reportFilter in accessCount whoWhatWhen; do
  gen3 logs s3filter filter="$reportFilter" < raw.txt > "${reportFilter}Raw.tsv"
done
|
||
# Run the Dream access report job; the last line of its output is taken
# as the path of the generated report, which is copied into the cwd.
dreamScript="${GEN3_HOME}/files/scripts/braincommons/dream-access-report-cronjob.sh"
if ! { dreamReport="$(bash "$dreamScript" "$numDays" | tail -1)" && [[ -f "$dreamReport" ]]; }; then
  # not fatal - the remaining reports are still produced
  gen3_log_err "Failed to generate Dream access report"
else
  gen3_log_info "cp $dreamReport to $workFolder/dream_access_report.tsv"
  cp "$dreamReport" dream_access_report.tsv
fi
|
||
# Some customization for the brain-commons beat-pd dream challenge case

# sed expression that splits "dg.7519/<...>/<filename>" at its last
# slash into two tab-separated columns (did, filename)
splitDidFromName='s@(dg.7519/.+)/(.+)@\1\t\2@'

# access counts: header first, then the filtered rows; tee also echoes
# the rows to stdout
echo -e "Access_count\tdid\tfilename" > accessCountBrain.tsv
grep dg.7519/ accessCountRaw.tsv \
  | beatpdFilter \
  | sed -E "$splitDidFromName" \
  | tee -a accessCountBrain.tsv

# who/what/when: same shaping, plus a readable Synapse ID marker
{
  echo -e "Date_time\tdid\tfilename\tUser_id"
  grep dg.7519/ whoWhatWhenRaw.tsv \
    | beatpdFilter \
    | sed -E -e "$splitDidFromName" -e 's/__Synapse_ID_/ (Synapse ID)/g'
} > whoWhatWhenBrain.tsv
|
||
# Publish the finished work folder to the dashboard, then remove it.
if [[ -d "$workFolder" ]]; then
  gen3 dashboard publish secure "$workFolder" "dreamAccess/$(date -u +%Y)/$folderName"
  # step out of the folder before deleting it
  cd "$XDG_RUNTIME_DIR" || cd /
  gen3_log_info "cleaning up $workFolder"
  scratchRoot="$(dirname "$workFolder")"
  # ":?" aborts instead of expanding to an empty path
  /bin/rm -rf "${workFolder:?}"
  # Also drop the mktemp parent this script creates for fresh runs, so
  # the directories do not pile up; rmdir only removes it when empty,
  # and folders with any other name are left alone.
  if [[ "$(basename "$scratchRoot")" == brainCustomReport_* ]]; then
    rmdir "$scratchRoot" 2> /dev/null || true
  fi
fi
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters