From b1eb9e27871375d7e5e42516c7e9d0ff32fcc5fd Mon Sep 17 00:00:00 2001
From: Abdul Samad Siddiqui
Date: Mon, 4 Mar 2024 22:36:33 +0500
Subject: [PATCH] [ENH] Added Flag to save `LogFile` (#281)

* Added Flag to save `LogFile`

* refactored `add_data_to_graph.sh`

* updated print statement

* Update usage instructions

* Refactor the comment

* refactored case statement

* Refactored logging logic

Co-authored-by: Alyssa Dai

---------

Signed-off-by: samadpls
Co-authored-by: Alyssa Dai
---
 add_data_to_graph.sh | 160 +++++++++++++++++++++++++------------------
 1 file changed, 94 insertions(+), 66 deletions(-)

NOTE(review): the line structure of this patch was reconstructed from a
flattened copy. Indentation and the position of blank context lines are a
best-effort reconstruction. Text that appears to have been stripped from that
copy (e-mail addresses, <placeholder> names in the usage/help strings) has NOT
been restored. Verify against the upstream commit before applying.

diff --git a/add_data_to_graph.sh b/add_data_to_graph.sh
index db942a4..aa6c4c3 100755
--- a/add_data_to_graph.sh
+++ b/add_data_to_graph.sh
@@ -8,6 +8,8 @@
 # ARG_POSITIONAL_SINGLE([password],[Password for graph database user])
 # ARG_OPTIONAL_BOOLEAN([clear-data],[],[Whether or not to first clear all existing data from the graph database],[off])
 # ARG_OPTIONAL_BOOLEAN([use-graphdb-syntax],[],[Whether or not to use GraphDB API endpoints to update the specified graph database. If off, assumes the graph database is a Stardog database.],[off])
+# ARG_OPTIONAL_BOOLEAN([log-output],[],[Whether or not to write the output to a log file],[off])
+# ARG_OPTIONAL_SINGLE([log-file],[],[Path to the log file],[LOG.txt])
 # ARGBASH_GO()
 # needed because of Argbash --> m4_ignore([
 ### START OF CODE GENERATED BY Argbash v2.9.0 one line above ###
@@ -38,11 +40,13 @@ _positionals=()
 _arg_clear_data="off"
 _arg_use_graphdb_syntax="off"
+_arg_log_output="off"
+_arg_log_file="LOG.txt"
 
 
 print_help()
 {
 	printf '%s\n' "Upload JSONLD and Turtle data to a Neurobagel graph"
-	printf 'Usage: %s [-h|--help] [--(no-)clear-data] [--(no-)use-graphdb-syntax] \n' "$0"
+	printf 'Usage: %s [-h|--help] [--(no-)clear-data] [--(no-)use-graphdb-syntax] [--(no-)log-output] [--log-file ] \n' "$0"
 	printf '\t%s\n' ": Path to directory containing .jsonld and/or .ttl files. ALL .jsonld and .ttl files in this directory will be uploaded."
 	printf '\t%s\n' ": Host and port at which to access the graph database to add data to (e.g., localhost:7200)"
 	printf '\t%s\n' ": Name of graph database to add data to"
@@ -51,6 +55,8 @@ print_help()
 	printf '\t%s\n' "-h, --help: Prints help"
 	printf '\t%s\n' "--clear-data, --no-clear-data: Whether or not to first clear all existing data from the graph database (off by default)"
 	printf '\t%s\n' "--use-graphdb-syntax, --no-use-graphdb-syntax: Whether or not to use GraphDB API endpoints to update the specified graph database. If off, assumes the graph database is a Stardog database. (off by default)"
+	printf '\t%s\n' "--log-output, --no-log-output: Whether or not to write the output to a log file (off by default)"
+	printf '\t%s\n' "--log-file: Path to the log file (default: 'LOG.txt')"
 }
 
 
@@ -77,6 +83,18 @@ parse_commandline()
 				_arg_use_graphdb_syntax="on"
 				test "${1:0:5}" = "--no-" && _arg_use_graphdb_syntax="off"
 				;;
+			--no-log-output|--log-output)
+				_arg_log_output="on"
+				test "${1:0:5}" = "--no-" && _arg_log_output="off"
+				;;
+			--log-file)
+				test $# -lt 2 && die "Missing value for the optional argument '$_key'." 1
+				_arg_log_file="$2"
+				shift
+				;;
+			--log-file=*)
+				_arg_log_file="${_key##--log-file=}"
+				;;
 			*)
 				_last_positional="$1"
 				_positionals+=("$_last_positional")
@@ -128,6 +146,9 @@ graph_url=$_arg_graph_url
 clear_data=$_arg_clear_data # value is either on or off (https://argbash.readthedocs.io/en/stable/guide.html#optional-arguments)
 use_graphdb_syntax=$_arg_use_graphdb_syntax
 
+log_output=$_arg_log_output
+log_file=$_arg_log_file
+
 DELETE_TRIPLES_QUERY="
 DELETE {
     ?s ?p ?o .
@@ -145,71 +166,78 @@ else
     clear_data_url="${base_url}/update"
 fi
 
+# Main logic
+main() {
+    # Clear existing data in graph database if requested
+    if [ "$clear_data" = "on" ]; then
+        echo -e "\nCLEARING EXISTING DATA FROM ${graph_db}..."
+
+        response=$(curl -u "${user}:${password}" -s -S -i -w "\n%{http_code}\n" \
+            -X POST $clear_data_url \
+            -H "Content-Type: application/sparql-update" \
+            --data-binary "${DELETE_TRIPLES_QUERY}")
+
+        # Extract and check status code outputted as final line of response
+        httpcode=$(tail -n1 <<< "$response")
+        if (( $httpcode < 200 || $httpcode >= 300 )); then
+            echo -e "\nERROR: Failed to clear ${graph_db}:"
+            echo "$(sed '$d' <<< "$response")"
+            echo -e "\nEXITING..."
+            exit 1
+        fi
+    fi
+
+    # Add data to specified graph database
+    echo -e "\nUPLOADING DATA FROM ${jsonld_dir} TO ${graph_db}...\n"
+
+    upload_failed=()
+
+    for db in ${jsonld_dir}/*.jsonld; do
+        # Prevent edge case where no matching files are present in directory and so loop executes once with glob pattern string itself
+        [ -e "$db" ] || continue
+
+        echo "$(basename ${db}):"
+        response=$(curl -u "${user}:${password}" -s -S -i -w "\n%{http_code}\n" \
+            -X POST $upload_data_url \
+            -H "Content-Type: application/ld+json" \
+            --data-binary @${db})
+
+        httpcode=$(tail -n1 <<< "$response")
+        if (( $httpcode < 200 || $httpcode >= 300 )); then
+            upload_failed+=("${db}")
+        fi
+        # Print rest of response to stdout
+        echo -e "$(sed '$d' <<< "$response")\n"
+    done
+
+    for file in ${jsonld_dir}/*.ttl; do
+        [ -e "$file" ] || continue
+
+        echo "$(basename ${file}):"
+        response=$(curl -u "${user}:${password}" -s -S -i -w "\n%{http_code}\n" \
+            -X POST $upload_data_url \
+            -H "Content-Type: text/turtle" \
+            --data-binary @${file})
+
+        httpcode=$(tail -n1 <<< "$response")
+        if (( $httpcode < 200 || $httpcode >= 300 )); then
+            upload_failed+=("${file}")
+        fi
+        echo -e "$(sed '$d' <<< "$response")\n"
+    done
+
+    echo "FINISHED UPLOADING DATA FROM ${jsonld_dir} TO ${graph_db}."
+
+    if (( ${#upload_failed[@]} != 0 )); then
+        echo -e "\nERROR: Upload failed for these files:"
+        printf '%s\n' "${upload_failed[@]}"
+    fi
+}
 
-# Clear existing data in graph database if requested
-if [ "$clear_data" = "on" ]; then
-    echo -e "\nCLEARING EXISTING DATA FROM ${graph_db}..."
-
-    response=$(curl -u "${user}:${password}" -s -S -i -w "\n%{http_code}\n" \
-        -X POST $clear_data_url \
-        -H "Content-Type: application/sparql-update" \
-        --data-binary "${DELETE_TRIPLES_QUERY}")
-
-    # Extract and check status code outputted as final line of response
-    httpcode=$(tail -n1 <<< "$response")
-    if (( $httpcode < 200 || $httpcode >= 300 )); then
-        echo -e "\nERROR: Failed to clear ${graph_db}:"
-        echo "$(sed '$d' <<< "$response")"
-        echo -e "\nEXITING..."
-        exit 1
-    fi
-fi
-
-
-# Add data to specified graph database
-echo -e "\nUPLOADING DATA FROM ${jsonld_dir} TO ${graph_db}...\n"
-
-upload_failed=()
-
-for db in ${jsonld_dir}/*.jsonld; do
-    # Prevent edge case where no matching files are present in directory and so loop executes once with glob pattern string itself
-    [ -e "$db" ] || continue
-
-    echo "$(basename ${db}):"
-    response=$(curl -u "${user}:${password}" -s -S -i -w "\n%{http_code}\n" \
-        -X POST $upload_data_url \
-        -H "Content-Type: application/ld+json" \
-        --data-binary @${db})
-
-    httpcode=$(tail -n1 <<< "$response")
-    if (( $httpcode < 200 || $httpcode >= 300 )); then
-        upload_failed+=("${db}")
-    fi
-    # Print rest of response to stdout
-    echo -e "$(sed '$d' <<< "$response")\n"
-done
-
-for file in ${jsonld_dir}/*.ttl; do
-    [ -e "$file" ] || continue
-
-    echo "$(basename ${file}):"
-    response=$(curl -u "${user}:${password}" -s -S -i -w "\n%{http_code}\n" \
-        -X POST $upload_data_url \
-        -H "Content-Type: text/turtle" \
-        --data-binary @${file})
-
-    httpcode=$(tail -n1 <<< "$response")
-    if (( $httpcode < 200 || $httpcode >= 300 )); then
-        upload_failed+=("${file}")
-    fi
-    echo -e "$(sed '$d' <<< "$response")\n"
-done
-
-echo "FINISHED UPLOADING DATA FROM ${jsonld_dir} TO ${graph_db}."
-
-if (( ${#upload_failed[@]} != 0 )); then
-    echo -e "\nERROR: Upload failed for these files:"
-    printf '%s\n' "${upload_failed[@]}"
+# Call the main logic function with or without output redirection
+if [ "$log_output" = "on" ]; then
+    main > "$log_file"
+else
+    main
 fi
-
 # ] <-- needed because of Argbash