Skip to content

Commit

Permalink
[ENH] Added Flag to save LogFile (#281)
Browse files Browse the repository at this point in the history
* Added Flag to save `LogFile`

* refactored `add_data_to_graph.sh`

* updated print statement

* Update usage instructions

* Refactor the comment

* refactored case statement

* Refactored logging logic

Co-authored-by: Alyssa Dai <[email protected]>

---------

Signed-off-by: samadpls <[email protected]>
Co-authored-by: Alyssa Dai <[email protected]>
  • Loading branch information
samadpls and alyssadai authored Mar 4, 2024
1 parent 8c74937 commit b1eb9e2
Showing 1 changed file with 94 additions and 66 deletions.
160 changes: 94 additions & 66 deletions add_data_to_graph.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
# ARG_POSITIONAL_SINGLE([password],[Password for graph database user])
# ARG_OPTIONAL_BOOLEAN([clear-data],[],[Whether or not to first clear all existing data from the graph database],[off])
# ARG_OPTIONAL_BOOLEAN([use-graphdb-syntax],[],[Whether or not to use GraphDB API endpoints to update the specified graph database. If off, assumes the graph database is a Stardog database.],[off])
# ARG_OPTIONAL_BOOLEAN([log-output],[],[Whether or not to write the output to a log file],[off])
# ARG_OPTIONAL_SINGLE([log-file],[],[Path to the log file],[LOG.txt])
# ARGBASH_GO()
# needed because of Argbash --> m4_ignore([
### START OF CODE GENERATED BY Argbash v2.9.0 one line above ###
Expand Down Expand Up @@ -38,11 +40,13 @@ _positionals=()
_arg_clear_data="off"
_arg_use_graphdb_syntax="off"

_arg_log_output="off"
_arg_log_file="LOG.txt"

print_help()
{
printf '%s\n' "Upload JSONLD and Turtle data to a Neurobagel graph"
printf 'Usage: %s [-h|--help] [--(no-)clear-data] [--(no-)use-graphdb-syntax] <dir> <graph-url> <graph-database> <user> <password>\n' "$0"
printf 'Usage: %s [-h|--help] [--(no-)clear-data] [--(no-)use-graphdb-syntax] [--(no-)log-output] [--log-file <arg>] <dir> <graph-url> <graph-database> <user> <password>\n' "$0"
printf '\t%s\n' "<dir>: Path to directory containing .jsonld and/or .ttl files. ALL .jsonld and .ttl files in this directory will be uploaded."
printf '\t%s\n' "<graph-url>: Host and port at which to access the graph database to add data to (e.g., localhost:7200)"
printf '\t%s\n' "<graph-database>: Name of graph database to add data to"
Expand All @@ -51,6 +55,8 @@ print_help()
printf '\t%s\n' "-h, --help: Prints help"
printf '\t%s\n' "--clear-data, --no-clear-data: Whether or not to first clear all existing data from the graph database (off by default)"
printf '\t%s\n' "--use-graphdb-syntax, --no-use-graphdb-syntax: Whether or not to use GraphDB API endpoints to update the specified graph database. If off, assumes the graph database is a Stardog database. (off by default)"
printf '\t%s\n' "--log-output, --no-log-output: Whether or not to write the output to a log file (off by default)"
printf '\t%s\n' "--log-file: Path to the log file (default: 'LOG.txt')"
}


Expand All @@ -77,6 +83,18 @@ parse_commandline()
_arg_use_graphdb_syntax="on"
test "${1:0:5}" = "--no-" && _arg_use_graphdb_syntax="off"
;;
--no-log-output|--log-output)
_arg_log_output="on"
test "${1:0:5}" = "--no-" && _arg_log_output="off"
;;
--log-file)
test $# -lt 2 && die "Missing value for the optional argument '$_key'." 1
_arg_log_file="$2"
shift
;;
--log-file=*)
_arg_log_file="${_key##--log-file=}"
;;
*)
_last_positional="$1"
_positionals+=("$_last_positional")
Expand Down Expand Up @@ -128,6 +146,9 @@ graph_url=$_arg_graph_url
clear_data=$_arg_clear_data # value is either on or off (https://argbash.readthedocs.io/en/stable/guide.html#optional-arguments)
use_graphdb_syntax=$_arg_use_graphdb_syntax

log_output=$_arg_log_output
log_file=$_arg_log_file

DELETE_TRIPLES_QUERY="
DELETE {
?s ?p ?o .
Expand All @@ -145,71 +166,78 @@ else
clear_data_url="${base_url}/update"
fi

# Main logic

#######################################
# Succeed only when the argument is a three-digit HTTP status in the 2xx range.
# Anything else (empty, non-numeric, curl's "000" on connection failure) is
# treated as a failure instead of raising an arithmetic syntax error.
# Arguments: $1 - status code string taken from the last line of curl output
# Returns:   0 for 200-299, 1 otherwise
#######################################
_is_http_success() {
  [[ "$1" =~ ^[0-9]{3}$ ]] && (( 10#$1 >= 200 && 10#$1 < 300 ))
}

#######################################
# POST every existing file in the argument list to the upload endpoint.
# Globals:   user, password, upload_data_url (read)
#            upload_failed (appended to; the caller's array via dynamic scoping)
# Arguments: $1  - value for the Content-Type header
#            $2… - candidate file paths (an unmatched glob pattern is skipped)
# Outputs:   file name followed by the server response (minus status line)
#######################################
_upload_files() {
  local content_type=$1
  shift
  local path response httpcode

  for path in "$@"; do
    # Prevent edge case where no matching files are present in the directory
    # and the loop would run once with the glob pattern string itself
    [ -e "$path" ] || continue

    printf '%s:\n' "${path##*/}"
    response=$(curl -u "${user}:${password}" -s -S -i -w "\n%{http_code}\n" \
      -X POST "$upload_data_url" \
      -H "Content-Type: ${content_type}" \
      --data-binary "@${path}")

    # -w appends the status code as the final line of the captured response
    httpcode=$(tail -n1 <<< "$response")
    if ! _is_http_success "$httpcode"; then
      upload_failed+=("$path")
    fi
    # Print the rest of the response verbatim (no backslash interpretation)
    printf '%s\n\n' "$(sed '$d' <<< "$response")"
  done
}

#######################################
# Optionally clear the graph database, then upload all .jsonld and .ttl files
# found directly inside the data directory.
# Globals:   clear_data, graph_db, user, password, clear_data_url,
#            upload_data_url, jsonld_dir, DELETE_TRIPLES_QUERY (all read)
# Outputs:   progress messages, server responses and the list of failed
#            uploads, all on stdout
# Exits:     status 1 if clearing the database fails. Failed uploads are only
#            reported; they do not change the return status (0).
# NOTE(review): the password is passed to curl on its command line (-u), as in
#            the original code — confirm this is acceptable for your setup.
#######################################
main() {
  local response httpcode
  local -a upload_failed=()

  # Clear existing data in graph database if requested
  if [ "$clear_data" = "on" ]; then
    printf '\nCLEARING EXISTING DATA FROM %s...\n' "$graph_db"

    response=$(curl -u "${user}:${password}" -s -S -i -w "\n%{http_code}\n" \
      -X POST "$clear_data_url" \
      -H "Content-Type: application/sparql-update" \
      --data-binary "${DELETE_TRIPLES_QUERY}")

    # Extract and check status code outputted as final line of response
    httpcode=$(tail -n1 <<< "$response")
    if ! _is_http_success "$httpcode"; then
      printf '\nERROR: Failed to clear %s:\n' "$graph_db"
      printf '%s\n' "$(sed '$d' <<< "$response")"
      printf '\nEXITING...\n'
      exit 1
    fi
  fi

  # Add data to specified graph database
  printf '\nUPLOADING DATA FROM %s TO %s...\n\n' "$jsonld_dir" "$graph_db"

  # The directory is quoted so paths with spaces survive; only the pattern globs
  _upload_files "application/ld+json" "${jsonld_dir}"/*.jsonld
  _upload_files "text/turtle" "${jsonld_dir}"/*.ttl

  printf 'FINISHED UPLOADING DATA FROM %s TO %s.\n' "$jsonld_dir" "$graph_db"

  if (( ${#upload_failed[@]} != 0 )); then
    printf '\nERROR: Upload failed for these files:\n'
    printf '%s\n' "${upload_failed[@]}"
  fi
}

# Clear existing data in graph database if requested
if [ "$clear_data" = "on" ]; then
echo -e "\nCLEARING EXISTING DATA FROM ${graph_db}..."

response=$(curl -u "${user}:${password}" -s -S -i -w "\n%{http_code}\n" \
-X POST $clear_data_url \
-H "Content-Type: application/sparql-update" \
--data-binary "${DELETE_TRIPLES_QUERY}")

# Extract and check status code outputted as final line of response
httpcode=$(tail -n1 <<< "$response")
if (( $httpcode < 200 || $httpcode >= 300 )); then
echo -e "\nERROR: Failed to clear ${graph_db}:"
echo "$(sed '$d' <<< "$response")"
echo -e "\nEXITING..."
exit 1
fi
fi


# Add data to specified graph database
echo -e "\nUPLOADING DATA FROM ${jsonld_dir} TO ${graph_db}...\n"

upload_failed=()

for db in ${jsonld_dir}/*.jsonld; do
# Prevent edge case where no matching files are present in directory and so loop executes once with glob pattern string itself
[ -e "$db" ] || continue

echo "$(basename ${db}):"
response=$(curl -u "${user}:${password}" -s -S -i -w "\n%{http_code}\n" \
-X POST $upload_data_url \
-H "Content-Type: application/ld+json" \
--data-binary @${db})

httpcode=$(tail -n1 <<< "$response")
if (( $httpcode < 200 || $httpcode >= 300 )); then
upload_failed+=("${db}")
fi
# Print rest of response to stdout
echo -e "$(sed '$d' <<< "$response")\n"
done

for file in ${jsonld_dir}/*.ttl; do
[ -e "$file" ] || continue

echo "$(basename ${file}):"
response=$(curl -u "${user}:${password}" -s -S -i -w "\n%{http_code}\n" \
-X POST $upload_data_url \
-H "Content-Type: text/turtle" \
--data-binary @${file})

httpcode=$(tail -n1 <<< "$response")
if (( $httpcode < 200 || $httpcode >= 300 )); then
upload_failed+=("${file}")
fi
echo -e "$(sed '$d' <<< "$response")\n"
done

echo "FINISHED UPLOADING DATA FROM ${jsonld_dir} TO ${graph_db}."

if (( ${#upload_failed[@]} != 0 )); then
echo -e "\nERROR: Upload failed for these files:"
printf '%s\n' "${upload_failed[@]}"
# Call the main logic function with or without output redirection.
# When log output is on, only main's stdout is redirected to $log_file; ">"
# truncates any existing file at that path, and stderr is not redirected.
if [ "$log_output" = "on" ]; then
main > "$log_file"
else
main
fi

# ] <-- needed because of Argbash

0 comments on commit b1eb9e2

Please sign in to comment.