Skip to content

Commit

Permalink
Trust Crossref search engine. Paper = first match.
Browse files Browse the repository at this point in the history
  • Loading branch information
dougy147 committed May 15, 2024
1 parent 8d4d2b8 commit 8090a3d
Showing 1 changed file with 5 additions and 24 deletions.
29 changes: 5 additions & 24 deletions bin/scitopdf
Original file line number Diff line number Diff line change
Expand Up @@ -199,30 +199,11 @@ change_headers() {
#-------------------------#
# search_crossref: query search.crossref.org for $user_search, pull a DOI out
# of the returned page, then try to locate a PDF link for it on "$site".
# Reads globals:  headers, user_search, scitopdf_curl, site, quiet
# Writes globals: doi, download_link
# Side effects:   writes the raw Crossref response to "$scitopdf_curl".txt
# Calls:          change_headers, doi_search, check_website_url_inside_link
#                 (bodies are not visible in this excerpt)
# NOTE(review): this excerpt looks like a flattened diff view; this body may be
# the pre-change version of the function - confirm against the real script.
search_crossref() {
change_headers
# Fetch the results page (also saved via tee), collapse it to one line through
# the unquoted $(...) + echo, and take the first case-insensitive occurrence of
# the query. The == comparison is case-sensitive, so the branch is taken only
# when that first occurrence is spelled exactly like $user_search.
# NOTE(review): $user_search is used as a grep regex and placed in the URL
# without encoding - confirm callers sanitise it.
if [[ $(echo $(curl -A "$headers" -s "https://search.crossref.org/?q=$user_search&from_ui=yes" | tee "$scitopdf_curl".txt) | grep -io "$user_search" | head -n 1) == "$user_search" ]]; then
# From the saved page: keep text starting at "http(s)://doi", drop the scheme
# and everything up to and including the first "/", strip a trailing ")" and
# any ', < or > characters, then keep the second resulting line (or the only
# line when there is just one).
doi="$(grep -io "https\?://doi.*" "$scitopdf_curl".txt | grep -io "doi.*" | sed 's/https\?:\/\///' | grep -io "/.*" | sed -e 's,/,,' -e 's,)$,,' | sed "s/['<>]//g" | head -n 2 | tail -n 1)"
doi_search
# Fetch "$site/$doi" (following redirects) and keep the first src="..." value
# that ends in <any char>pdf (the dot in the pattern is unescaped); leading
# slashes are then stripped.
download_link="$(curl -L -s "$site/$doi" | grep -ioP "(?<=src=\")[^'\"]+?.pdf" | head -n 1 | sed 's/^\/\/\/// ; s/^\/\/// ; s/^\///')"
check_website_url_inside_link
else
# Query text not found in the response: warn unless quiet mode is on, then
# return to the caller without touching doi/download_link.
[[ ! $quiet = true ]] && echo "${_YELLOW}User search not found on Crossref.${_RESET}"
return
fi
}
#--------------------------#
# Look for DOI on Crossref #
#--------------------------#
# doi_search: normalise and report the DOI held in $doi; as the lines appear
# here it then also queries Crossref and looks up a PDF link.
# Reads globals:  doi, quiet, listing, headers, user_search, scitopdf_curl, site
# Writes globals: doi, download_link
# Exits:          status 2 when $doi is empty and $listing is "false"
# NOTE(review): the page header reports 5 additions and 24 deletions; the five
# statements after the closing "fi" are probably the added lines, i.e. the new
# body of search_crossref rather than part of doi_search - confirm against the
# real script before relying on this layout.
doi_search() {
if [[ -n "$doi" ]]; then
# Only rewrite values that look like a DOI URL or a "doi:" prefix.
if [[ $(echo "$doi" | grep -io "doi:") || $(echo $doi | grep -io "doi\.") ]]; then
# Drop the scheme and everything up to and including the first "/".
# NOTE(review): "http\?" makes the "p" optional, so "https://" is not
# matched by this sed; the following grep still cuts at the first "/".
doi=$(echo $doi | sed 's/http\?:\/\///' | grep -io "/.*" | sed "s/\///")
[[ ! $quiet = true ]] && echo "${_BOLD}DOI: ${_RESET}$doi"
fi
# Printed unconditionally, so the DOI is echoed twice when the branch above ran.
[[ ! $quiet = true ]] && echo "${_BOLD}DOI: ${_RESET}$doi"
else
[[ ! $quiet = true ]] && echo "${_RED}DOI not found.${_RESET}"
# Abort the whole script with status 2 when $listing is "false".
[[ "$listing" = false ]] && exit 2;
fi
# Save the Crossref results page for $user_search to "$scitopdf_curl".txt.
curl -A "$headers" -s "https://search.crossref.org/?q=$user_search&from_ui=yes" > "$scitopdf_curl".txt
# Capture the text after "= '" up to the first comma on the "var dois_list"
# line. NOTE(review): with no comma on that line the capture runs to the end
# of the line - verify the single-result case.
doi=$(grep "var dois_list" "$scitopdf_curl".txt | sed -E "s/.*= '([^,]*).*/\1/") # get first DOI from "dois_list"
# Fallback when nothing was captured: scrape http(s)://doi... links from the
# saved page and keep the second cleaned-up line (or the only one).
[[ -z ${doi} ]] && doi="$(grep -io "https\?://doi.*" "$scitopdf_curl".txt | grep -io "doi.*" | sed 's/https\?:\/\///' | grep -io "/.*" | sed -e 's,/,,' -e 's,)$,,' | sed "s/['<>]//g" | head -n 2 | tail -n 1)"
# Fetch "$site/$doi" (following redirects) and keep the first src="..." value
# ending in <any char>pdf, with leading slashes stripped.
download_link="$(curl -L -s "$site/$doi" | grep -ioP "(?<=src=\")[^'\"]+?.pdf" | head -n 1 | sed 's/^\/\/\/// ; s/^\/\/// ; s/^\///')"
check_website_url_inside_link
}
#----------------#
# Look for paper #
Expand Down

0 comments on commit 8090a3d

Please sign in to comment.