Skip to content

Commit

Permalink
v1.1.4 (#192)
Browse files Browse the repository at this point in the history
- explicitly list cite process cache files in gitignore
- add default values for all `.get()` calls for extra safety and also to
indicate expected type
- change `id` to `_id` in plugins. This was done in the main `cite.py`
script in v1.1.1, but I missed it in the plugins.
- fix dangerous memoize/cache bug where the cache key doesn't include the id
of the metasource currently being cited, even though that id affects the output
- orcid plugin: update api to v3
- orcid plugin: fix bug where all ids for a single source were being
included. Instead, find the most recent doi, and fall back to another id type
if not available.
- orcid plugin: keep a few details from api for ids that aren't citable
by manubot
- allow `format_date` util func to accept numeric timestamp (returned
from orcid api)
- tweak member bio page
  • Loading branch information
vincerubinetti authored Apr 27, 2023
1 parent a0f0e26 commit 1092da3
Show file tree
Hide file tree
Showing 10 changed files with 131 additions and 54 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ _site
vendor
debug.log
__pycache__
.cache
!cache.db
.DS_STORE
.env*
package.json
Expand Down
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@

Reference: common-changelog.org

## 1.1.4 - 2023-04-28

### Changed

- Fix ORCID plugin and other cite process bugs.

## 1.1.3 - 2023-04-20

### Changed
Expand Down
4 changes: 2 additions & 2 deletions CITATION.cff
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# citation metadata for the template itself

title: "Lab Website Template"
version: 1.1.3
date-released: 2023-04-20
version: 1.1.4
date-released: 2023-04-28
url: "https://github.com/greenelab/lab-website-template"
authors:
- family-names: "Rubinetti"
Expand Down
Binary file modified _cite/.cache/cache.db
Binary file not shown.
11 changes: 7 additions & 4 deletions _cite/cite.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,11 +89,11 @@

# merge sources with matching (non-blank) ids
for a in range(0, len(sources)):
_id = sources[a].get("id")
_id = sources[a].get("id", "")
if not _id:
continue
for b in range(a + 1, len(sources)):
if sources[b].get("id") == _id:
if sources[b].get("id", "") == _id:
sources[a].update(sources[b])
sources[b] = {}
sources = [entry for entry in sources if entry]
Expand All @@ -109,6 +109,7 @@
# list of new citations
citations = []


# loop through compiled sources
for index, source in enumerate(sources):
log(f"Processing source {index + 1} of {len(sources)}, {label(source)}")
Expand All @@ -130,7 +131,7 @@
# if Manubot cannot cite source
except Exception as e:
# if regular source (id entered by user), throw error
if source.get("plugin") == "sources.py":
if source.get("plugin", "") == "sources.py":
log(e, 3, "ERROR")
error = True
# otherwise, if from metasource (id retrieved from some third-party API), just warn
Expand All @@ -143,7 +144,8 @@
citation.update(source)

# ensure date in proper format for correct date sorting
citation["date"] = format_date(citation.get("date"))
if citation.get("date", ""):
citation["date"] = format_date(citation.get("date", ""))

# add new citation to list
citations.append(citation)
Expand All @@ -153,6 +155,7 @@

log("Saving updated citations")


# save new citations
try:
save_data(output_file, citations)
Expand Down
22 changes: 11 additions & 11 deletions _cite/plugins/google-scholar.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,41 +9,41 @@ def main(entry):
returns list of sources to cite
"""

# get id from entry
id = entry.get("gsid")
if not id:
raise Exception('No "gsid" key')

# get api key
api_key = os.environ.get("GOOGLE_SCHOLAR_API_KEY")
api_key = os.environ.get("GOOGLE_SCHOLAR_API_KEY", "")
if not api_key:
raise Exception('No "GOOGLE_SCHOLAR_API_KEY" env var')

# serp api
params = {
"engine": "google_scholar_author",
"author_id": id,
"api_key": api_key,
"num": 100,
"num": 100, # max allowed
}

# get id from entry
_id = entry.get("gsid", "")
if not _id:
raise Exception('No "gsid" key')

# query api
@log_cache
@cache.memoize(name=__file__, expire=1 * (60 * 60 * 24))
def query():
def query(_id):
params["author_id"] = _id
return GoogleSearch(params).get_dict().get("articles", [])

response = query()
response = query(_id)

# list of sources to return
sources = []

# go through response and format sources
for work in response:

# create source
source = {
"id": work.get("citation_id", ""),
# api does not provide Manubot-citeable id, so keep citation details
"title": work.get("title", ""),
"authors": list(map(str.strip, work.get("authors", "").split(","))),
"publisher": work.get("publication", ""),
Expand Down
94 changes: 77 additions & 17 deletions _cite/plugins/orcid.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,42 +10,102 @@ def main(entry):
"""

# orcid api
endpoint = "https://pub.orcid.org/v2.0/$ORCID/works"
endpoint = "https://pub.orcid.org/v3.0/$ORCID/works"
headers = {"Accept": "application/json"}

# get id from entry
id = entry.get("orcid")
if not id:
_id = entry.get("orcid", "")
if not _id:
raise Exception('No "orcid" key')

# query api
@log_cache
@cache.memoize(name=__file__, expire=1 * (60 * 60 * 24))
def query():
url = endpoint.replace("$ORCID", id)
def query(_id):
url = endpoint.replace("$ORCID", _id)
request = Request(url=url, headers=headers)
response = json.loads(urlopen(request).read())
return response.get("group")
return response.get("group", [])

response = query()
response = query(_id)

# list of sources to return
sources = []

# go through response structure and pull out ids e.g. doi:1234/56789
for work in response:
for id in work["external-ids"]["external-id"]:
# get id and id-type from response
id_type = id["external-id-type"]
id_value = id["external-id-value"]
# get list of ids
ids = work.get("external-ids", {}).get("external-id", [])
for summary in work.get("work-summary", []):
ids = ids + summary.get("external-ids", {}).get("external-id", [])

# create source
source = {"id": f"{id_type}:{id_value}"}
# prefer doi id type, or fallback to first id
_id = next(
(id for id in ids if id.get("external-id-type", "") == "doi"),
ids[0] if len(ids) > 0 else {},
)

# copy fields from entry to source
source.update(entry)
# get id and id-type from response
id_type = _id.get("external-id-type", "")
id_value = _id.get("external-id-value", "")

# add source to list
sources.append(source)
# create source
source = {"id": f"{id_type}:{id_value}"}

# if not a doi, Manubot likely can't cite, so keep citation details
if id_type != "doi":
# get summaries
summaries = work.get("work-summary", [])

# sort summary entries by most recent
summaries = sorted(
summaries,
key=lambda summary: (
summary.get("last-modified-date", {}).get("value", 0)
)
or summary.get("created-date", {}).get("value", 0)
or 0,
reverse=True,
)

# get first summary with defined sub-value
def first(get_func):
    """
    return the first truthy value produced by applying get_func to each
    entry of summaries (in their current order), or None if no entry
    produces one
    """
    # pass an explicit default: a bare next() on an exhausted generator
    # raises StopIteration, which would propagate out of the caller
    # whenever no summary has the requested field
    return next((value for value in map(get_func, summaries) if value), None)

# get title
title = first(
lambda s: s.get("title", {}).get("title", {}).get("value", "")
)

# get publisher
publisher = first(lambda s: s.get("journal-title", {}).get("value", ""))

# get date
date = (
work.get("last-modified-date", {}).get("value", 0)
or first(lambda s: s.get("last-modified-date", {}).get("value", 0))
or work.get("created-date", {}).get("value", 0)
or first(lambda s: s.get("created-date", {}).get("value", 0))
or 0
)

# get link
link = first(lambda s: s.get("url", {}).get("value", ""))

# keep available details
if title:
source["title"] = title
if publisher:
source["publisher"] = publisher
if date:
source["date"] = format_date(date)
if link:
source["link"] = link

# copy fields from entry to source
source.update(entry)

# add source to list
sources.append(source)

return sources
16 changes: 8 additions & 8 deletions _cite/plugins/pubmed.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,28 +14,28 @@ def main(entry):
endpoint = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term=$TERM&retmode=json&retmax=1000&usehistory=y"

# get id from entry
id = entry.get("term")
if not id:
_id = entry.get("term", "")
if not _id:
raise Exception('No "term" key')

# query api
@log_cache
@cache.memoize(name=__file__, expire=1 * (60 * 60 * 24))
def query():
url = endpoint.replace("$TERM", quote(id))
def query(_id):
url = endpoint.replace("$TERM", quote(_id))
request = Request(url=url)
response = json.loads(urlopen(request).read())
return response.get("esearchresult", {}).get("idlist")
return response.get("esearchresult", {}).get("idlist", [])

response = query()
response = query(_id)

# list of sources to return
sources = []

# go through response and format sources
for id in response:
for _id in response:
# create source
source = {"id": f"pubmed:{id}"}
source = {"id": f"pubmed:{_id}"}

# copy fields from entry to source
source.update(entry)
Expand Down
10 changes: 6 additions & 4 deletions _cite/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def log(message="\n--------------------\n", indent=0, level="", newline=True):
"SUCCESS": "[black on #10B981]",
"INFO": "[grey70]",
}
color = palette.get(level) or palette.get(indent) or "[white]"
color = palette.get(level, "") or palette.get(indent, "") or "[white]"
if newline:
print()
print(indent * " " + color + str(message) + "[/]", end="", flush=True)
Expand All @@ -76,6 +76,8 @@ def format_date(date):
format date as YYYY-MM-DD, or no date if malformed
"""

if isinstance(date, int):
return datetime.fromtimestamp(date // 1000.0).strftime("%Y-%m-%d")
try:
return datetime.strptime(date, "%Y-%m-%d").strftime("%Y-%m-%d")
except Exception:
Expand Down Expand Up @@ -178,7 +180,7 @@ def cite_with_manubot(_id):

# authors
citation["authors"] = []
for author in manubot.get("author", []):
for author in manubot.get("author", {}):
given = author.get("given", "").strip()
family = author.get("family", "").strip()
if given or family:
Expand All @@ -193,8 +195,8 @@ def cite_with_manubot(_id):
# extract date part
def date_part(citation, index):
try:
return citation.get("issued").get("date-parts")[0][index]
except Exception:
return citation["issued"]["date-parts"][0][index]
except (KeyError, IndexError, TypeError):
return ""

# date
Expand Down
20 changes: 12 additions & 8 deletions _layouts/member.html
Original file line number Diff line number Diff line change
Expand Up @@ -32,16 +32,20 @@
research/?search={% for alias in aliases %}"{{ alias }}" {% endfor %}
{%- endcapture %}

<a href="{{ search | relative_url }}">
Search for {{ page.name | default: page.title }}'s papers on the Research page
</a>
<p class="center">
<a href="{{ search | relative_url }}">
Search for {{ page.name | default: page.title }}'s papers on the Research page
</a>
</p>

{% capture search -%}
blog/?search={{ page.name }}
{%- endcapture %}

<!--
<a href="{{ search | relative_url }}">
See {{ page.name | default: page.title }}'s posts on the Blog page
</a>
-->
<!--
<p class="center">
<a href="{{ search | relative_url }}">
See {{ page.name | default: page.title }}'s posts on the Blog page
</a>
</p>
-->

0 comments on commit 1092da3

Please sign in to comment.