From 8bed86eec8cfd3925db2b7c94b88e917b0c8751c Mon Sep 17 00:00:00 2001 From: Ben Gyori Date: Sat, 30 Dec 2023 21:23:38 -0500 Subject: [PATCH] Change the retractions API and resource name --- indra/literature/pubmed_client.py | 11 +++++------ .../{pmid_retractions.tsv => pubmed_retractions.tsv} | 0 indra/tests/test_pubmed_client.py | 4 ++-- indra/tools/assemble_corpus.py | 5 +++-- 4 files changed, 10 insertions(+), 10 deletions(-) rename indra/resources/{pmid_retractions.tsv => pubmed_retractions.tsv} (100%) diff --git a/indra/literature/pubmed_client.py b/indra/literature/pubmed_client.py index fec6b41533..473e696cf8 100644 --- a/indra/literature/pubmed_client.py +++ b/indra/literature/pubmed_client.py @@ -7,8 +7,6 @@ import os import re import time -from pathlib import Path - import tqdm import logging import random @@ -16,6 +14,7 @@ import requests from time import sleep from typing import List +from pathlib import Path from functools import lru_cache import xml.etree.ElementTree as ET from indra.resources import RESOURCES_PATH @@ -30,7 +29,7 @@ pubmed_archive = "https://ftp.ncbi.nlm.nih.gov/pubmed" pubmed_archive_baseline = pubmed_archive + "/baseline/" pubmed_archive_update = pubmed_archive + "/updatefiles/" -RETRACTIONS_FILE = RESOURCES_PATH + "/pmid_retractions.tsv" +RETRACTIONS_FILE = os.path.join(RESOURCES_PATH, "pubmed_retractions.tsv") # Send request can't be cached by lru_cache because it takes a dict @@ -966,12 +965,12 @@ def get_publication_types(article: ET.Element): return {pt.text for pt in article.find('.//PublicationTypeList')} -def article_is_retracted(pmid: str) -> bool: +def is_retracted(pubmed_id: str) -> bool: """Return True if the article with the given PMID has been retracted. Parameters ---------- - pmid : + pubmed_id : The PMID of the paper to check. Returns @@ -979,7 +978,7 @@ def article_is_retracted(pmid: str) -> bool: : True if the paper has been retracted, False otherwise. """ - return retractions.is_retracted(pmid) + return retractions.is_retracted(pubmed_id) def generate_retractions_file(xml_path: str, download_missing: bool = False): diff --git a/indra/resources/pmid_retractions.tsv b/indra/resources/pubmed_retractions.tsv similarity index 100% rename from indra/resources/pmid_retractions.tsv rename to indra/resources/pubmed_retractions.tsv diff --git a/indra/tests/test_pubmed_client.py b/indra/tests/test_pubmed_client.py index f36d08aed6..e1ee083aa6 100644 --- a/indra/tests/test_pubmed_client.py +++ b/indra/tests/test_pubmed_client.py @@ -243,5 +243,5 @@ def test_get_substance_annotations(): def test_is_retracted(): - assert pubmed_client.article_is_retracted('35463694') - assert not pubmed_client.article_is_retracted('36938926') + assert pubmed_client.is_retracted('35463694') + assert not pubmed_client.is_retracted('36938926') diff --git a/indra/tools/assemble_corpus.py b/indra/tools/assemble_corpus.py index 722b95210b..f1cd15c49f 100644 --- a/indra/tools/assemble_corpus.py +++ b/indra/tools/assemble_corpus.py @@ -23,7 +23,7 @@ from indra.preassembler import Preassembler, flatten_evidence from indra.resources import get_resource_path from indra.statements.validate import print_validation_report -from indra.literature.pubmed_client import article_is_retracted +from indra.literature.pubmed_client import is_retracted import indra.tools.fix_invalidities @@ -1277,6 +1277,7 @@ def filter_retracted_sources(stmts_in, **kwargs): A list of statements to filter. save : Optional[str] The name of a pickle file to save the results (stmts_out) into. + Returns ------- stmts_out : list[indra.statements.Statement] @@ -1287,7 +1288,7 @@ def filter_retracted_sources(stmts_in, **kwargs): ev_out = [] for ev in stmt.evidence: pmid = ev.pmid or ev.text_refs.get('PMID') - if pmid and article_is_retracted(pmid): + if pmid and is_retracted(pmid): continue ev_out.append(ev)