diff --git a/services/biblio-ref/Dockerfile b/services/biblio-ref/Dockerfile index 77fda25f..c1620bd6 100644 --- a/services/biblio-ref/Dockerfile +++ b/services/biblio-ref/Dockerfile @@ -1,7 +1,7 @@ # syntax=docker/dockerfile:1.2 FROM python:3.9-slim-bullseye as build1 WORKDIR /dvc -RUN apt update && apt -y install git curl +RUN apt update && apt -y install git RUN pip install dvc[webdav]==3.39.0 RUN --mount=type=secret,id=webdav_login \ --mount=type=secret,id=webdav_password \ diff --git a/services/biblio-ref/examples.http b/services/biblio-ref/examples.http index 6e96d574..9c106b1d 100644 --- a/services/biblio-ref/examples.http +++ b/services/biblio-ref/examples.http @@ -13,18 +13,24 @@ Content-Type: application/json [ { - "value": "Zohuri, B. (2019). A Comparison of Molten Salt Reactors to Light Water Reactors: Pros and Cons. In Molten Salt Reactors and Thorium Energy (pp. 81-98). Woodhead Publishing. https://doi.org/10.1016/B978-0-08-102337-2.00006-9" + "value": "J. A. TORRES, A. GARCIA-FUENTES, C. SALAZAR, E. CANO & F. VALLE - Caracterizacion de los pinares de Pil1l1s halepellsis Mill. en el sur de la Peninsula Iberica" }, { - "value": "Please see https://doi.org/10.1016\/b978-0-323-90638-8.00002-3" + "value":"Y. B. LINHART. L. CHAOUNI-BENABDALLAH, J.-M. PARRY & J. D. THOMPSON - Selective herbivory of thyme chemotypes by a mollusk and a grasshopper" + }, + { + "value": "works with just a doi : https://doi.org/10.1016\/b978-0-323-90638-8.00002-3" }, { "value": "Gerris Caucasicus, Primary Prevention of Cardiovascular Disease with a Mediterranean Diet, 10.1056/nejmoa1200303" }, + { + "value": "RETRACTED > Zohuri, B. (2019). A Comparison of Molten Salt Reactors to Light Water Reactors: Pros and Cons. In Molten Salt Reactors and Thorium Energy (pp. 81-98). Woodhead Publishing. 
https://doi.org/10.1016/B978-0-08-102337-2.00006-9" + }, { "value": ["bad","input","type"] }, { "value": "nothing here" } -] \ No newline at end of file +] diff --git a/services/biblio-ref/tests.hurl b/services/biblio-ref/tests.hurl index 845fcbec..b8e3c322 100644 --- a/services/biblio-ref/tests.hurl +++ b/services/biblio-ref/tests.hurl @@ -2,14 +2,20 @@ POST {{host}}/v1/validate?indent=true content-type: application/json [ { - "value": "Zohuri, B. (2019). A Comparison of Molten Salt Reactors to Light Water Reactors: Pros and Cons. In Molten Salt Reactors and Thorium Energy (pp. 81-98). Woodhead Publishing. https://doi.org/10.1016/B978-0-08-102337-2.00006-9" + "value": "J. A. TORRES, A. GARCIA-FUENTES, C. SALAZAR, E. CANO & F. VALLE - Caracterizacion de los pinares de Pil1l1s halepellsis Mill. en el sur de la Peninsula Iberica" }, { - "value": "Please see https://doi.org/10.1016\/b978-0-323-90638-8.00002-3" + "value":"Y. B. LINHART. L. CHAOUNI-BENABDALLAH, J.-M. PARRY & J. D. THOMPSON - Selective herbivory of thyme chemotypes by a mollusk and a grasshopper" + }, + { + "value": "works with just a doi : https://doi.org/10.1016\/b978-0-323-90638-8.00002-3" }, { "value": "Gerris Caucasicus, Primary Prevention of Cardiovascular Disease with a Mediterranean Diet, 10.1056/nejmoa1200303" }, + { + "value": "RETRACTED > Zohuri, B. (2019). A Comparison of Molten Salt Reactors to Light Water Reactors: Pros and Cons. In Molten Salt Reactors and Thorium Energy (pp. 81-98). Woodhead Publishing. 
https://doi.org/10.1016/B978-0-08-102337-2.00006-9" + }, { "value": ["bad","input","type"] }, @@ -18,11 +24,18 @@ content-type: application/json } ] + HTTP 200 [{ "value": { - "doi": "", - "status": "not_found" + "doi": "10.3406/ecmed.1999.1878", + "status": "found" + } +}, +{ + "value": { + "doi": "10.3406/ecmed.1999.1879", + "status": "found" } }, { @@ -37,6 +50,12 @@ HTTP 200 "status": "retracted" } }, +{ + "value": { + "doi": "", + "status": "not_found" + } +}, { "value": { "doi": "", diff --git a/services/biblio-ref/v1/validate.ini b/services/biblio-ref/v1/validate.ini index 3ee0f350..d784f5df 100644 --- a/services/biblio-ref/v1/validate.ini +++ b/services/biblio-ref/v1/validate.ini @@ -3,17 +3,29 @@ mimeType = application/json post.operationId = post-v1-validate post.summary = Valide une référence bibliographique -post.description = Si un DOI est trouvé dans la référence bibliographique, valide la référence et indique si elle est rétractée +post.description = Utilise Crossref pour valider la référence bibliographique, donne son DOI s'il existe et indique si elle est rétractée. post.tags.0 = biblio-ref post.requestBody.content.application/json.schema.$ref = #/components/schemas/JSONStream post.requestBody.required = true post.responses.default.content.application/json.schema.$ref = #/components/schemas/JSONStream -post.responses.default.description = Les champs value contiennent un json constitués des champs `is_found` e `is_retracted`. +post.responses.default.description = Les champs value contiennent un json constitué des champs `doi` et `status`. Status prend plusieurs valeurs : `found` si la référence est trouvée et valide, `not_found` si elle est hallucinée ou non présente dans Crossref, `retracted` si elle est rétractée. Indique les erreurs possibles `error_service` ou `error_data`. 
post.parameters.0.description = Indenter le JSON résultant post.parameters.0.in = query post.parameters.0.name = indent post.parameters.0.schema.type = boolean +post.requestBody.content.application/json.example.0.id = 1 +post.requestBody.content.application/json.example.0.value = Y. B. LINHART. L. CHAOUNI-BENABDALLAH, J.-M. PARRY & J. D. THOMPSON - Selective herbivory of thyme chemotypes by a mollusk and a grasshopper +post.requestBody.content.application/json.example.0.id = 2 +post.requestBody.content.application/json.example.0.value = J. A. TORRES, A. GARCIA-FUENTES, C. SALAZAR, E. CANO & F. VALLE, Caracterizacion de los pinares de Pil1l1s halepellsis Mill. en el sur de la Peninsula Iberica +post.requestBody.content.application/json.example.0.id = 3 +post.requestBody.content.application/json.example.0.value = works with just a doi : https://doi.org/10.1016\/b978-0-323-90638-8.00002-3 +post.requestBody.content.application/json.example.0.id = 4 +post.requestBody.content.application/json.example.0.value = RETRACTED > Zohuri, B. (2019). A Comparison of Molten Salt Reactors to Light Water Reactors: Pros and Cons. In Molten Salt Reactors and Thorium Energy (pp. 81-98). Woodhead Publishing. 
https://doi.org/10.1016/B978-0-08-102337-2.00006-9 +post.requestBody.content.application/json.example.0.id = 5 +post.requestBody.content.application/json.example.0.value = Gerris Caucasicus, Primary Prevention of Cardiovascular Disease with a Mediterranean Diet, 10.1056/nejmoa1200303 +post.requestBody.content.application/json.example.0.id = 6 +post.requestBody.content.application/json.example.0.value = ["bad","input","type"] [use] plugin = @ezs/spawn diff --git a/services/biblio-ref/v1/validate.py b/services/biblio-ref/v1/validate.py index 045175d3..f30c7765 100755 --- a/services/biblio-ref/v1/validate.py +++ b/services/biblio-ref/v1/validate.py @@ -117,25 +117,6 @@ def get_title_authors_doi(message): first_author_given = "" return {'title': title, 'first_author_given': first_author_given, 'first_author_name': first_author_name, 'doi': doi} -def match_title(title, ref_biblio): - """ - Match the title of the publication with the title of the biblio reference. - - Args: - title (str): The title of the publication. - ref_biblio (str): The biblio reference. - - Returns: - bool: True if the title of the publication matches the title of the biblio reference, False otherwise. 
- """ - title = uniformize(title) - ref_biblio = uniformize(ref_biblio) - - distance = fuzz.partial_ratio(title, ref_biblio) - - #thereshold here - return distance > 90 - def compare_pubinfo_refbiblio(item,ref_biblio): """ Compare informations of one of the crossref publis with the biblio @@ -148,9 +129,9 @@ def compare_pubinfo_refbiblio(item,ref_biblio): tuple (bool, str): True if it's match and whith the doi """ # Check first author - if item['first_author_name'] not in ref_biblio: + if uniformize(item['first_author_name']) not in ref_biblio: return False, "" - if not match_title(item['title'], ref_biblio): + if fuzz.partial_ratio(uniformize(item['title']), ref_biblio)<90: return False, "" return True, item['doi'] @@ -165,6 +146,7 @@ def verify_biblio(ref_biblio, mail=mail_adress): Returns : a confidence score about the existence + doi of the biblio ref """ + ref_biblio = uniformize(ref_biblio) url = f'https://api.crossref.org/works?query.bibliographic="{ref_biblio}"&mailto={mail}&rows=5' try: response = session.get(url) @@ -178,6 +160,8 @@ def verify_biblio(ref_biblio, mail=mail_adress): # compare pub_info with ref_biblio match_item, doi = compare_pubinfo_refbiblio(item_info,ref_biblio) if match_item: + if doi in retracted_doi: + return "retracted",doi return "found",doi return "not_found","" @@ -207,7 +191,7 @@ def verify_biblio(ref_biblio, mail=mail_adress): json.dump(data, sys.stdout) sys.stdout.write("\n") - elif crossref_status_code==404: # If request return code 404 + elif crossref_status_code==404: # If request return code 404, check the title and author status,doi = verify_biblio(ref_biblio) data["value"] = {"doi":doi, "status": status}