Skip to content

Commit

Permalink
feat(biblio-ref): fixed errors + added docs + generate tests
Browse files Browse the repository at this point in the history
  • Loading branch information
leogail committed Feb 14, 2024
1 parent 3295eb0 commit 6a235a9
Show file tree
Hide file tree
Showing 5 changed files with 53 additions and 32 deletions.
2 changes: 1 addition & 1 deletion services/biblio-ref/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# syntax=docker/dockerfile:1.2
FROM python:3.9-slim-bullseye as build1
WORKDIR /dvc
RUN apt update && apt -y install git curl
RUN apt update && apt -y install git
RUN pip install dvc[webdav]==3.39.0
RUN --mount=type=secret,id=webdav_login \
--mount=type=secret,id=webdav_password \
Expand Down
12 changes: 9 additions & 3 deletions services/biblio-ref/examples.http
Original file line number Diff line number Diff line change
Expand Up @@ -13,18 +13,24 @@ Content-Type: application/json

[
{
"value": "Zohuri, B. (2019). A Comparison of Molten Salt Reactors to Light Water Reactors: Pros and Cons. In Molten Salt Reactors and Thorium Energy (pp. 81-98). Woodhead Publishing. https://doi.org/10.1016/B978-0-08-102337-2.00006-9"
"value": "J. A. TORRES, A. GARCIA-FUENTES, C. SALAZAR, E. CANO & F. VALLE - Caracterizacion de los pinares de Pil1l1s halepellsis Mill. en el sur de la Peninsula Iberica"
},
{
"value": "Please see https://doi.org/10.1016\/b978-0-323-90638-8.00002-3"
"value":"Y. B. LINHART. L. CHAOUNI-BENABDALLAH, J.-M. PARRY & J. D. THOMPSON - Selective herbivory of thyme chemotypes by a mollusk and a grasshopper"
},
{
"value": "works with just a doi : https://doi.org/10.1016\/b978-0-323-90638-8.00002-3"
},
{
"value": "Gerris Caucasicus, Primary Prevention of Cardiovascular Disease with a Mediterranean Diet, 10.1056/nejmoa1200303"
},
{
"value": "RETRACTED > Zohuri, B. (2019). A Comparison of Molten Salt Reactors to Light Water Reactors: Pros and Cons. In Molten Salt Reactors and Thorium Energy (pp. 81-98). Woodhead Publishing. https://doi.org/10.1016/B978-0-08-102337-2.00006-9"
},
{
"value": ["bad","input","type"]
},
{
"value": "nothing here"
}
]
]
27 changes: 23 additions & 4 deletions services/biblio-ref/tests.hurl
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,20 @@ POST {{host}}/v1/validate?indent=true
content-type: application/json
[
{
"value": "Zohuri, B. (2019). A Comparison of Molten Salt Reactors to Light Water Reactors: Pros and Cons. In Molten Salt Reactors and Thorium Energy (pp. 81-98). Woodhead Publishing. https://doi.org/10.1016/B978-0-08-102337-2.00006-9"
"value": "J. A. TORRES, A. GARCIA-FUENTES, C. SALAZAR, E. CANO & F. VALLE - Caracterizacion de los pinares de Pil1l1s halepellsis Mill. en el sur de la Peninsula Iberica"
},
{
"value": "Please see https://doi.org/10.1016\/b978-0-323-90638-8.00002-3"
"value":"Y. B. LINHART. L. CHAOUNI-BENABDALLAH, J.-M. PARRY & J. D. THOMPSON - Selective herbivory of thyme chemotypes by a mollusk and a grasshopper"
},
{
"value": "works with just a doi : https://doi.org/10.1016\/b978-0-323-90638-8.00002-3"
},
{
"value": "Gerris Caucasicus, Primary Prevention of Cardiovascular Disease with a Mediterranean Diet, 10.1056/nejmoa1200303"
},
{
"value": "RETRACTED > Zohuri, B. (2019). A Comparison of Molten Salt Reactors to Light Water Reactors: Pros and Cons. In Molten Salt Reactors and Thorium Energy (pp. 81-98). Woodhead Publishing. https://doi.org/10.1016/B978-0-08-102337-2.00006-9"
},
{
"value": ["bad","input","type"]
},
Expand All @@ -18,11 +24,18 @@ content-type: application/json
}
]


HTTP 200
[{
"value": {
"doi": "",
"status": "not_found"
"doi": "10.3406/ecmed.1999.1878",
"status": "found"
}
},
{
"value": {
"doi": "10.3406/ecmed.1999.1879",
"status": "found"
}
},
{
Expand All @@ -37,6 +50,12 @@ HTTP 200
"status": "retracted"
}
},
{
"value": {
"doi": "",
"status": "not_found"
}
},
{
"value": {
"doi": "",
Expand Down
16 changes: 14 additions & 2 deletions services/biblio-ref/v1/validate.ini
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,29 @@ mimeType = application/json

post.operationId = post-v1-validate
post.summary = Valide une référence bibliographique
post.description = Si un DOI est trouvé dans la référence bibliographique, valide la référence et indique si elle est rétractée
post.description = Utilise Crossref pour valider la référence bibliographique, donne le DOI s'il existe et indique si la référence est rétractée.
post.tags.0 = biblio-ref
post.requestBody.content.application/json.schema.$ref = #/components/schemas/JSONStream
post.requestBody.required = true
post.responses.default.content.application/json.schema.$ref = #/components/schemas/JSONStream
post.responses.default.description = Les champs value contiennent un json constitués des champs `is_found` e `is_retracted`.
post.responses.default.description = Les champs value contiennent un json constitué des champs `doi` et `status`. Status prend plusieurs valeurs : `found` si la référence est trouvée et valide, `not_found` si elle est hallucinée ou absente de Crossref, `retracted` si elle est rétractée. Les erreurs possibles sont signalées par `error_service` ou `error_data`.
post.parameters.0.description = Indenter le JSON résultant
post.parameters.0.in = query
post.parameters.0.name = indent
post.parameters.0.schema.type = boolean

post.requestBody.content.application/json.example.0.id = 1
post.requestBody.content.application/json.example.0.value = Y. B. LINHART. L. CHAOUNI-BENABDALLAH, J.-M. PARRY & J. D. THOMPSON - Selective herbivory of thyme chemotypes by a mollusk and a grasshopper
post.requestBody.content.application/json.example.1.id = 2
post.requestBody.content.application/json.example.1.value = J. A. TORRES, A. GARCIA-FUENTES, C. SALAZAR, E. CANO & F. VALLE, Caracterizacion de los pinares de Pil1l1s halepellsis Mill. en el sur de la Peninsula Iberica
post.requestBody.content.application/json.example.2.id = 3
post.requestBody.content.application/json.example.2.value = works with just a doi : https://doi.org/10.1016\/b978-0-323-90638-8.00002-3
post.requestBody.content.application/json.example.3.id = 4
post.requestBody.content.application/json.example.3.value = RETRACTED > Zohuri, B. (2019). A Comparison of Molten Salt Reactors to Light Water Reactors: Pros and Cons. In Molten Salt Reactors and Thorium Energy (pp. 81-98). Woodhead Publishing. https://doi.org/10.1016/B978-0-08-102337-2.00006-9
post.requestBody.content.application/json.example.4.id = 5
post.requestBody.content.application/json.example.4.value = Gerris Caucasicus, Primary Prevention of Cardiovascular Disease with a Mediterranean Diet, 10.1056/nejmoa1200303
post.requestBody.content.application/json.example.5.id = 6
post.requestBody.content.application/json.example.5.value = ["bad","input","type"]

[use]
plugin = @ezs/spawn
Expand Down
28 changes: 6 additions & 22 deletions services/biblio-ref/v1/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,25 +117,6 @@ def get_title_authors_doi(message):
first_author_given = ""
return {'title': title, 'first_author_given': first_author_given, 'first_author_name': first_author_name, 'doi': doi}

def match_title(title, ref_biblio):
    """
    Tell whether a publication title appears in a bibliographic reference.

    Both strings are passed through `uniformize` and then compared with
    `fuzz.partial_ratio`.

    Args:
        title (str): The title of the publication.
        ref_biblio (str): The biblio reference.

    Returns:
        bool: True when the partial-ratio score is strictly above 90,
            False otherwise.
    """
    # Threshold: a score of 90 or below is treated as a mismatch.
    score = fuzz.partial_ratio(uniformize(title), uniformize(ref_biblio))
    return score > 90

def compare_pubinfo_refbiblio(item,ref_biblio):
"""
Compare informations of one of the crossref publis with the biblio
Expand All @@ -148,9 +129,9 @@ def compare_pubinfo_refbiblio(item,ref_biblio):
tuple (bool, str): (True, doi) if the publication matches the reference, (False, "") otherwise
"""
# Check first author
if item['first_author_name'] not in ref_biblio:
if uniformize(item['first_author_name']) not in ref_biblio:
return False, ""
if not match_title(item['title'], ref_biblio):
if fuzz.partial_ratio(uniformize(item['title']), ref_biblio)<90:
return False, ""
return True, item['doi']

Expand All @@ -165,6 +146,7 @@ def verify_biblio(ref_biblio, mail=mail_adress):
Returns :
a (status, doi) tuple for the biblio ref, e.g. ("found", doi), ("retracted", doi) or ("not_found", "")
"""
ref_biblio = uniformize(ref_biblio)
url = f'https://api.crossref.org/works?query.bibliographic="{ref_biblio}"&mailto={mail}&rows=5'
try:
response = session.get(url)
Expand All @@ -178,6 +160,8 @@ def verify_biblio(ref_biblio, mail=mail_adress):
# compare pub_info with ref_biblio
match_item, doi = compare_pubinfo_refbiblio(item_info,ref_biblio)
if match_item:
if doi in retracted_doi:
return "retracted",doi
return "found",doi

return "not_found",""
Expand Down Expand Up @@ -207,7 +191,7 @@ def verify_biblio(ref_biblio, mail=mail_adress):
json.dump(data, sys.stdout)
sys.stdout.write("\n")

elif crossref_status_code==404: # If request return code 404
elif crossref_status_code==404: # If the request returns code 404, check the title and author
status,doi = verify_biblio(ref_biblio)
data["value"] = {"doi":doi, "status": status}

Expand Down

0 comments on commit 6a235a9

Please sign in to comment.