Skip to content

Commit

Permalink
Merge pull request #1138 from research-software-directory/1071-add-eu…
Browse files Browse the repository at this point in the history
…rope-op-doi

Support for Publications Office of the European Union
  • Loading branch information
ewan-escience authored Mar 5, 2024
2 parents 7b4ba85 + b2b5cfb commit 48f26a3
Show file tree
Hide file tree
Showing 14 changed files with 258 additions and 33 deletions.
3 changes: 3 additions & 0 deletions data-generation/real-data.js
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@ const dois = [
'10.1175/BAMS-D-19-0337.1',
'10.1186/s12966-019-0834-1',
'10.1515/itit-2019-0040',
'10.2777/28598',
'10.2788/50967',
'10.2788/52504',
'10.4233/uuid:4bb38399-9267-428f-b10a-80b86e101f23',
'10.5194/egusphere-egu21-4805',
'10.5194/ems2022-105',
Expand Down
2 changes: 1 addition & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ services:

scrapers:
build: ./scrapers
image: rsd/scrapers:1.6.0
image: rsd/scrapers:1.7.0
environment:
# it uses values from .env file
- POSTGREST_URL
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
// SPDX-FileCopyrightText: 2023 - 2024 Netherlands eScience Center
// SPDX-FileCopyrightText: 2023 Dusan Mijatovic (Netherlands eScience Center)
// SPDX-FileCopyrightText: 2023 Dusan Mijatovic (dv4all)
// SPDX-FileCopyrightText: 2023 Netherlands eScience Center
// SPDX-FileCopyrightText: 2023 dv4all
// SPDX-FileCopyrightText: 2024 Ewan Cahen (Netherlands eScience Center) <[email protected]>
//
// SPDX-License-Identifier: Apache-2.0

Expand Down Expand Up @@ -82,7 +83,7 @@ export default function ImportReportBody({initialResults,onCancel,onImport}: Bul
return 'DOI not found'
case 'unsupportedRA':
return 'Registration agent (RA) is not supported'
case 'alredyImported':
case 'alreadyImported':
return 'This publication is already imported'
default:
return 'Unknown error'
Expand Down
21 changes: 19 additions & 2 deletions frontend/components/mention/ImportMentions/apiImportMentions.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import {useState} from 'react'
import {extractSearchTerm} from '~/components/software/edit/mentions/utils'
import {SearchResult} from '.'
import {getMentionsByDoiFromRsd} from '~/utils/editMentions'
import {getDoiRAList, getItemsFromCrossref, getItemsFromDatacite} from '~/utils/getDOI'
import {getDoiRAList, getItemsFromCrossref, getItemsFromDatacite, getItemsFromOpenAlex} from '~/utils/getDOI'
import {MentionItemProps} from '~/types/Mention'
import {createJsonHeaders, extractReturnMessage} from '~/utils/fetchHelpers'
import useEditMentionReducer from '../useEditMentionReducer'
Expand Down Expand Up @@ -53,7 +53,7 @@ export async function validateInputList(doiList: string[], mentions: MentionItem
const found = mentions.find(mention => mention.doi?.toLowerCase() === doi)
if (found) {
// flag item with DOI already processed
mentionResultPerDoi.set(doi, {doi ,status: 'alredyImported', include: false})
mentionResultPerDoi.set(doi, {doi ,status: 'alreadyImported', include: false})
return false
}
return true
Expand Down Expand Up @@ -96,6 +96,7 @@ export async function validateInputList(doiList: string[], mentions: MentionItem
// classify dois by RA
const crossrefDois: string[] = []
const dataciteDois: string[] = []
const openalexDois: string[] = []
doiRas.forEach(doiRa => {
const doi = doiRa.DOI.toLowerCase()
if (typeof doiRa?.RA === 'undefined') {
Expand All @@ -105,6 +106,8 @@ export async function validateInputList(doiList: string[], mentions: MentionItem
crossrefDois.push(doi)
} else if (doiRa.RA === 'DataCite') {
dataciteDois.push(doi)
} else if (doiRa.RA === 'OP') {
openalexDois.push(doi)
} else {
mentionResultPerDoi.set(doi, {doi, status: 'unsupportedRA', include: false})
}
Expand Down Expand Up @@ -142,6 +145,20 @@ export async function validateInputList(doiList: string[], mentions: MentionItem
}
})

const openalexMentions = await getItemsFromOpenAlex(openalexDois)
openalexMentions.forEach(mention => {
if (mention.doi !== null) {
const doi = mention.doi.toLowerCase()
mentionResultPerDoi.set(doi, {
doi,
status: 'valid',
source: 'OpenAlex',
include: true,
mention
})
}
})

// flag dois that are not updated
doisNotInDatabase.forEach(doi => {
if (!mentionResultPerDoi.has(doi)) {
Expand Down
8 changes: 4 additions & 4 deletions frontend/components/mention/ImportMentions/index.tsx
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// SPDX-FileCopyrightText: 2023 - 2024 Ewan Cahen (Netherlands eScience Center) <[email protected]>
// SPDX-FileCopyrightText: 2023 - 2024 Netherlands eScience Center
// SPDX-FileCopyrightText: 2023 Dusan Mijatovic (Netherlands eScience Center)
// SPDX-FileCopyrightText: 2023 Dusan Mijatovic (dv4all)
// SPDX-FileCopyrightText: 2023 Ewan Cahen (Netherlands eScience Center) <[email protected]>
// SPDX-FileCopyrightText: 2023 Netherlands eScience Center
// SPDX-FileCopyrightText: 2023 dv4all
//
// SPDX-License-Identifier: Apache-2.0
Expand All @@ -23,9 +23,9 @@ import {DoiBulkImportReport, addMentions, linkMentionToEntity} from './apiImport

export type SearchResult = {
doi: string
status: 'valid' | 'invalidDoi' | 'doiNotFound' |'unsupportedRA' | 'alredyImported' | 'unknown',
status: 'valid' | 'invalidDoi' | 'doiNotFound' |'unsupportedRA' | 'alreadyImported' | 'unknown',
include: boolean
source?: 'RSD' | 'Crossref' | 'DataCite',
source?: 'RSD' | 'Crossref' | 'DataCite' | 'OpenAlex',
mention?: MentionItemProps
}

Expand Down
4 changes: 3 additions & 1 deletion frontend/utils/fetchHelpers.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
// SPDX-FileCopyrightText: 2022 Dusan Mijatovic (dv4all)
// SPDX-FileCopyrightText: 2022 dv4all
// SPDX-FileCopyrightText: 2024 Ewan Cahen (Netherlands eScience Center) <[email protected]>
// SPDX-FileCopyrightText: 2024 Netherlands eScience Center
//
// SPDX-License-Identifier: Apache-2.0

Expand Down Expand Up @@ -42,7 +44,7 @@ export async function extractReturnMessage(resp: Response, dataId?: string) {
status: resp.status,
message: `
${resp.statusText}.
You might not have sufficient priveleges to edit this item.
You might not have sufficient privileges to edit this item.
Please contact site administrators.
`
}
Expand Down
6 changes: 3 additions & 3 deletions frontend/utils/getCrossref.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// SPDX-FileCopyrightText: 2022 - 2023 Ewan Cahen (Netherlands eScience Center) <[email protected]>
// SPDX-FileCopyrightText: 2022 - 2023 Netherlands eScience Center
// SPDX-FileCopyrightText: 2022 - 2024 Ewan Cahen (Netherlands eScience Center) <[email protected]>
// SPDX-FileCopyrightText: 2022 - 2024 Netherlands eScience Center
// SPDX-FileCopyrightText: 2022 Dusan Mijatovic (dv4all)
// SPDX-FileCopyrightText: 2022 dv4all
//
Expand Down Expand Up @@ -142,7 +142,7 @@ export async function getCrossrefItemsByQuery(query: string) {
}
}

function crossrefToRsdType(type: string): MentionTypeKeys {
export function crossrefToRsdType(type: string): MentionTypeKeys {
if (!type) return 'other'
switch (type.trim().toLowerCase()) {
case 'book':
Expand Down
31 changes: 29 additions & 2 deletions frontend/utils/getDOI.ts
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
// SPDX-FileCopyrightText: 2022 - 2023 Dusan Mijatovic (dv4all)
// SPDX-FileCopyrightText: 2022 - 2023 dv4all
// SPDX-FileCopyrightText: 2023 Ewan Cahen (Netherlands eScience Center) <[email protected]>
// SPDX-FileCopyrightText: 2023 Netherlands eScience Center
// SPDX-FileCopyrightText: 2023 - 2024 Ewan Cahen (Netherlands eScience Center) <[email protected]>
// SPDX-FileCopyrightText: 2023 - 2024 Netherlands eScience Center
//
// SPDX-License-Identifier: Apache-2.0

import {MentionItemProps} from '~/types/Mention'
import {crossrefItemToMentionItem, getCrossrefItemByDoi} from './getCrossref'
import {dataCiteGraphQLItemToMentionItem, getDataciteItemByDoiGraphQL, getDataciteItemsByDoiGraphQL} from './getDataCite'
import logger from './logger'
import {getOpenalexItemByDoi, getOpenalexItemsByDoi, openalexItemToMentionItem} from '~/utils/getOpenalex'

type DoiRA = {
DOI: string,
Expand Down Expand Up @@ -155,6 +156,30 @@ export async function getItemsFromDatacite(dois: string[]) {
return mentions
}

/**
 * Fetch a single work from OpenAlex by DOI and convert it to a mention item.
 *
 * @param doi the DOI to look up
 * @returns `{status: 200, message: <mention item>}` when OpenAlex answered with 200;
 * otherwise the response of `getOpenalexItemByDoi` is passed through unchanged.
 */
async function getItemFromOpenalex(doi: string) {
  const response = await getOpenalexItemByDoi(doi)
  if (response.status !== 200) {
    // anything but success: hand the error response back to the caller as-is
    return response
  }
  return {
    status: 200,
    message: openalexItemToMentionItem(response.message)
  }
}

/**
 * Fetch several works from OpenAlex by DOI and convert them to mention items.
 *
 * @param dois DOIs to look up; an empty list short-circuits without any request
 * @returns the converted mention items, or an empty list when OpenAlex could not
 * be queried successfully
 */
export async function getItemsFromOpenAlex(dois: string[]): Promise<MentionItemProps[]> {
  if (dois.length === 0) {
    return []
  }

  const response = await getOpenalexItemsByDoi(dois)

  // On failure `message` holds an error string (or undefined), not a result list.
  // Calling .map on it would throw and abort the caller, so report and return nothing.
  if (response.status !== 200 || !Array.isArray(response.message)) {
    logger(`getItemsFromOpenAlex: ${response.status} ${response.message}`, 'error')
    return []
  }

  return response.message.map((rawMention: any) => openalexItemToMentionItem(rawMention))
}

export async function getMentionByDoi(doi: string) {
// get RA first
const doiRA = await getDoiRA(doi)
Expand All @@ -167,6 +192,8 @@ export async function getMentionByDoi(doi: string) {
case 'datacite':
// get from datacite
return getItemFromDatacite(doi)
case 'op':
return getItemFromOpenalex(doi)
default:
return {
status: 400,
Expand Down
94 changes: 94 additions & 0 deletions frontend/utils/getOpenalex.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
// SPDX-FileCopyrightText: 2024 Ewan Cahen (Netherlands eScience Center) <[email protected]>
// SPDX-FileCopyrightText: 2024 Netherlands eScience Center
//
// SPDX-License-Identifier: Apache-2.0

import logger from '~/utils/logger'
import {MentionItemProps} from '~/types/Mention'
import {crossrefToRsdType} from '~/utils/getCrossref'

/**
 * Request a single work from the OpenAlex API by DOI.
 *
 * @param doi the DOI to look up (appended to `https://doi.org/` in the request path)
 * @returns an object with `status` and `message`:
 * - 200: `message` is the parsed JSON body
 * - 404: `message` is 'DOI not found'
 * - any other response status: `message` is 'unexpected response from OpenAlex'
 * - 500 when the request or JSON parsing throws: `message` is the error message
 */
export async function getOpenalexItemByDoi(doi: string) {
  try {
    const endpoint = `https://api.openalex.org/works/https://doi.org/${doi}`
    const response = await fetch(endpoint)

    switch (response.status) {
      case 200:
        // await inside the try block so a malformed body ends up in the catch below
        return {
          status: 200,
          message: await response.json()
        }
      case 404:
        return {
          status: 404,
          message: 'DOI not found'
        }
      default:
        return {
          status: response.status,
          message: 'unexpected response from OpenAlex'
        }
    }
  } catch (e: any) {
    logger(`getOpenalexItemByDoi: ${e?.message}`, 'error')
    return {
      status: 500,
      message: e?.message
    }
  }
}

/**
 * Request several works from the OpenAlex API using its `doi` filter.
 *
 * The DOIs are sent in batches: OpenAlex returns only 25 results per page unless
 * a page size is requested and limits the number of values in an OR filter, so a
 * single unpaged request would silently drop results for longer lists.
 *
 * @param dois the DOIs to look up; an empty list yields an empty result without a request
 * @returns an object with `status` and `message`:
 * - 200: `message` is the combined `results` array of all batches
 * - any other response status: `message` is 'unexpected response from OpenAlex'
 *   (results of earlier batches are discarded)
 * - 500 when a request or JSON parsing throws: `message` is the error message
 */
export async function getOpenalexItemsByDoi(dois: string[]): Promise<{status: number, message: any}> {
  // stays within OpenAlex limits for both OR-filter values and page size
  const batchSize = 50

  try {
    const results: any[] = []

    // batches are requested one after another so that a failing batch stops further requests
    for (let start = 0; start < dois.length; start += batchSize) {
      const filter = dois
        .slice(start, start + batchSize)
        // DOIs may contain characters such as '#' or '&' that would corrupt the query string
        .map(doi => encodeURIComponent(doi))
        .join('|')
      const url = `https://api.openalex.org/works?filter=doi:${filter}&per-page=${batchSize}`

      const resp = await fetch(url)
      if (resp.status !== 200) {
        return {
          status: resp.status,
          message: 'unexpected response from OpenAlex'
        }
      }

      const json = await resp.json()
      if (Array.isArray(json?.results)) {
        results.push(...json.results)
      }
    }

    return {
      status: 200,
      message: results
    }
  } catch (e: any) {
    logger(`getOpenalexItemsByDoi: ${e?.message}`, 'error')
    return {
      status: 500,
      message: e?.message
    }
  }
}

/**
 * Convert a raw OpenAlex work object into an RSD mention item.
 *
 * OpenAlex reports the DOI as a full URL; the `https://doi.org/` prefix is removed
 * for the `doi` field while `url` keeps the value as received. A work without a
 * DOI yields `doi: null` and `url: null` instead of throwing.
 *
 * @param json raw work object as returned by the OpenAlex API
 * @returns the mention item with `source` set to 'OpenAlex'
 */
export function openalexItemToMentionItem(json: any): MentionItemProps {
  const doiUrlPrefix = 'https://doi.org/'
  const doiUrl: string | null = typeof json.doi === 'string' ? json.doi : null

  // only strip the prefix when it is really there, a bare DOI is kept untouched
  let doi: string | null = doiUrl
  if (doiUrl !== null && doiUrl.toLowerCase().startsWith(doiUrlPrefix)) {
    doi = doiUrl.substring(doiUrlPrefix.length)
  }

  return ({
    id: null,
    doi,
    url: doiUrl,
    title: json.title,
    authors: extractAuthors(json),
    publisher: null,
    publication_year: json.publication_year,
    journal: null,
    page: null,
    // url to external image
    image_url: null,
    // is_featured?: boolean
    mention_type: crossrefToRsdType(json.type_crossref),
    source: 'OpenAlex',
    note: null
  })
}

/**
 * Build a comma separated author list from the `authorships` of an OpenAlex work.
 *
 * Uses `raw_author_name` of each authorship and falls back to `author.display_name`
 * when the raw name is absent. Entries without a usable name are skipped so they
 * do not leave empty slots in the list.
 *
 * @param json raw work object as returned by the OpenAlex API
 * @returns the author names joined with ', '; an empty string when there are none
 */
function extractAuthors(json: any): string {
  const authorships: unknown = json?.authorships
  if (!Array.isArray(authorships)) {
    return ''
  }
  return authorships
    .map((authorship: any) => authorship?.raw_author_name ?? authorship?.author?.display_name)
    .filter((name: unknown): name is string => typeof name === 'string' && name.trim() !== '')
    .join(', ')
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// SPDX-FileCopyrightText: 2023 Ewan Cahen (Netherlands eScience Center) <[email protected]>
// SPDX-FileCopyrightText: 2023 Netherlands eScience Center
// SPDX-FileCopyrightText: 2023 - 2024 Ewan Cahen (Netherlands eScience Center) <[email protected]>
// SPDX-FileCopyrightText: 2023 - 2024 Netherlands eScience Center
//
// SPDX-License-Identifier: Apache-2.0

Expand All @@ -10,16 +10,15 @@
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import nl.esciencecenter.rsd.scraper.Utils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Map;
import java.util.TreeMap;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class DataCiteReleaseRepository {

private static final Logger LOGGER = LoggerFactory.getLogger(DataCiteReleaseRepository.class);
Expand All @@ -41,7 +40,9 @@ public class DataCiteReleaseRepository {
""";

public Map<String, Collection<MentionRecord>> getVersionedDois(Collection<String> conceptDois) {
if (conceptDois.isEmpty()) return Collections.EMPTY_MAP;
if (conceptDois.isEmpty()) {
return Collections.emptyMap();
}

String query = QUERY_UNFORMATTED.formatted(DataciteMentionRepository.joinCollection(conceptDois));
JsonObject body = new JsonObject();
Expand Down
Loading

0 comments on commit 48f26a3

Please sign in to comment.