From 662814c173d4645d59894fe7beab52ad98429b0d Mon Sep 17 00:00:00 2001 From: Ewan Cahen Date: Thu, 31 Oct 2024 12:23:20 +0100 Subject: [PATCH 1/2] feat: scrape more data from ROR --- database/013-create-organisation-table.sql | 6 +- scrapers/pom.xml | 22 ---- .../nl/esciencecenter/rsd/scraper/Utils.java | 62 ++++++----- .../ror/BasicOrganisationDatabaseData.java | 14 --- .../rsd/scraper/ror/MainRor.java | 13 +-- ...ata.java => OrganisationDatabaseData.java} | 8 +- .../rsd/scraper/ror/RorData.java | 18 +++ .../esciencecenter/rsd/scraper/ror/RorId.java | 74 +++++++++++++ .../scraper/ror/RorPostgrestConnector.java | 38 ++++--- .../rsd/scraper/ror/RorScraper.java | 104 ++++++++++++------ .../rsd/scraper/ror/RorIdTest.java | 85 ++++++++++++++ .../ror/RorPostgrestConnectorTest.java | 15 ++- .../rsd/scraper/ror/RorScraperTest.java | 49 ++------- 13 files changed, 348 insertions(+), 160 deletions(-) delete mode 100644 scrapers/src/main/java/nl/esciencecenter/rsd/scraper/ror/BasicOrganisationDatabaseData.java rename scrapers/src/main/java/nl/esciencecenter/rsd/scraper/ror/{BasicOrganisationData.java => OrganisationDatabaseData.java} (76%) create mode 100644 scrapers/src/main/java/nl/esciencecenter/rsd/scraper/ror/RorData.java create mode 100644 scrapers/src/main/java/nl/esciencecenter/rsd/scraper/ror/RorId.java create mode 100644 scrapers/src/test/java/nl/esciencecenter/rsd/scraper/ror/RorIdTest.java diff --git a/database/013-create-organisation-table.sql b/database/013-create-organisation-table.sql index 049c8cb2d..72de9b3fe 100644 --- a/database/013-create-organisation-table.sql +++ b/database/013-create-organisation-table.sql @@ -16,11 +16,15 @@ CREATE TABLE organisation ( name VARCHAR(200) NOT NULL, short_description VARCHAR(300), description VARCHAR(10000), - ror_id VARCHAR(100) UNIQUE, + ror_id VARCHAR(100) UNIQUE CHECK (ror_id ~ '^https://ror\.org/(0[a-hj-km-np-tv-z|0-9]{6}[0-9]{2})$'), website VARCHAR(200) UNIQUE, is_tenant BOOLEAN DEFAULT FALSE NOT NULL, country VARCHAR(100), city VARCHAR(100), + wikipedia_url VARCHAR(300), + ror_types VARCHAR(100)[], + lat float8, + lon float8, ror_scraped_at TIMESTAMPTZ, ror_last_error VARCHAR(500), logo_id VARCHAR(40) REFERENCES image(id), diff --git a/scrapers/pom.xml b/scrapers/pom.xml index 3af551acf..f3150a9fd 100644 --- a/scrapers/pom.xml +++ b/scrapers/pom.xml @@ -159,27 +159,5 @@ SPDX-License-Identifier: Apache-2.0 3.2.5 test - - - - org.wiremock - wiremock - 3.4.1 - test - - - - com.fasterxml.jackson.core - jackson-databind - 2.17.2 - - - - com.fasterxml.jackson.dataformat - jackson-dataformat-yaml - 2.17.2 - - - diff --git a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/Utils.java b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/Utils.java index 27f249ea2..e535faabc 100644 --- a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/Utils.java +++ b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/Utils.java @@ -31,6 +31,7 @@ import java.util.Base64; import java.util.Date; import java.util.UUID; +import java.util.function.Function; public class Utils { @@ -90,9 +91,9 @@ public static String get(String uri, String... headers) throws IOException, Inte */ public static HttpResponse getAsHttpResponse(String uri, String... headers) throws IOException, InterruptedException { HttpRequest.Builder httpRequestBuilder = HttpRequest.newBuilder() - .GET() - .timeout(DEFAULT_TIMEOUT) - .uri(URI.create(uri)); + .GET() + .timeout(DEFAULT_TIMEOUT) + .uri(URI.create(uri)); if (headers != null && headers.length > 0 && headers.length % 2 == 0) { httpRequestBuilder.headers(headers); } @@ -112,11 +113,11 @@ public static HttpResponse getAsHttpResponse(String uri, String... heade public static String getAsAdmin(String uri) { String jwtString = adminJwt(); HttpRequest request = HttpRequest.newBuilder() - .GET() - .uri(URI.create(uri)) - .timeout(DEFAULT_TIMEOUT) - .header("Authorization", "Bearer " + jwtString) - .build(); + .GET() + .uri(URI.create(uri)) + .timeout(DEFAULT_TIMEOUT) + .header("Authorization", "Bearer " + jwtString) + .build(); HttpResponse response; @@ -148,9 +149,9 @@ public static String getAsAdmin(String uri) { */ public static String post(String uri, String body, String... extraHeaders) { HttpRequest.Builder httpRequestBuilder = HttpRequest.newBuilder() - .POST(HttpRequest.BodyPublishers.ofString(body)) - .timeout(DEFAULT_TIMEOUT) - .uri(URI.create(uri)); + .POST(HttpRequest.BodyPublishers.ofString(body)) + .timeout(DEFAULT_TIMEOUT) + .uri(URI.create(uri)); if (extraHeaders != null && extraHeaders.length > 0 && extraHeaders.length % 2 == 0) { httpRequestBuilder.headers(extraHeaders); } @@ -183,11 +184,11 @@ public static String post(String uri, String body, String... extraHeaders) { public static String postAsAdmin(String uri, String json, String... extraHeaders) { String jwtString = adminJwt(); HttpRequest.Builder builder = HttpRequest.newBuilder() - .POST(HttpRequest.BodyPublishers.ofString(json)) - .uri(URI.create(uri)) - .timeout(DEFAULT_TIMEOUT) - .header("Content-Type", "application/json") - .header("Authorization", "Bearer " + jwtString); + .POST(HttpRequest.BodyPublishers.ofString(json)) + .uri(URI.create(uri)) + .timeout(DEFAULT_TIMEOUT) + .header("Content-Type", "application/json") + .header("Authorization", "Bearer " + jwtString); if (extraHeaders != null && extraHeaders.length > 0) { builder.headers(extraHeaders); } @@ -281,11 +282,11 @@ static String createPatchUri(String baseuri, String tableName, String primaryKey public static String patchAsAdmin(String uri, String json, String... extraHeaders) { String jwtString = adminJwt(); HttpRequest.Builder builder = HttpRequest.newBuilder() - .method("PATCH", HttpRequest.BodyPublishers.ofString(json)) - .uri(URI.create(uri)) - .timeout(Duration.ofSeconds(30)) - .header("Content-Type", "application/json") - .header("Authorization", "Bearer " + jwtString); + .method("PATCH", HttpRequest.BodyPublishers.ofString(json)) + .uri(URI.create(uri)) + .timeout(Duration.ofSeconds(30)) + .header("Content-Type", "application/json") + .header("Authorization", "Bearer " + jwtString); if (extraHeaders != null && extraHeaders.length > 0) { builder.headers(extraHeaders); } @@ -309,15 +310,24 @@ private static String adminJwt() { String signingSecret = Config.jwtSigningSecret(); Algorithm signingAlgorithm = Algorithm.HMAC256(signingSecret); return JWT.create() - .withClaim("role", "rsd_admin") - .withExpiresAt(new Date(System.currentTimeMillis() + Config.jwtExpirationTime())) - .sign(signingAlgorithm); + .withClaim("role", "rsd_admin") + .withExpiresAt(new Date(System.currentTimeMillis() + Config.jwtExpirationTime())) + .sign(signingAlgorithm); } public static String stringOrNull(JsonElement e) { return e == null || !e.isJsonPrimitive() ? null : e.getAsString(); } + public static T safelyGetOrNull(JsonElement element, Function extractor) { + try { + return extractor.apply(element); + } catch (RuntimeException e) { + LOGGER.warn("Exception extracting data from JSON: " + element, e); + return null; + } + } + public static Integer integerOrNull(JsonElement e) { return e == null || !e.isJsonPrimitive() ? null : e.getAsInt(); } @@ -329,7 +339,9 @@ public static Integer integerOrNull(JsonElement e) { * @return the filter. */ public static String atLeastOneHourAgoFilter(String scrapedAtColumnName) { - String oneHourAgoEncoded = urlEncode(ZonedDateTime.now().minusHours(1).format(DateTimeFormatter.ISO_OFFSET_DATE_TIME)); + String oneHourAgoEncoded = urlEncode(ZonedDateTime.now() + .minusHours(1) + .format(DateTimeFormatter.ISO_OFFSET_DATE_TIME)); return "or=(%s.is.null,%s.lte.%s)".formatted(scrapedAtColumnName, scrapedAtColumnName, oneHourAgoEncoded); } } diff --git a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/ror/BasicOrganisationDatabaseData.java b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/ror/BasicOrganisationDatabaseData.java deleted file mode 100644 index f92364f4d..000000000 --- a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/ror/BasicOrganisationDatabaseData.java +++ /dev/null @@ -1,14 +0,0 @@ -// SPDX-FileCopyrightText: 2024 Christian Meeßen (GFZ) -// SPDX-FileCopyrightText: 2024 Helmholtz Centre Potsdam - GFZ German Research Centre for Geosciences -// -// SPDX-License-Identifier: Apache-2.0 - -package nl.esciencecenter.rsd.scraper.ror; - -import java.time.ZonedDateTime; - -public record BasicOrganisationDatabaseData( - BasicOrganisationData basicData, - ZonedDateTime rorScrapedAt -) { -} diff --git a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/ror/MainRor.java b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/ror/MainRor.java index f46471d12..7a6d528dc 100644 --- a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/ror/MainRor.java +++ b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/ror/MainRor.java @@ -33,7 +33,7 @@ public static void main(String[] args) { private static void scrapeLocationData() { RorPostgrestConnector organisationsInRSD = new RorPostgrestConnector(); - Collection organisationsToScrape = organisationsInRSD.organisationsWithoutLocation(SCRAPING_LIMIT); + Collection organisationsToScrape = organisationsInRSD.organisationsWithoutLocation(SCRAPING_LIMIT); CompletableFuture[] futures = new CompletableFuture[organisationsToScrape.size()]; ZonedDateTime scrapedAt = ZonedDateTime.now(); int i = 0; @@ -41,15 +41,12 @@ private static void scrapeLocationData() { String columnName = "ror_last_error"; String primaryKeyName = "id"; String scrapedAtName = "ror_scraped_at"; - for (BasicOrganisationData organisation : organisationsToScrape) { + for (OrganisationDatabaseData organisation : organisationsToScrape) { CompletableFuture future = CompletableFuture.runAsync(() -> { try { - String rorUrl = organisation.rorId().replace("https://ror.org/", "https://api.ror.org/organizations/"); - RorScraper rorScraper = new RorScraper(rorUrl); - String city = rorScraper.city(); - String country = rorScraper.country(); - BasicOrganisationData updatedOrganisationData = new BasicOrganisationData(organisation.id(), organisation.rorId(), country, city); - BasicOrganisationDatabaseData updatedOrganisationDatabaseData = new BasicOrganisationDatabaseData(updatedOrganisationData, scrapedAt); + RorScraper rorScraper = new RorScraper(organisation.rorId()); + RorData data = rorScraper.scrapeData(); + OrganisationDatabaseData updatedOrganisationDatabaseData = new OrganisationDatabaseData(organisation.id(), organisation.rorId(), scrapedAt, data); organisationsInRSD.saveLocationData(updatedOrganisationDatabaseData); } catch (RsdResponseException | IOException | InterruptedException e) { Utils.saveExceptionInDatabase("ROR location scraper", tableName, organisation.id(), e); diff --git a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/ror/BasicOrganisationData.java b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/ror/OrganisationDatabaseData.java similarity index 76% rename from scrapers/src/main/java/nl/esciencecenter/rsd/scraper/ror/BasicOrganisationData.java rename to scrapers/src/main/java/nl/esciencecenter/rsd/scraper/ror/OrganisationDatabaseData.java index ad75c7116..9684f7705 100644 --- a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/ror/BasicOrganisationData.java +++ b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/ror/OrganisationDatabaseData.java @@ -7,7 +7,13 @@ package nl.esciencecenter.rsd.scraper.ror; +import java.time.ZonedDateTime; import java.util.UUID; -public record BasicOrganisationData(UUID id, String rorId, String country, String city) { +public record OrganisationDatabaseData( + UUID id, + RorId rorId, + ZonedDateTime rorScrapedAt, + RorData data +) { } diff --git a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/ror/RorData.java b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/ror/RorData.java new file mode 100644 index 000000000..e097936c8 --- /dev/null +++ b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/ror/RorData.java @@ -0,0 +1,18 @@ +// SPDX-FileCopyrightText: 2024 Ewan Cahen (Netherlands eScience Center) +// SPDX-FileCopyrightText: 2024 Netherlands eScience Center +// +// SPDX-License-Identifier: Apache-2.0 + +package nl.esciencecenter.rsd.scraper.ror; + +import java.util.List; + +public record RorData( + String country, + String city, + String wikipediaUrl, + List rorTypes, + Double lat, + Double lon +) { +} diff --git a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/ror/RorId.java b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/ror/RorId.java new file mode 100644 index 000000000..93567fc53 --- /dev/null +++ b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/ror/RorId.java @@ -0,0 +1,74 @@ +// SPDX-FileCopyrightText: 2024 Ewan Cahen (Netherlands eScience Center) +// SPDX-FileCopyrightText: 2024 Netherlands eScience Center +// +// SPDX-License-Identifier: Apache-2.0 + +package nl.esciencecenter.rsd.scraper.ror; + +import java.net.URI; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +public class RorId { + + private static final String ROR_BASE_URL = "https://ror.org/"; + private static final String ROR_BASE_API_URL = "https://api.ror.org/organizations/"; + // https://ror.readme.io/docs/identifier + // we haven't implemented the checksum of the last two digits, maybe something to do later; + // but we don't have this check in the database either + private static final Pattern ROR_URL_PATTERN = Pattern.compile("^https://ror\\.org/(0[a-hj-km-np-tv-z|\\d]{6}\\d{2})$"); + + private final String id; + + private RorId(String id) { + this.id = id; + } + + public static boolean isValidRorUrl(String url) { + return url != null && ROR_URL_PATTERN.asPredicate().test(url); + } + + public static RorId fromUrlString(String url) { + if (!isValidRorUrl(url)) { + throw new IllegalArgumentException(); + } + + Matcher matcher = ROR_URL_PATTERN.matcher(url); + matcher.find(); + String id = matcher.group(1); + return new RorId(id); + } + + public URI asUrl() { + return URI.create(ROR_BASE_URL + id); + } + + public URI asApiUrl() { + return URI.create(ROR_BASE_API_URL + id); + } + + @Override + public String toString() { + return ROR_BASE_URL + id; + } + + @Override + public int hashCode() { + return id.hashCode(); + } + + @Override + public boolean equals(Object other) { + if (other == null) { + return false; + } + if (this == other) { + return true; + } + if (other instanceof RorId otherRorId) { + return id.equals(otherRorId.id); + } + + return false; + } +} diff --git a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/ror/RorPostgrestConnector.java b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/ror/RorPostgrestConnector.java index 6a4a2e616..a229afed2 100644 --- a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/ror/RorPostgrestConnector.java +++ b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/ror/RorPostgrestConnector.java @@ -18,6 +18,7 @@ import java.time.format.DateTimeFormatter; import java.util.ArrayList; import java.util.Collection; +import java.util.List; import java.util.UUID; public class RorPostgrestConnector { @@ -27,33 +28,44 @@ public RorPostgrestConnector() { this.backendUrl = Config.backendBaseUrl(); } - public Collection organisationsWithoutLocation(int limit) { - String filter = "organisation?ror_id=not.is.null&or=(country.is.null,city.is.null)&limit=" + limit; + public Collection organisationsWithoutLocation(int limit) { + String filter = "organisation?ror_id=not.is.null&order=ror_scraped_at.asc.nullsfirst&select=id,ror_id&limit=" + limit; String data = Utils.getAsAdmin(backendUrl + "/" + filter); return parseBasicJsonData(data); } - static Collection parseBasicJsonData(String data) { + static Collection parseBasicJsonData(String data) { JsonArray dataInArray = JsonParser.parseString(data).getAsJsonArray(); - Collection result = new ArrayList<>(); + Collection result = new ArrayList<>(); for (JsonElement element : dataInArray) { JsonObject organisationJson = element.getAsJsonObject(); UUID id = UUID.fromString(organisationJson.getAsJsonPrimitive("id").getAsString()); - String rorId = organisationJson.getAsJsonPrimitive("ror_id").getAsString(); - String country = organisationJson.get("country").isJsonNull() ? null : organisationJson.getAsJsonPrimitive("country").getAsString(); - String city = organisationJson.get("city").isJsonNull() ? null : organisationJson.getAsJsonPrimitive("city").getAsString(); - result.add(new BasicOrganisationData(id, rorId, country, city)); + RorId rorId = RorId.fromUrlString(organisationJson.getAsJsonPrimitive("ror_id").getAsString()); + result.add(new OrganisationDatabaseData(id, rorId, null, null)); } return result; } - public void saveLocationData(BasicOrganisationDatabaseData organisationData) { + public void saveLocationData(OrganisationDatabaseData organisationData) { JsonObject jsonObject = new JsonObject(); - jsonObject.addProperty("country", organisationData.basicData().country()); - jsonObject.addProperty("city", organisationData.basicData().city()); - jsonObject.addProperty("ror_scraped_at", organisationData.rorScrapedAt().format(DateTimeFormatter.ISO_OFFSET_DATE_TIME)); + jsonObject.addProperty("country", organisationData.data().country()); + jsonObject.addProperty("city", organisationData.data().city()); + jsonObject.addProperty("wikipedia_url", organisationData.data().wikipediaUrl()); + JsonArray rorTypesJsonArray = new JsonArray(); + List rorTypes = organisationData.data().rorTypes(); + if (rorTypes != null) { + for (String rorType : rorTypes) { + rorTypesJsonArray.add(rorType); + } + } + jsonObject.add("ror_types", rorTypesJsonArray); + jsonObject.addProperty("lat", organisationData.data().lat()); + jsonObject.addProperty("lon", organisationData.data().lon()); + + jsonObject.addProperty("ror_scraped_at", organisationData.rorScrapedAt() + .format(DateTimeFormatter.ISO_OFFSET_DATE_TIME)); jsonObject.add("ror_last_error", JsonNull.INSTANCE); - Utils.patchAsAdmin(backendUrl + "/organisation?id=eq." + organisationData.basicData().id().toString(), jsonObject.toString()); + Utils.patchAsAdmin(backendUrl + "/organisation?id=eq." + organisationData.id(), jsonObject.toString()); } } diff --git a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/ror/RorScraper.java b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/ror/RorScraper.java index 0b1c097d5..6b4147c33 100644 --- a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/ror/RorScraper.java +++ b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/ror/RorScraper.java @@ -1,5 +1,5 @@ -// SPDX-FileCopyrightText: 2023 Ewan Cahen (Netherlands eScience Center) -// SPDX-FileCopyrightText: 2023 Netherlands eScience Center +// SPDX-FileCopyrightText: 2023 - 2024 Ewan Cahen (Netherlands eScience Center) +// SPDX-FileCopyrightText: 2023 - 2024 Netherlands eScience Center // SPDX-FileCopyrightText: 2024 Christian Meeßen (GFZ) // SPDX-FileCopyrightText: 2024 Helmholtz Centre Potsdam - GFZ German Research Centre for Geosciences // @@ -7,51 +7,89 @@ package nl.esciencecenter.rsd.scraper.ror; -import java.io.IOException; - -import com.google.gson.JsonArray; import com.google.gson.JsonElement; -import com.google.gson.JsonObject; -import com.google.gson.JsonParseException; import com.google.gson.JsonParser; -import com.google.gson.JsonSyntaxException; - import nl.esciencecenter.rsd.scraper.RsdResponseException; import nl.esciencecenter.rsd.scraper.Utils; +import java.io.IOException; +import java.util.Collections; +import java.util.List; +import java.util.Objects; + public class RorScraper { - private String rorApiUrl; - private JsonObject apiObject; + private final RorId rorId; - public RorScraper(String rorApiUrl) throws IOException, InterruptedException, RsdResponseException, JsonParseException, JsonSyntaxException { - this.rorApiUrl = rorApiUrl; - getFromApi(); + public RorScraper(RorId rorId) { + this.rorId = Objects.requireNonNull(rorId); } - private void getFromApi() throws IOException, InterruptedException, RsdResponseException, JsonParseException, JsonSyntaxException { - String jsonResponse = Utils.get(rorApiUrl); - apiObject = JsonParser.parseString(jsonResponse).getAsJsonObject(); + private String getFromApi() throws IOException, InterruptedException, RsdResponseException { + // e.g https://api.ror.org/organizations/04tsk2644 + return Utils.get(rorId.asApiUrl().toString()); } - public String country() { - String country = null; - JsonObject jsonCountry = apiObject.has("country") ? apiObject.get("country").getAsJsonObject() : new JsonObject(); - if (! jsonCountry.isEmpty()) { - JsonElement jsonCountryName = jsonCountry.has("country_name") ? jsonCountry.get("country_name") : new JsonObject(); - country = jsonCountryName.isJsonNull() ? null : jsonCountryName.getAsString(); - } - return country; + public RorData scrapeData() throws RsdResponseException, IOException, InterruptedException { + String json = getFromApi(); + return parseData(json); } - public String city() { - String city = null; - JsonArray jsonAddresses = apiObject.has("addresses") ? apiObject.get("addresses").getAsJsonArray() : new JsonArray(); - if (! jsonAddresses.isEmpty()) { - JsonElement jsonFirstAddress = jsonAddresses.get(0); - JsonElement jsonCity = jsonFirstAddress.isJsonNull() ? new JsonObject() : jsonFirstAddress.getAsJsonObject().get("city"); - city = jsonCity.isJsonNull() ? null : jsonCity.getAsString(); + static RorData parseData(String json) { + JsonElement jsonElement = JsonParser.parseString(json); + final String addressesKey = "addresses"; + + String country = Utils.safelyGetOrNull(jsonElement, j -> j + .getAsJsonObject() + .getAsJsonObject("country") + .getAsJsonPrimitive("country_name") + .getAsString()); + String city = Utils.safelyGetOrNull(jsonElement, j -> j + .getAsJsonObject() + .getAsJsonArray(addressesKey) + .get(0) + .getAsJsonObject() + .getAsJsonPrimitive("city") + .getAsString()); + String wikipediaUrl = Utils.safelyGetOrNull(jsonElement, j -> { + JsonElement wikiElement = j + .getAsJsonObject() + .get("wikipedia_url"); + return wikiElement.isJsonPrimitive() ? wikiElement.getAsString() : null; + }); + if (wikipediaUrl != null && wikipediaUrl.isBlank()) { + wikipediaUrl = null; } - return city; + List rorTypes = Utils.safelyGetOrNull(jsonElement, j -> j + .getAsJsonObject() + .getAsJsonArray("types") + .asList() + .stream() + .map(JsonElement::getAsString) + .toList() + ); + Double lat = Utils.safelyGetOrNull(jsonElement, j -> j + .getAsJsonObject() + .getAsJsonArray(addressesKey) + .get(0) + .getAsJsonObject() + .getAsJsonPrimitive("lat") + .getAsDouble()); + Double lon = Utils.safelyGetOrNull(jsonElement, j -> j + .getAsJsonObject() + .getAsJsonArray(addressesKey) + .get(0) + .getAsJsonObject() + .getAsJsonPrimitive("lng") + .getAsDouble()); + + return new RorData( + country, + city, + wikipediaUrl, + rorTypes == null ? Collections.emptyList() : rorTypes, + lat, + lon + ); } } diff --git a/scrapers/src/test/java/nl/esciencecenter/rsd/scraper/ror/RorIdTest.java b/scrapers/src/test/java/nl/esciencecenter/rsd/scraper/ror/RorIdTest.java new file mode 100644 index 000000000..62af7b830 --- /dev/null +++ b/scrapers/src/test/java/nl/esciencecenter/rsd/scraper/ror/RorIdTest.java @@ -0,0 +1,85 @@ +// SPDX-FileCopyrightText: 2024 Ewan Cahen (Netherlands eScience Center) +// SPDX-FileCopyrightText: 2024 Netherlands eScience Center +// +// SPDX-License-Identifier: Apache-2.0 + +package nl.esciencecenter.rsd.scraper.ror; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; + +import java.net.URI; + +class RorIdTest { + + @ParameterizedTest + @ValueSource(strings = { + "https://ror.org/14tsk2644", // ID should start with 0 + "https://ror.org/0ltsk2644", // character 'l' not allowed + "https://ror.org/04tsk26444", // ID longer than 9 characters + "https://ror.org/04tsk264", // // ID shorter than 9 characters + "ror.org/04tsk2644", // should start with https:// + "", + }) + void givenInvalidRorIds_whenTesting_thenFalseReturned(String url) { + Assertions.assertFalse(RorId.isValidRorUrl(url)); + } + + @ParameterizedTest + @ValueSource(strings = { + "https://ror.org/14tsk2644", // ID should start with 0 + "https://ror.org/0ltsk2644", // character 'l' not allowed + "https://ror.org/04tsk26444", // ID longer than 9 characters + "https://ror.org/04tsk264", // // ID shorter than 9 characters + "ror.org/04tsk2644", // should start with https:// + "", + }) + void givenInvalidRorIds_whenCreatingInstance_thenExceptionThrown(String url) { + Assertions.assertThrows(IllegalArgumentException.class, () -> RorId.fromUrlString(url)); + } + + @ParameterizedTest + @ValueSource(strings = { + "https://ror.org/04tsk2644", + "https://ror.org/05qghxh33", + "https://ror.org/01e6qks80", + "https://ror.org/00wjc7c48", + "https://ror.org/01swzsf04", + "https://ror.org/02jbv0t02", + "https://ror.org/05grdyy37", + "https://ror.org/05f950310", + "https://ror.org/04ke6ht85", + "https://ror.org/018dfmf50", + }) + void givenValidRorIds_whenTesting_thenTrueReturned(String url) { + Assertions.assertTrue(RorId.isValidRorUrl(url)); + } + + @ParameterizedTest + @ValueSource(strings = { + "https://ror.org/04tsk2644", + "https://ror.org/05qghxh33", + "https://ror.org/01e6qks80", + "https://ror.org/00wjc7c48", + "https://ror.org/01swzsf04", + "https://ror.org/02jbv0t02", + "https://ror.org/05grdyy37", + "https://ror.org/05f950310", + "https://ror.org/04ke6ht85", + "https://ror.org/018dfmf50", + }) + void givenValidRorIds_whenCreatingInstance_thenNoExceptionThrown(String url) { + RorId rorId = Assertions.assertDoesNotThrow(() -> RorId.fromUrlString(url)); + Assertions.assertNotNull(rorId); + } + + @Test + void givenValidRorId_whenGettingUrls_thenCorrectUrlsReturned() { + RorId rorId = RorId.fromUrlString("https://ror.org/04tsk2644"); + + Assertions.assertEquals(URI.create("https://ror.org/04tsk2644"), rorId.asUrl()); + Assertions.assertEquals(URI.create("https://api.ror.org/organizations/04tsk2644"), rorId.asApiUrl()); + } +} diff --git a/scrapers/src/test/java/nl/esciencecenter/rsd/scraper/ror/RorPostgrestConnectorTest.java b/scrapers/src/test/java/nl/esciencecenter/rsd/scraper/ror/RorPostgrestConnectorTest.java index dc6e75924..83e297156 100644 --- a/scrapers/src/test/java/nl/esciencecenter/rsd/scraper/ror/RorPostgrestConnectorTest.java +++ b/scrapers/src/test/java/nl/esciencecenter/rsd/scraper/ror/RorPostgrestConnectorTest.java @@ -1,16 +1,19 @@ // SPDX-FileCopyrightText: 2024 Christian Meeßen (GFZ) +// SPDX-FileCopyrightText: 2024 Ewan Cahen (Netherlands eScience Center) // SPDX-FileCopyrightText: 2024 Helmholtz Centre Potsdam - GFZ German Research Centre for Geosciences +// SPDX-FileCopyrightText: 2024 Netherlands eScience Center // // SPDX-License-Identifier: Apache-2.0 package nl.esciencecenter.rsd.scraper.ror; -import static org.junit.jupiter.api.Assertions.assertEquals; +import org.junit.jupiter.api.Test; import java.util.Collection; import java.util.UUID; -import org.junit.jupiter.api.Test; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNull; class RorPostgrestConnectorTest { @@ -18,12 +21,12 @@ class RorPostgrestConnectorTest { void testParseBasicJsonData() { String data = "[{\"id\":\"52b13a36-334b-429a-9cbb-5215264b36d2\",\"parent\":null,\"primary_maintainer\":\"22f87e87-7fd9-4716-9dc6-e790372cac4c\",\"slug\":\"gfz\",\"name\":\"Helmholtz Centre Potsdam GFZ German Research Centre for Geosciences\",\"ror_id\":\"https://ror.org/04z8jg394\",\"website\":\"https://www.gfz-potsdam.de\",\"is_tenant\":true,\"created_at\":\"2022-07-26T19:34:55.173342+00:00\",\"updated_at\":\"2023-05-17T06:11:51.338233+00:00\",\"description\":\"# About GFZ Potsdam\n\nThe GFZ is Germany’s national research center for the solid Earth Sciences. Our mission is to deepen the knowledge of the dynamics of the solid Earth, and to develop solutions for grand challenges facing society. These challenges include anticipating the hazards arising from the Earth’s dynamic systems and mitigating the associated risks to society; securing our habitat under the pressure of global change; and supplying energy and mineral resources for a rapidly growing population in a sustainable manner and without harming the environment. \n\n## Research Software at GFZ\n\nTo learn more about the research software policy at GFZ, visit [gfz-potsdam.de/en/software](https://www.gfz-potsdam.de/en/software).\n\n\",\"logo_id\":\"da40b40113fdfb1d3ce40f58ecb784fe33ba40bb\",\"country\":null,\"city\":null,\"short_description\":null}]"; - Collection result = RorPostgrestConnector.parseBasicJsonData(data); + Collection result = RorPostgrestConnector.parseBasicJsonData(data); assertEquals(1, result.size()); - BasicOrganisationData organisation = result.iterator().next(); + OrganisationDatabaseData organisation = result.iterator().next(); assertEquals(UUID.fromString("52b13a36-334b-429a-9cbb-5215264b36d2"), organisation.id()); - assertEquals(null, organisation.city()); - assertEquals(null, organisation.country()); + assertEquals(RorId.fromUrlString("https://ror.org/04z8jg394"), organisation.rorId()); + assertNull(organisation.data()); } } diff --git a/scrapers/src/test/java/nl/esciencecenter/rsd/scraper/ror/RorScraperTest.java b/scrapers/src/test/java/nl/esciencecenter/rsd/scraper/ror/RorScraperTest.java index 4abb428a6..f6b16451c 100644 --- a/scrapers/src/test/java/nl/esciencecenter/rsd/scraper/ror/RorScraperTest.java +++ b/scrapers/src/test/java/nl/esciencecenter/rsd/scraper/ror/RorScraperTest.java @@ -7,43 +7,27 @@ package nl.esciencecenter.rsd.scraper.ror; -import com.github.tomakehurst.wiremock.client.WireMock; -import com.github.tomakehurst.wiremock.junit5.WireMockTest; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.ValueSource; -import static com.github.tomakehurst.wiremock.client.WireMock.aResponse; -import static com.github.tomakehurst.wiremock.client.WireMock.get; -import static com.github.tomakehurst.wiremock.client.WireMock.stubFor; +import java.util.List; + import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNull; -@WireMockTest(proxyMode = true) class RorScraperTest { - private static RorScraper rorScraper; - private static String completeJsonResponse = "{\"id\":\"https://ror.org/04z8jg394\",\"name\":\"Helmholtz Centre Potsdam - GFZ German Research Centre for Geosciences\",\"email_address\":\"\",\"ip_addresses\":[],\"established\":1992,\"types\":[\"Facility\"],\"relationships\":[{\"label\":\"Helmholtz Association of German Research Centres\",\"type\":\"Parent\",\"id\":\"https://ror.org/0281dp749\"}],\"addresses\":[{\"lat\":52.39886,\"lng\":13.06566,\"state\":null,\"state_code\":null,\"city\":\"Potsdam\",\"geonames_city\":{\"id\":2852458,\"city\":\"Potsdam\",\"geonames_admin1\":{\"name\":\"Brandenburg\",\"id\":2945356,\"ascii_name\":\"Brandenburg\",\"code\":\"DE.11\"},\"geonames_admin2\":{\"name\":null,\"id\":null,\"ascii_name\":null,\"code\":\"DE.11.00\"},\"license\":{\"attribution\":\"Data from geonames.org under a CC-BY 3.0 license\",\"license\":\"http://creativecommons.org/licenses/by/3.0/\"},\"nuts_level1\":{\"name\":null,\"code\":null},\"nuts_level2\":{\"name\":null,\"code\":null},\"nuts_level3\":{\"name\":null,\"code\":null}},\"postcode\":null,\"primary\":false,\"line\":null,\"country_geonames_id\":2921044}],\"links\":[\"https://www.gfz-potsdam.de\"],\"aliases\":[],\"acronyms\":[\"GFZ\"],\"status\":\"active\",\"wikipedia_url\":\"https://en.wikipedia.org/wiki/GFZ_German_Research_Centre_for_Geosciences\",\"labels\":[{\"label\":\"Helmholtz-Zentrum Potsdam - Deutsches GeoForschungsZentrum GFZ\",\"iso639\":\"de\"}],\"country\":{\"country_name\":\"Germany\",\"country_code\":\"DE\"},\"external_ids\":{\"ISNI\":{\"preferred\":null,\"all\":[\"0000 0000 9195 2461\"]},\"FundRef\":{\"preferred\":\"501100010956\",\"all\":[\"501100010956\"]},\"Wikidata\":{\"preferred\":null,\"all\":[\"Q1205654\"]},\"GRID\":{\"preferred\":\"grid.23731.34\",\"all\":\"grid.23731.34\"}}}"; - private static String apiDomain = "api.ror.org"; - private static String apiPath = "/organizations/04t3en479"; @Test - void testLocations() throws Exception { - - stubFor( - get(apiPath) - .withHost(WireMock.equalTo(apiDomain)) - .willReturn( - aResponse() - .withStatus(200) - .withBody(completeJsonResponse) - )); + void testLocations() { + String completeJsonResponse = "{\"id\":\"https://ror.org/04z8jg394\",\"name\":\"Helmholtz Centre Potsdam - GFZ German Research Centre for Geosciences\",\"email_address\":\"\",\"ip_addresses\":[],\"established\":1992,\"types\":[\"Facility\"],\"relationships\":[{\"label\":\"Helmholtz Association of German Research Centres\",\"type\":\"Parent\",\"id\":\"https://ror.org/0281dp749\"}],\"addresses\":[{\"lat\":52.39886,\"lng\":13.06566,\"state\":null,\"state_code\":null,\"city\":\"Potsdam\",\"geonames_city\":{\"id\":2852458,\"city\":\"Potsdam\",\"geonames_admin1\":{\"name\":\"Brandenburg\",\"id\":2945356,\"ascii_name\":\"Brandenburg\",\"code\":\"DE.11\"},\"geonames_admin2\":{\"name\":null,\"id\":null,\"ascii_name\":null,\"code\":\"DE.11.00\"},\"license\":{\"attribution\":\"Data from geonames.org under a CC-BY 3.0 license\",\"license\":\"http://creativecommons.org/licenses/by/3.0/\"},\"nuts_level1\":{\"name\":null,\"code\":null},\"nuts_level2\":{\"name\":null,\"code\":null},\"nuts_level3\":{\"name\":null,\"code\":null}},\"postcode\":null,\"primary\":false,\"line\":null,\"country_geonames_id\":2921044}],\"links\":[\"https://www.gfz-potsdam.de\"],\"aliases\":[],\"acronyms\":[\"GFZ\"],\"status\":\"active\",\"wikipedia_url\":\"https://en.wikipedia.org/wiki/GFZ_German_Research_Centre_for_Geosciences\",\"labels\":[{\"label\":\"Helmholtz-Zentrum Potsdam - Deutsches GeoForschungsZentrum GFZ\",\"iso639\":\"de\"}],\"country\":{\"country_name\":\"Germany\",\"country_code\":\"DE\"},\"external_ids\":{\"ISNI\":{\"preferred\":null,\"all\":[\"0000 0000 9195 2461\"]},\"FundRef\":{\"preferred\":\"501100010956\",\"all\":[\"501100010956\"]},\"Wikidata\":{\"preferred\":null,\"all\":[\"Q1205654\"]},\"GRID\":{\"preferred\":\"grid.23731.34\",\"all\":\"grid.23731.34\"}}}"; + RorData rorData = RorScraper.parseData(completeJsonResponse); - rorScraper = new RorScraper("http://" + apiDomain + apiPath); - - assertEquals("Potsdam", rorScraper.city()); - assertEquals("Germany", rorScraper.country()); + assertEquals("Potsdam", rorData.city()); + assertEquals("Germany", rorData.country()); + assertEquals(List.of("Facility"), rorData.rorTypes()); } @@ -53,19 +37,10 @@ void testLocations() throws Exception { "{\"addresses\": [],\"country\": {}}", "{}", }) - void testNullLocationsOrEmptyLocationOrEmptyResponse(String jsonBody) throws Exception { - stubFor( - get(apiPath) - .withHost(WireMock.equalTo(apiDomain)) - .willReturn( - aResponse() - .withStatus(200) - .withBody(jsonBody) - )); - - rorScraper = new RorScraper("http://" + apiDomain + apiPath); + void testNullLocationsOrEmptyLocationOrEmptyResponse(String jsonBody) { + RorData rorData = RorScraper.parseData(jsonBody); - assertNull(rorScraper.city()); - assertNull(rorScraper.country()); + assertNull(rorData.city()); + assertNull(rorData.country()); } } From a6c0c764a89c2469b8c1115eb432c6dfdb3305df Mon Sep 17 00:00:00 2001 From: "Dusan Mijatovic (PC2020)" Date: Fri, 1 Nov 2024 17:07:54 +0100 Subject: [PATCH 2/2] feat: save more ROR data to organisation table and use the local ROR data --- .../admin/organisations/apiOrganisation.tsx | 4 +- .../organisation/apiOrganisations.ts | 48 ++++++++++---- .../context/OrganisationContext.tsx | 15 +++-- .../organisation/metadata/RorType.tsx | 7 +-- .../organisation/metadata/index.tsx | 43 +++++-------- .../units/OrganisationUnitsIndex.test.tsx | 2 +- .../components/organisation/units/index.tsx | 6 +- .../AutosaveFundingOrganisations.test.tsx | 6 +- .../AutosaveFundingOrganisations.tsx | 4 +- .../EditProjectOrganisationsIndex.test.tsx | 4 ++ .../projects/edit/organisations/index.tsx | 4 +- .../software/edit/organisations/index.tsx | 4 +- .../organisations/organisationForSoftware.ts | 4 +- frontend/package-lock.json | 4 +- frontend/pages/organisations/[...slug].tsx | 15 ++--- frontend/types/Organisation.ts | 35 +++++++---- frontend/utils/editProject.ts | 10 +-- frontend/utils/getROR.test.ts | 62 +++++++------------ frontend/utils/getROR.ts | 32 +++------- 19 files changed, 157 insertions(+), 152 deletions(-) diff --git a/frontend/components/admin/organisations/apiOrganisation.tsx b/frontend/components/admin/organisations/apiOrganisation.tsx index 41d8f7881..7536e494a 100644 --- a/frontend/components/admin/organisations/apiOrganisation.tsx +++ b/frontend/components/admin/organisations/apiOrganisation.tsx @@ -13,7 +13,7 @@ import {paginationUrlParams} from '~/utils/postgrestUrl' import {createJsonHeaders, getBaseUrl} from '~/utils/fetchHelpers' import {extractCountFromHeader} from '~/utils/extractCountFromHeader' import logger from '~/utils/logger' -import {columsForCreate, EditOrganisation, OrganisationList} from '~/types/Organisation' +import {colForCreate, EditOrganisation, OrganisationList} from '~/types/Organisation' import {upsertImage} from '~/utils/editImage' import {getSlugFromString} from '~/utils/getSlugFromString' import {getPropsFromObject} from '~/utils/getPropsFromObject' @@ -137,7 +137,7 @@ export function useOrganisations(token: string) { // create slug for new organisation based on name data.slug = getSlugFromString(data.name) // extract props we need for createOrganisation - const organisation = getPropsFromObject(data, columsForCreate) + const organisation = getPropsFromObject(data, colForCreate) // create new organisation const {status,message} = await createOrganisation({ organisation, diff --git a/frontend/components/organisation/apiOrganisations.ts b/frontend/components/organisation/apiOrganisations.ts index 54803786c..d294cf47e 100644 --- a/frontend/components/organisation/apiOrganisations.ts +++ b/frontend/components/organisation/apiOrganisations.ts @@ -9,9 +9,8 @@ import {RsdUser} from '~/auth' import {isOrganisationMaintainer} from '~/auth/permissions/isMaintainerOfOrganisation' import { - Organisation, OrganisationForOverview, - OrganisationList, ProjectOfOrganisation, - SoftwareOfOrganisation + OrganisationForOverview, OrganisationList, + ProjectOfOrganisation, SoftwareOfOrganisation } from '~/types/Organisation' import {extractCountFromHeader} from '~/utils/extractCountFromHeader' import {createJsonHeaders, getBaseUrl} from '~/utils/fetchHelpers' @@ -92,19 +91,19 @@ export async function getOrganisationBySlug({slug,user,token}: }) // console.log('getOrganisationBySlug...isMaintainer...', isMaintainer) // get organisation data - const [organisation, description] = await Promise.all([ + const [organisation, orgInfo] = await Promise.all([ getOrganisationById({ uuid, token, isMaintainer }), - getOrganisationDescription({uuid, token}) + getOrganisationInfo({uuid, token}) ]) - // return consolidate organisation + // return consolidated organisation data return { organisation: { ...organisation, - description + ...orgInfo }, isMaintainer } @@ -193,8 +192,30 @@ export async function getOrganisationChildren({uuid, token}: return [] } -export async function getOrganisationDescription({uuid, token}: { uuid: string, token?: string }) { - const query = `organisation?id=eq.${uuid}&select=description` +// export async function getOrganisationDescription({uuid, token}: { uuid: string, token?: string }) { +// const query = `organisation?id=eq.${uuid}&select=description` +// const url = `${getBaseUrl()}/${query}` +// // console.log('url...', url) +// const resp = await fetch(url, { +// method: 'GET', +// headers: { +// ...createJsonHeaders(token), +// // request single object item +// 'Accept': 'application/vnd.pgrst.object+json' +// } +// }) +// if (resp.status === 200) { +// const json: Organisation = await resp.json() +// return json.description +// } +// // otherwise request failed +// logger(`getOrganisationDescription failed: ${resp.status} ${resp.statusText}`, 'warn') +// // we log and return null +// return null +// } + +export async function getOrganisationInfo({uuid, token}: { uuid: string, token?: string }) { + const query = `organisation?id=eq.${uuid}&select=description,wikipedia_url,city,ror_types` const url = `${getBaseUrl()}/${query}` // console.log('url...', url) const resp = await fetch(url, { @@ -206,8 +227,13 @@ export async function getOrganisationDescription({uuid, token}: { uuid: string, } }) if (resp.status === 200) { - const json: Organisation = await resp.json() - return json.description + const json: any = await resp.json() + return { + city: json.city as string | null, + description: json.description as string | null, + wikipedia_url: json.wikipedia_url as string | null, + ror_types: json?.ror_types ?? [] as string[] | null + } } // otherwise request failed logger(`getOrganisationDescription failed: ${resp.status} ${resp.statusText}`, 'warn') diff --git a/frontend/components/organisation/context/OrganisationContext.tsx b/frontend/components/organisation/context/OrganisationContext.tsx index 26aac8ac0..3d2d754f1 100644 --- a/frontend/components/organisation/context/OrganisationContext.tsx +++ b/frontend/components/organisation/context/OrganisationContext.tsx @@ -11,8 +11,15 @@ type UpdateOrganisationProps = { value: any } +export type OrganisationForContext = OrganisationForOverview & { + description: string | null, + wikipedia_url: string | null, + city: string | null + ror_types: string[] | null +} + type OrganisationContextProps = { - organisation: OrganisationForOverview | null + organisation: OrganisationForContext | null isMaintainer: boolean updateOrganisation:({key,value}:UpdateOrganisationProps)=>void } @@ -24,10 +31,10 @@ const OrganisationContext = createContext({ }) export function OrganisationProvider(props: any) { - // destucture organisation + // destructure organisation const {organisation:initOrganisation, isMaintainer:initMaintainer} = props // set state - use initOrganisation at start - const [organisation, setOrganisation] = useState(initOrganisation) + const [organisation, setOrganisation] = useState(initOrganisation) const [isMaintainer, setIsMaintainer] = useState(initMaintainer ?? false) const updateOrganisation = useCallback(({key, value}:UpdateOrganisationProps) => { @@ -40,7 +47,7 @@ export function OrganisationProvider(props: any) { } },[organisation]) - // we need to update organisation state every time initOrganistation changes + // we need to update organisation state every time initOrganisation changes // because useState is running in different context useEffect(() => { if (initOrganisation.id && !organisation) { diff --git a/frontend/components/organisation/metadata/RorType.tsx b/frontend/components/organisation/metadata/RorType.tsx index 009203e29..925ba5b77 100644 --- a/frontend/components/organisation/metadata/RorType.tsx +++ b/frontend/components/organisation/metadata/RorType.tsx @@ -5,16 +5,15 @@ // // SPDX-License-Identifier: Apache-2.0 -import {RORItem} from '~/utils/getROR' import TypeIcon from '~/components/icons/TypeIcon' -export default function RorType({meta}:{meta:RORItem|null}) { +export default function RorType({ror_types}:Readonly<{ror_types:string[]|null}>) { try { - if (meta === null) return null + if (ror_types === null) return null return ( <> - {meta.types.map(item => ( + {ror_types.map(item => (
{item} diff --git a/frontend/components/organisation/metadata/index.tsx b/frontend/components/organisation/metadata/index.tsx index ea8db99b3..31af3b1e2 100644 --- a/frontend/components/organisation/metadata/index.tsx +++ b/frontend/components/organisation/metadata/index.tsx @@ -1,7 +1,7 @@ // SPDX-FileCopyrightText: 2022 Dusan Mijatovic (dv4all) // SPDX-FileCopyrightText: 2022 dv4all -// SPDX-FileCopyrightText: 2023 Dusan Mijatovic (Netherlands eScience Center) -// SPDX-FileCopyrightText: 2023 Netherlands eScience Center +// SPDX-FileCopyrightText: 2023 - 2024 Dusan Mijatovic (Netherlands eScience Center) +// SPDX-FileCopyrightText: 2023 - 2024 Netherlands eScience Center // // SPDX-License-Identifier: Apache-2.0 @@ -9,7 +9,6 @@ import LanguageIcon from '@mui/icons-material/Language' import AutoStoriesIcon from '@mui/icons-material/AutoStories' import MapIcon from '@mui/icons-material/Map' -import {RORItem} from '~/utils/getROR' import RorIcon from '~/components/icons/RorIcon' import OrganisationLogo from './OrganisationLogo' import RorType from './RorType' @@ -19,12 +18,11 @@ import {getHostnameFromUrl} from '~/utils/getHostname' import BaseSurfaceRounded from '~/components/layout/BaseSurfaceRounded' import useOrganisationContext from '../context/useOrganisationContext' -type OrganisationMetadataProps = { - ror_info: RORItem | null -} - -export default function OrganisationMetadata({ror_info}: OrganisationMetadataProps) { - const {name,short_description,country,website,isMaintainer} = useOrganisationContext() +export default function OrganisationMetadata() { + const { + name,short_description,country,city,website, + isMaintainer,wikipedia_url,ror_id,ror_types + } = useOrganisationContext() // console.group('OrganisationMetadata') // console.log('short_description...', short_description) @@ -45,34 +43,27 @@ export default function OrganisationMetadata({ror_info}: OrganisationMetadataPro icon: , }) } - } else if (ror_info && ror_info.links && ror_info.links.length > 0) { - const title = getHostnameFromUrl(ror_info.links[0]) ?? 'Website' - rsdLinks.push({ - title, - url: ror_info.links[0], - icon: , - }) } - // ror_info.id is ror_id url - if (ror_info && ror_info.id) { + // ror_id url + if (ror_id) { // add only new items rsdLinks.push({ title:'ROR info', - url: ror_info.id, + url: ror_id, icon: , }) } // some organisations provide wikipedia page - if (ror_info && ror_info?.wikipedia_url) { + if (wikipedia_url) { rsdLinks.push({ title:'Wikipedia', - url: ror_info?.wikipedia_url, + url: wikipedia_url, icon: , }) } // Google Maps link - if (ror_info?.addresses[0].city && ror_info?.country.country_name) { - const query = encodeURIComponent(`${name},${ror_info?.addresses[0].city},${ror_info?.country.country_name}`) + if (name && city && country) { + const query = encodeURIComponent(`${name},${city},${country}`) const href = `https://www.google.com/maps/search/?api=1&query=${query}` rsdLinks.push({ title:'Map', @@ -97,15 +88,15 @@ export default function OrganisationMetadata({ror_info}: OrganisationMetadataPro {name}

{short_description}

- +
diff --git a/frontend/components/organisation/units/OrganisationUnitsIndex.test.tsx b/frontend/components/organisation/units/OrganisationUnitsIndex.test.tsx index fc87e6b8b..f2523c994 100644 --- a/frontend/components/organisation/units/OrganisationUnitsIndex.test.tsx +++ b/frontend/components/organisation/units/OrganisationUnitsIndex.test.tsx @@ -264,7 +264,7 @@ describe('frontend/components/organisation/software/index.tsx', () => { // validate api call const expectedUrl = '/api/v1/organisation' const expectedPayload = { - 'body': '{"parent":"91c2ffa7-bce6-4488-be00-6613a2d99f51","slug":"test-unit-name","primary_maintainer":"121212121212","name":"Test unit name","ror_id":null,"is_tenant":false,"website":"https://google.com/test","logo_id":null}', + 'body': '{"parent":"91c2ffa7-bce6-4488-be00-6613a2d99f51","primary_maintainer":"121212121212","slug":"test-unit-name","name":"Test unit name","ror_id":null,"website":"https://google.com/test","is_tenant":false,"country":null,"city":null,"wikipedia_url":null,"ror_types":null,"logo_id":null}', 'headers': { 'Authorization': 'Bearer TEST_TOKEN', 'Content-Type': 'application/json', diff --git a/frontend/components/organisation/units/index.tsx b/frontend/components/organisation/units/index.tsx index 04aa0d7ad..07a9d28d4 100644 --- a/frontend/components/organisation/units/index.tsx +++ b/frontend/components/organisation/units/index.tsx @@ -15,7 +15,7 @@ import Button from '@mui/material/Button' import {useSession} from '~/auth' import useSnackbar from '../../snackbar/useSnackbar' import { - columsForCreate, columsForUpdate, CoreOrganisationProps, + colForCreate, colForUpdate, CoreOrganisationProps, EditOrganisation, Organisation, OrganisationForOverview } from '../../../types/Organisation' import { @@ -150,7 +150,7 @@ export default function ResearchUnits() { } // SAVE organisation if (typeof pos != 'undefined' && data.id) { - const unit:Organisation = getPropsFromObject(data,columsForUpdate) + const unit:Organisation = getPropsFromObject(data,colForUpdate) // update existing organisation const resp = await updateOrganisation({ organisation: unit, @@ -165,7 +165,7 @@ export default function ResearchUnits() { } } else { // create new organisation - const unit:CoreOrganisationProps = getPropsFromObject(data, columsForCreate) + const unit:CoreOrganisationProps = getPropsFromObject(data, colForCreate) const resp = await createOrganisation({ organisation:unit, token diff --git a/frontend/components/projects/edit/information/AutosaveFundingOrganisations.test.tsx b/frontend/components/projects/edit/information/AutosaveFundingOrganisations.test.tsx index 8631f3e08..357cc1972 100644 --- a/frontend/components/projects/edit/information/AutosaveFundingOrganisations.test.tsx +++ b/frontend/components/projects/edit/information/AutosaveFundingOrganisations.test.tsx @@ -182,14 +182,18 @@ it('can add funding organisation from ROR', async() => { expect(mockCreateOrganisation).toBeCalledTimes(1) expect(mockCreateOrganisation).toBeCalledWith({ 'organisation': { + 'city': null, + 'country': null, 'is_tenant': false, 'logo_id': foundOrgs[1].data.logo_id, 'name': foundOrgs[1].data.name, 'parent': null, 'primary_maintainer': null, 'ror_id': foundOrgs[1].data.ror_id, + 'ror_types': null, 'slug': 'vu-university-amsterdam', - 'website': foundOrgs[1].data.website + 'website': foundOrgs[1].data.website, + 'wikipedia_url': null, }, 'token': 'TEST_TOKEN', }) diff --git a/frontend/components/projects/edit/information/AutosaveFundingOrganisations.tsx b/frontend/components/projects/edit/information/AutosaveFundingOrganisations.tsx index 4270c6c98..a52a1acff 100644 --- a/frontend/components/projects/edit/information/AutosaveFundingOrganisations.tsx +++ b/frontend/components/projects/edit/information/AutosaveFundingOrganisations.tsx @@ -11,7 +11,7 @@ import {useState} from 'react' import {useSession} from '~/auth' import useSnackbar from '~/components/snackbar/useSnackbar' -import {columsForCreate, SearchOrganisation} from '~/types/Organisation' +import {colForCreate, SearchOrganisation} from '~/types/Organisation' import {createOrganisation, searchForOrganisation} from '~/utils/editOrganisation' import {addOrganisationToProject, deleteOrganisationFromProject} from '~/utils/editProject' import {getPropsFromObject} from '~/utils/getPropsFromObject' @@ -40,7 +40,7 @@ export default function AutosaveFundingOrganisations({id,items}:FundingOrganisat } // console.log('onAddOrganisation...', selected) if (selected.id===null){ - const organisation = getPropsFromObject(selected,columsForCreate) + const organisation = getPropsFromObject(selected,colForCreate) // createNewOrganisation(selected) resp = await createOrganisation({ organisation, diff --git a/frontend/components/projects/edit/organisations/EditProjectOrganisationsIndex.test.tsx b/frontend/components/projects/edit/organisations/EditProjectOrganisationsIndex.test.tsx index 77dd3c265..7bea80fb9 100644 --- a/frontend/components/projects/edit/organisations/EditProjectOrganisationsIndex.test.tsx +++ b/frontend/components/projects/edit/organisations/EditProjectOrganisationsIndex.test.tsx @@ -198,14 +198,18 @@ describe('frontend/components/projects/edit/organisations/index.tsx', () => { expect(mockCreateOrganisation).toBeCalledTimes(1) expect(mockCreateOrganisation).toBeCalledWith({ 'organisation': { + 'city': null, + 'country': null, 'is_tenant': false, 'logo_id': null, 'name': searchFor, 'parent': null, 'primary_maintainer': null, 'ror_id': null, + 'ror_types': null, 'slug': expectSlug, 'website': expectWebsite, + 'wikipedia_url': null, }, 'token': mockSession.token, }) diff --git a/frontend/components/projects/edit/organisations/index.tsx b/frontend/components/projects/edit/organisations/index.tsx index f71d272cd..91ab12746 100644 --- a/frontend/components/projects/edit/organisations/index.tsx +++ b/frontend/components/projects/edit/organisations/index.tsx @@ -9,7 +9,7 @@ import {useState} from 'react' import {useSession} from '~/auth' -import {columsForUpdate, EditOrganisation, SearchOrganisation} from '~/types/Organisation' +import {colForUpdate, EditOrganisation, SearchOrganisation} from '~/types/Organisation' import { newOrganisationProps, searchToEditOrganisation, updateOrganisation @@ -230,7 +230,7 @@ export default function ProjectOrganisations() { // SAVE organisation if (typeof pos != 'undefined' && data.id) { // extract data for update - const organisation = getPropsFromObject(data,columsForUpdate) + const organisation = getPropsFromObject(data,colForUpdate) // update existing organisation const resp = await updateOrganisation({ organisation, diff --git a/frontend/components/software/edit/organisations/index.tsx b/frontend/components/software/edit/organisations/index.tsx index b4b2e26e6..7ce5db665 100644 --- a/frontend/components/software/edit/organisations/index.tsx +++ b/frontend/components/software/edit/organisations/index.tsx @@ -10,7 +10,7 @@ import {useState} from 'react' import {useSession} from '~/auth' import { - columsForUpdate, + colForUpdate, EditOrganisation, SearchOrganisation, SoftwareForOrganisation @@ -250,7 +250,7 @@ export default function SoftwareOrganisations() { } if (typeof pos !== 'undefined' && data.id) { // extract data for update - const organisation = getPropsFromObject(data,columsForUpdate) + const organisation = getPropsFromObject(data,colForUpdate) // update existing organisation const resp = await updateOrganisation({ organisation, diff --git a/frontend/components/software/edit/organisations/organisationForSoftware.ts b/frontend/components/software/edit/organisations/organisationForSoftware.ts index 3e3996c05..8fb441914 100644 --- a/frontend/components/software/edit/organisations/organisationForSoftware.ts +++ b/frontend/components/software/edit/organisations/organisationForSoftware.ts @@ -6,7 +6,7 @@ // // SPDX-License-Identifier: Apache-2.0 -import {columsForCreate, EditOrganisation, SoftwareForOrganisation} from '~/types/Organisation' +import {colForCreate, EditOrganisation, SoftwareForOrganisation} from '~/types/Organisation' import {createOrganisation} from '~/utils/editOrganisation' import {createJsonHeaders, extractReturnMessage} from '~/utils/fetchHelpers' import {getPropsFromObject} from '~/utils/getPropsFromObject' @@ -15,7 +15,7 @@ import logger from '~/utils/logger' export async function createOrganisationAndAddToSoftware({item, token, software}: { item: EditOrganisation, token: string, software: string}) { // extract props we need for createOrganisation - const organisation = getPropsFromObject(item, columsForCreate) + const organisation = getPropsFromObject(item, colForCreate) // create new organisation let resp = await createOrganisation({ organisation, diff --git a/frontend/package-lock.json b/frontend/package-lock.json index e0f275fa1..694b4328d 100644 --- a/frontend/package-lock.json +++ b/frontend/package-lock.json @@ -1,12 +1,12 @@ { "name": "rsd-frontend", - "version": "2.22.1", + "version": "2.23.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "rsd-frontend", - "version": "2.22.1", + "version": "2.23.0", "dependencies": { "@dnd-kit/core": "6.0.8", "@dnd-kit/modifiers": "6.0.1", diff --git a/frontend/pages/organisations/[...slug].tsx b/frontend/pages/organisations/[...slug].tsx index afa35a0ec..041c1d5b1 100644 --- a/frontend/pages/organisations/[...slug].tsx +++ b/frontend/pages/organisations/[...slug].tsx @@ -10,9 +10,7 @@ import {GetServerSidePropsContext} from 'next/types' import {app} from '~/config/app' import {getUserFromToken} from '~/auth' -import {getOrganisationMetadata, RORItem} from '~/utils/getROR' import {getUserSettings} from '~/utils/userSettings' -import {OrganisationForOverview} from '~/types/Organisation' import {getOrganisationBySlug} from '~/components/organisation/apiOrganisations' import OrganisationMetadata from '~/components/organisation/metadata' import PageMeta from '~/components/seo/PageMeta' @@ -23,13 +21,12 @@ import BaseSurfaceRounded from '~/components/layout/BaseSurfaceRounded' import OrganisationTabs from '~/components/organisation/tabs/OrganisationTabs' import TabContent from '~/components/organisation/tabs/TabContent' import {TabKey} from '~/components/organisation/tabs/OrganisationTabItems' -import {OrganisationProvider} from '~/components/organisation/context/OrganisationContext' +import {OrganisationForContext, OrganisationProvider} from '~/components/organisation/context/OrganisationContext' import {LayoutType} from '~/components/software/overview/search/ViewToggleGroup' import {UserSettingsProvider} from '~/components/organisation/context/UserSettingsContext' export type OrganisationPageProps = { - organisation: OrganisationForOverview, - ror: RORItem | null + organisation: OrganisationForContext, slug: string[], tab: TabKey | null, isMaintainer: boolean, @@ -38,7 +35,7 @@ export type OrganisationPageProps = { } export default function OrganisationPage({ - organisation, slug, tab, ror, + organisation, slug, tab, isMaintainer, rsd_page_rows, rsd_page_layout }: OrganisationPageProps) { @@ -46,7 +43,6 @@ export default function OrganisationPage({ // console.log('organisation...', organisation) // console.log('slug....', slug) // console.log('ror....', ror) - // console.log('tab....', tab) // console.log('select_tab....', select_tab) // console.log('loading....', loading) // console.log('isMaintainer....', isMaintainer) @@ -88,7 +84,7 @@ export default function OrganisationPage({ path:'/organisations' }} /> - + {/* TABS */} -// SPDX-FileCopyrightText: 2023 Netherlands eScience Center // // SPDX-License-Identifier: Apache-2.0 @@ -13,11 +13,24 @@ export type Status = 'rejected_by_origin' | 'rejected_by_relation' | 'approved' export type OrganisationRole = 'participating' | 'funding' | 'hosting' export type OrganisationSource = 'RSD' | 'ROR' | 'MANUAL' -// organisation colums used in editOrganisation.createOrganisation +// organisation columns used in editOrganisation.createOrganisation // NOTE! update when type CoreOrganisationProps changes -export const columsForCreate = [ - 'parent', 'slug', 'primary_maintainer', 'name', 'ror_id', 'is_tenant', 'website', 'logo_id', +export const colForCreate = [ + 'parent', 'primary_maintainer', 'slug', + 'name', 'ror_id', 'website', 'is_tenant', + 'country','city','wikipedia_url','ror_types', + 'logo_id' ] + +// organisation columns used in editOrganisation.updateOrganisation +// NOTE! update when type Organisation changes +export const colForUpdate = [ + 'id', + 'short_description', + 'description', + ...colForCreate +] + export type CoreOrganisationProps = { parent: string | null slug: string | null @@ -29,21 +42,17 @@ export type CoreOrganisationProps = { logo_id: string | null } -// organisation colums used in editOrganisation.updateOrganisation -// NOTE! update when type Organisation changes -export const columsForUpdate = [ - 'id', - 'description', - ...columsForCreate -] export type Organisation = CoreOrganisationProps & { id: string | null // about page content created by maintainer description: string | null short_description: string | null country: string | null + city: string | null parent_names?: string rsd_path?: string + wikipedia_url?: string | null + rsd_types?: string[] | null } // adding source @@ -127,7 +136,7 @@ export type ProjectOrganisationProps = ParticipatingOrganisationProps & { role: OrganisationRole } -export type OrganisationForOverview = Organisation & { +export type OrganisationForOverview = Omit & { id: string slug: string logo_id: string | null diff --git a/frontend/utils/editProject.ts b/frontend/utils/editProject.ts index b2b36d80a..70a708d60 100644 --- a/frontend/utils/editProject.ts +++ b/frontend/utils/editProject.ts @@ -1,10 +1,10 @@ -// SPDX-FileCopyrightText: 2022 - 2023 Netherlands eScience Center +// SPDX-FileCopyrightText: 2022 - 2024 Netherlands eScience Center // SPDX-FileCopyrightText: 2022 Dusan Mijatovic (dv4all) // SPDX-FileCopyrightText: 2022 Ewan Cahen (Netherlands eScience Center) // SPDX-FileCopyrightText: 2022 Helmholtz Centre Potsdam - GFZ German Research Centre for Geosciences // SPDX-FileCopyrightText: 2022 Matthias Rüster (GFZ) // SPDX-FileCopyrightText: 2022 dv4all -// SPDX-FileCopyrightText: 2023 Dusan Mijatovic (Netherlands eScience Center) +// SPDX-FileCopyrightText: 2023 - 2024 Dusan Mijatovic (Netherlands eScience Center) // // SPDX-License-Identifier: Apache-2.0 @@ -13,14 +13,14 @@ import {createJsonHeaders, extractReturnMessage} from './fetchHelpers' import { NewProject, ProjectLink, ResearchDomainForProject } from '~/types/Project' -import {columsForCreate, EditOrganisation, OrganisationRole, Status} from '~/types/Organisation' +import {colForCreate, EditOrganisation, OrganisationRole, Status} from '~/types/Organisation' import {createOrganisation} from './editOrganisation' import {getPropsFromObject} from './getPropsFromObject' // query for software item page based on slug export async function validProjectItem(slug: string | undefined, token?: string) { try { - // this request is always perfomed from frontend + // this request is always performed from frontend const url = `/api/v1/project?select=id,slug&slug=eq.${slug}` let resp if (token) { @@ -84,7 +84,7 @@ export async function addProject({project, token}: export async function createOrganisationAndAddToProject({project, item, token, role='participating'}: { item: EditOrganisation, project: string, token: string, role?: OrganisationRole}) { // extract props we need for createOrganisation - const organisation = getPropsFromObject(item, columsForCreate) + const organisation = getPropsFromObject(item, colForCreate) // create new organisation let resp = await createOrganisation({ organisation, diff --git a/frontend/utils/getROR.test.ts b/frontend/utils/getROR.test.ts index 6fffb138c..d42c67a8a 100644 --- a/frontend/utils/getROR.test.ts +++ b/frontend/utils/getROR.test.ts @@ -1,9 +1,11 @@ +// SPDX-FileCopyrightText: 2024 Dusan Mijatovic (Netherlands eScience Center) // SPDX-FileCopyrightText: 2024 Dusan Mijatovic (dv4all) (dv4all) +// SPDX-FileCopyrightText: 2024 Netherlands eScience Center // SPDX-FileCopyrightText: 2024 dv4all // // SPDX-License-Identifier: Apache-2.0 -import {getOrganisationMetadata, findInROR} from './getROR' +import {findInROR} from './getROR' // mock fetch const mockFetchJson = jest.fn((props) => Promise.resolve(props)) @@ -20,40 +22,19 @@ beforeEach(() => { jest.clearAllMocks() }) -it('getOrganisationMetadata does NOT call fetch on undefined ror_id', async () => { - const ror_id = undefined - const resp = await getOrganisationMetadata(ror_id as any) - expect(resp).toBe(null) - expect(mockFetch).not.toBeCalled() -}) - -it('getOrganisationMetadata does NOT call fetch on ror_id=null', async () => { - const ror_id = null - const resp = await getOrganisationMetadata(ror_id) - expect(resp).toBe(null) - expect(mockFetch).not.toBeCalled() -}) - -it('getOrganisationMetadata does NOT call fetch on empty string ror_id=" "', async () => { - const ror_id = ' ' - const resp = await getOrganisationMetadata(ror_id) - expect(resp).toBe(null) - expect(mockFetch).not.toBeCalled() -}) - -it('getOrganisationMetadata calls fetch on ror_id="ABCD"', async () => { - const ror_id = 'ABCD' - await getOrganisationMetadata(ror_id) - - // validate fetch call - expect(mockFetch).toBeCalledTimes(1) - expect(mockFetch).toBeCalledWith(`https://api.ror.org/organizations/${ror_id}`) -}) - it('findInROR calls fetch with search param and json header', async () => { const searchFor = 'ABCD' // mock ROR response - mockFetchJson.mockResolvedValueOnce({items:[{id:'test-id',name:'Test organisation',links:[]}]}) + mockFetchJson.mockResolvedValueOnce({items:[{ + id:'test-id', + name:'Test organisation', + country:{country_name:'Test country'}, + addresses:[ + {city: 'Test city'} + ], + links:[], + types:['Education'] + }]}) const resp = await findInROR({searchFor}) @@ -61,17 +42,22 @@ it('findInROR calls fetch with search param and json header', async () => { expect(resp).toEqual([ { 'data': { - 'description': null, 'id': null, - 'is_tenant': false, - 'logo_id': null, - 'name': 'Test organisation', 'parent': null, 'primary_maintainer': null, - 'ror_id': 'test-id', 'slug': 'test-organisation', + 'name': 'Test organisation', + 'short_description': null, + 'description': null, + 'ror_id': 'test-id', + 'website': null, + 'is_tenant': false, + 'country': 'Test country', + 'city': 'Test city', + 'wikipedia_url': null, + 'ror_types': ['Education'], + 'logo_id': null, 'source': 'ROR', - 'website': '', }, 'key': 'test-id', 'label': 'Test organisation', diff --git a/frontend/utils/getROR.ts b/frontend/utils/getROR.ts index 1389f2538..9efc17403 100644 --- a/frontend/utils/getROR.ts +++ b/frontend/utils/getROR.ts @@ -51,41 +51,27 @@ function buildAutocompleteOptions(rorItems: RORItem[]): AutocompleteOption