Skip to content

Commit

Permalink
Merge pull request #39 from AAFC-BICoE/35195_support_functions_on_dat…
Browse files Browse the repository at this point in the history
…a_export

35195 support functions on data export
  • Loading branch information
brandonandre authored Nov 20, 2024
2 parents 4e7c251 + 7688833 commit 09f4225
Show file tree
Hide file tree
Showing 8 changed files with 139 additions and 8 deletions.
10 changes: 10 additions & 0 deletions docs/api.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,16 @@ POST /data-export
}
----

Optionally, functions can be applied to columns to create new calculated columns, which can then be referenced in the `columns` list.

[source, json]
----
{
"columnFunctions":{"latLong":{"functionName":"CONVERT_COORDINATES_DD","params":["collectingEvent.eventGeom"]}}
}
----


== Download File

For data exports, the `fileId` is the UUID returned by the export request.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import io.crnk.core.resource.annotations.JsonApiResource;
import java.time.OffsetDateTime;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import lombok.AllArgsConstructor;
import lombok.Builder;
Expand Down Expand Up @@ -53,4 +54,6 @@ public class DataExportDto {
@IgnoreDinaMapping(reason = "handled by DataExportColumnsFieldAdapter")
private List<String> columnAliases;

private Map<String, DataExport.FunctionDef> columnFunctions;

}
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package ca.gc.aafc.dina.export.api.entity;

import java.time.OffsetDateTime;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import javax.persistence.Column;
Expand Down Expand Up @@ -40,6 +41,7 @@ public class DataExport implements DinaEntity {

public enum ExportStatus { NEW, RUNNING, COMPLETED, EXPIRED, ERROR }
public enum ExportType { TABULAR_DATA, OBJECT_ARCHIVE }
public enum FunctionName { CONCAT, CONVERT_COORDINATES_DD }

@Id
@GeneratedValue(strategy = GenerationType.IDENTITY)
Expand Down Expand Up @@ -82,6 +84,10 @@ public enum ExportType { TABULAR_DATA, OBJECT_ARCHIVE }
@Column
private String[] columnAliases;

@Type(type = "jsonb")
@Column
private Map<String, FunctionDef> columnFunctions;

@Enumerated(EnumType.STRING)
@NotNull
@Column
Expand All @@ -90,4 +96,6 @@ public enum ExportType { TABULAR_DATA, OBJECT_ARCHIVE }
@Transient
private Map<String, String> transitiveData;

/**
 * Definition of a function used to produce a calculated column.
 *
 * @param functionName the function to apply (one of {@link FunctionName})
 * @param params the column names given as arguments to the function; expected arity
 *               depends on the function (e.g. CONVERT_COORDINATES_DD takes a single
 *               column — see the generator) — TODO confirm per-function arity here
 */
public record FunctionDef(FunctionName functionName, List<String> params) {
}
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
package ca.gc.aafc.dina.export.api.generator;

import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.collections4.IteratorUtils;
import org.apache.commons.collections4.MapUtils;
import org.apache.commons.lang3.StringUtils;
import org.springframework.scheduling.annotation.Async;
import org.springframework.stereotype.Service;

Expand Down Expand Up @@ -54,6 +57,8 @@ public class TabularDataExportGenerator extends DataExportGenerator {

private static final TypeRef<List<Map<String, Object>>> JSON_PATH_TYPE_REF = new TypeRef<>() {
};
private static final String COORDINATES_DD_FORMAT = "%f,%f";
private static final String DEFAULT_CONCAT_SEP = ",";

private final ObjectMapper objectMapper;
private final ElasticSearchDataSource elasticSearchDataSource;
Expand Down Expand Up @@ -111,7 +116,7 @@ public CompletableFuture<UUID> export(DataExport dinaExport) throws IOException
new TypeReference<>() {
}, w)) {
export(dinaExport.getSource(), objectMapper.writeValueAsString(dinaExport.getQuery()),
output);
dinaExport.getColumnFunctions(), output);
}
} catch (IOException ioEx) {
updateStatus(dinaExport.getUuid(), DataExport.ExportStatus.ERROR);
Expand Down Expand Up @@ -148,14 +153,16 @@ public void deleteExport(DataExport dinaExport) throws IOException {
* @param output
* @throws IOException
*/
private void export(String sourceIndex, String query, DataOutput<JsonNode> output) throws IOException {
private void export(String sourceIndex, String query,
Map<String, DataExport.FunctionDef> columnFunctions,
DataOutput<JsonNode> output) throws IOException {
SearchResponse<JsonNode>
response = elasticSearchDataSource.searchWithPIT(sourceIndex, query);

boolean pageAvailable = response.hits().hits().size() != 0;
while (pageAvailable) {
for (Hit<JsonNode> hit : response.hits().hits()) {
processRecord(hit.id(), hit.source(), output);
processRecord(hit.id(), hit.source(), columnFunctions, output);
}
pageAvailable = false;

Expand All @@ -178,7 +185,9 @@ private void export(String sourceIndex, String query, DataOutput<JsonNode> outpu
* @param output
* @throws IOException
*/
private void processRecord(String documentId, JsonNode record, DataOutput<JsonNode> output) throws IOException {
private void processRecord(String documentId, JsonNode record,
Map<String, DataExport.FunctionDef> columnFunctions,
DataOutput<JsonNode> output) throws IOException {
if (record == null) {
return;
}
Expand All @@ -204,6 +213,20 @@ private void processRecord(String documentId, JsonNode record, DataOutput<JsonNo
objectMapper.valueToTree(entry.getValue()));
replaceNestedByDotNotation(attributeObjNode);
}

// Check if we have functions to apply
if (MapUtils.isNotEmpty(columnFunctions)) {
for (var functionDef : columnFunctions.entrySet()) {
switch (functionDef.getValue().functionName()) {
case CONCAT -> attributeObjNode.put(functionDef.getKey(),
handleConcatFunction(attributeObjNode, functionDef.getValue().params()));
case CONVERT_COORDINATES_DD -> attributeObjNode.put(functionDef.getKey(),
handleConvertCoordinatesDecimalDegrees(attributeObjNode,
functionDef.getValue().params()));
default -> log.warn("Unknown function. Ignoring");
}
}
}
output.addRecord(attributeObjNode);
}
}
Expand All @@ -212,7 +235,7 @@ private Map<String, Object> extractById(String id, List<Map<String, Object>> doc
DocumentContext dc = JsonPath.using(jsonPathConfiguration).parse(document);
try {
List<Map<String, Object>> includedObj = JsonPathHelper.extractById(dc, id, JSON_PATH_TYPE_REF);
return CollectionUtils.isEmpty(includedObj) ? Map.of() : includedObj.get(0);
return CollectionUtils.isEmpty(includedObj) ? Map.of() : includedObj.getFirst();
} catch (PathNotFoundException pnf) {
return Map.of();
}
Expand Down Expand Up @@ -288,6 +311,58 @@ private static boolean jsonNodeHasFieldAndIsArray(JsonNode node, String fieldNam
return node.has(fieldName) && node.get(fieldName).isArray();
}

/**
 * Returns the text value of the given field, or an empty string when the field is
 * absent or explicitly null. Without the null check, a JSON null (NullNode) would be
 * rendered as the literal string "null" by {@code asText()}.
 * Replace with dina-base function when 0.132 is available.
 *
 * @param objNode node to read the field from
 * @param fieldName name of the field to read
 * @return the field's text value, or "" when the field is missing or null
 */
private static String safeAsText(ObjectNode objNode, String fieldName) {
  // path() never returns null; MissingNode.asText() is already ""
  var valueNode = objNode.path(fieldName);
  return valueNode.isNull() ? "" : valueNode.asText();
}

/**
 * Concatenates the text values of the given columns using the default separator.
 * Missing columns contribute an empty string (see safeAsText).
 *
 * @param attributeObjNode node holding the attribute values
 * @param columns names of the columns to concatenate, in order; may be null or empty
 *                (e.g. "params" omitted from the FunctionDef JSON)
 * @return the values joined by DEFAULT_CONCAT_SEP, or "" when columns is null/empty
 */
private static String handleConcatFunction(ObjectNode attributeObjNode, List<String> columns) {
  // guard: a FunctionDef deserialized without "params" would otherwise NPE here
  if (CollectionUtils.isEmpty(columns)) {
    return "";
  }
  List<String> values = new ArrayList<>(columns.size());
  for (String col : columns) {
    values.add(safeAsText(attributeObjNode, col));
  }
  return String.join(DEFAULT_CONCAT_SEP, values);
}

/**
 * Converts a geo_point column stored as [longitude, latitude] into a "lat,long"
 * decimal-degrees string.
 *
 * @param attributeObjNode node holding the attribute values
 * @param columns expected to hold exactly one column name pointing at the geo_point array
 * @return the coordinates formatted as "lat,long", or null when the input does not
 *         match the expected format
 */
private static String handleConvertCoordinatesDecimalDegrees(ObjectNode attributeObjNode,
                                                             List<String> columns) {
  String decimalDegreeCoordinates = null;
  if (columns != null && columns.size() == 1) {
    JsonNode coordinates = attributeObjNode.get(columns.getFirst());
    // expect an array of exactly 2 numbers: [longitude, latitude]
    if (coordinates != null && coordinates.isArray() && coordinates.size() == 2) {
      // Locale.ROOT: "%f" is locale-sensitive and would emit comma decimal
      // separators (e.g. "45,424721" under fr), corrupting the exported value
      decimalDegreeCoordinates = String.format(java.util.Locale.ROOT, COORDINATES_DD_FORMAT,
        coordinates.get(1).asDouble(), coordinates.get(0).asDouble());
    }
  }
  if (StringUtils.isBlank(decimalDegreeCoordinates)) {
    log.debug("Invalid Coordinates format. Array of doubles in form of [lon,lat] expected");
  }
  return decimalDegreeCoordinates;
}

/**
* Creates a special document that represents all the values concatenated per attribute (using ";", like array elements)
* @param toMerge
Expand Down
1 change: 1 addition & 0 deletions src/main/resources/db/changelog/db.changelog-master.xml
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,5 @@
<include file="db/changelog/migrations/5-Add_name_to_export_table.xml"/>
<include file="db/changelog/migrations/6-Add_type_variables_to_report_template_table.xml"/>
<include file="db/changelog/migrations/7-Add_column_aliases_to_data_export_table.xml"/>
<include file="db/changelog/migrations/8-Add_column_functions_to_data_export_table.xml"/>
</databaseChangeLog>
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
<?xml version="1.1" encoding="UTF-8" standalone="no" ?>
<databaseChangeLog xmlns="http://www.liquibase.org/xml/ns/dbchangelog"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.liquibase.org/xml/ns/dbchangelog https://local.xsd/dbchangelog-4.4.xsd"
objectQuotingStrategy="QUOTE_ONLY_RESERVED_WORDS">

<changeSet id="8-Add_column_functions_to_data_export_table" context="schema-change" author="cgendreau">
<addColumn tableName="data_export">
<column name="column_functions" type="jsonb"/>
</addColumn>
</changeSet>
</databaseChangeLog>
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import ca.gc.aafc.dina.export.api.file.FileController;
import ca.gc.aafc.dina.export.api.testsupport.jsonapi.JsonApiDocuments;
import ca.gc.aafc.dina.testsupport.elasticsearch.ElasticSearchTestUtils;
import ca.gc.aafc.dina.testsupport.jsonapi.JsonAPITestHelper;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
Expand All @@ -26,11 +27,18 @@
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import javax.inject.Inject;

import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.SerializationFeature;
import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule;

@ContextConfiguration(initializers = { ElasticSearchTestContainerInitializer.class })
public class DataExportRepositoryIT extends BaseIntegrationTest {

Expand Down Expand Up @@ -73,12 +81,13 @@ public void testESDatasource() throws IOException {
.query(query)
.columns(List.of("id", "materialSampleName", "collectingEvent.dwcVerbatimLocality",
"dwcCatalogNumber", "dwcOtherCatalogNumbers", "managedAttributes.attribute_1",
"collectingEvent.managedAttributes.attribute_ce_1", "projects.name"))
"collectingEvent.managedAttributes.attribute_ce_1", "projects.name", "latLong"))
.columnFunctions(Map.of("latLong", new DataExport.FunctionDef(DataExport.FunctionName.CONVERT_COORDINATES_DD, List.of("collectingEvent.eventGeom"))))
.build());
assertNotNull(dto.getUuid());

try {
asyncConsumer.getAccepted().get(0).get();
asyncConsumer.getAccepted().getFirst().get();
} catch (InterruptedException | ExecutionException e) {
throw new RuntimeException(e);
}
Expand All @@ -88,6 +97,15 @@ public void testESDatasource() throws IOException {
assertEquals(DataExport.ExportType.TABULAR_DATA, savedDataExportDto.getExportType());
assertEquals("my export", savedDataExportDto.getName());


ObjectMapper IT_OBJECT_MAPPER = new ObjectMapper();

IT_OBJECT_MAPPER.registerModule(new JavaTimeModule());
IT_OBJECT_MAPPER.configure(SerializationFeature.WRITE_DATES_AS_TIMESTAMPS, false);
IT_OBJECT_MAPPER.setSerializationInclusion(JsonInclude.Include.NON_NULL);

System.out.println(IT_OBJECT_MAPPER.writeValueAsString(savedDataExportDto));

ResponseEntity<InputStreamResource>
response = fileController.downloadFile(dto.getUuid(), FileController.DownloadType.DATA_EXPORT);

Expand Down Expand Up @@ -115,6 +133,9 @@ public void testESDatasource() throws IOException {
// check that to-many relationships are exported in a similar way of arrays
assertTrue(lines.get(1).contains("project 1;project 2"));

// check that the function is working as expected
assertTrue(lines.get(1).contains("45.424721,-75.695000"));

// delete the export
dataExportRepository.delete(dto.getUuid());
assertThrows(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,8 @@ public class JsonApiDocuments {
"attributes": {
"group": "cnc",
"dwcVerbatimLocality" : "Montreal",
"managedAttributes" : { "attribute_ce_1":"value ce 1"}
"managedAttributes" : { "attribute_ce_1":"value ce 1"},
"eventGeom": [-75.695000, 45.424721]
}
},
{
Expand Down

0 comments on commit 09f4225

Please sign in to comment.