Skip to content

Commit

Permalink
Merge pull request #96 from Ferlab-Ste-Justine/fix/cqdg-873_study_fields
Browse files Browse the repository at this point in the history
feat: CQDG-873 new study fields
  • Loading branch information
adipaul1981 authored Oct 16, 2024
2 parents 820ad1e + 43c8cb6 commit 41ca3ad
Show file tree
Hide file tree
Showing 26 changed files with 280 additions and 58 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/run_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,7 @@ jobs:
with:
java-version: '11'
distribution: 'adopt'
# cache: sbt
- uses: sbt/setup-sbt@v1
- name: Run tests
run: sbt clean test
9 changes: 1 addition & 8 deletions fhavro-export/build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -5,25 +5,18 @@ version := "0.0.1"
scalaVersion := "2.13.7"

val awsVersion = "2.16.66"
val fhirVersion = "5.0.2"
val slf4jVersion = "1.7.30"
val avroVersion = "1.10.2"
val fhavroVersion = "0.0.10-SNAPSHOT"
val fhavroVersion = "0.0.11"

libraryDependencies ++= Seq(
"software.amazon.awssdk" % "s3" % awsVersion,
"software.amazon.awssdk" % "apache-client" % awsVersion,
"org.slf4j" % "slf4j-api" % slf4jVersion,
"org.slf4j" % "slf4j-simple" % slf4jVersion,
"ca.uhn.hapi.fhir" % "hapi-fhir-client" % fhirVersion,
"ca.uhn.hapi.fhir" % "hapi-fhir-structures-r4" % fhirVersion,
"ca.uhn.hapi.fhir" % "org.hl7.fhir.r4" % "5.0.0",
"org.typelevel" %% "cats-core" % "2.3.1",
"com.typesafe.play" %% "play-json" % "2.9.2",
"com.github.pureconfig" %% "pureconfig" % "0.15.0",
"com.softwaremill.sttp.client3" %% "core" % "3.1.0",
"org.apache.avro" % "avro" % avroVersion,
"org.apache.avro" % "avro-ipc-netty" % avroVersion,
"bio.ferlab" % "fhavro" % fhavroVersion,
"org.keycloak" % "keycloak-authz-client" % "12.0.3",

Expand Down
49 changes: 49 additions & 0 deletions fhavro-export/src/main/resources/schema/cqdg-researchstudy.avsc
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,14 @@
"name": "valueCoding",
"default": {}
},
{
"name":"valueBoolean",
"type":[
"null",
"boolean"
],
"default":null
},
{
"name": "extension",
"type": {
Expand All @@ -226,6 +234,14 @@
"string"
],
"default":null
},
{
"name": "valueInteger",
"type": [
"null",
"int"
],
"default": null
}
],
"default": {}
Expand Down Expand Up @@ -705,6 +721,39 @@
"default":[

]
},
{
"name": "extension",
"type": {
"type": "array",
"items": {
"type": "record",
"name": "ExtensionContact",
"doc": "An Extension",
"namespace": "bio.ferlab.fhir",
"fields": [
{
"name": "url",
"type": [
"null",
"string"
],
"default": null
},
{
"name":"valueString",
"type":[
"null",
"string"
],
"default":null
}
],
"default": {}
},
"default": []
},
"default": []
}
],
"default":{
Expand Down
8 changes: 8 additions & 0 deletions import-task/src/main/scala/bio/ferlab/fhir/etl/package.scala
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,12 @@ package object etl {
// URL constants built under `$SYSTEM_URL_CQDG/StructureDefinition/...`.
// Several of them are compared against an extension's `url` field in the ETL transformations
// to pick out a specific extension.
val ACCESS_REQUIREMENTS_S_D = s"$SYSTEM_URL_CQDG/StructureDefinition/AccessRequirements"
val POPULATION_S_D = s"$SYSTEM_URL_CQDG/StructureDefinition/ResearchStudy/population"
val DATASET_SD = s"$SYSTEM_URL_CQDG/StructureDefinition/dataset"
// Contact-level extension URLs (matched against the extensions nested under a study's `contact`).
val CONTACT_INSTITUTIONS_SD = s"$SYSTEM_URL_CQDG/StructureDefinition/Contact/ContactInstitution"
val CONTACT_TYPE_SD = s"$SYSTEM_URL_CQDG/StructureDefinition/Contact/ContactTypes"
// Study-level extension URLs (matched against the study's top-level `extension` array).
val RESTRICTED_SD = s"$SYSTEM_URL_CQDG/StructureDefinition/Restricted"
val EXPECTED_CONTENT_SD = s"$SYSTEM_URL_CQDG/StructureDefinition/ResearchStudy/ExpectedContent"
val PRINCIPAL_INVESTIGATORS_SD = s"$SYSTEM_URL_CQDG/StructureDefinition/PrincipalInvestigators"
val DATA_CATEGORY_SD = s"$SYSTEM_URL_CQDG/StructureDefinition/DataCategoryExtension"
val STUDY_DESIGN_SD = s"$SYSTEM_URL_CQDG/StructureDefinition/StudyDesignExtension"
// NOTE(review): the `_DS` suffix differs from the `_SD` suffix used by the sibling constants;
// presumably a typo. Renaming requires updating every usage of this name — confirm before changing.
val DATA_COLLECTION_METHODS_DS = s"$SYSTEM_URL_CQDG/StructureDefinition/DataCollectionMethodExtension"
}
Original file line number Diff line number Diff line change
Expand Up @@ -161,13 +161,39 @@ object Transformations {
)


// Given a label, keeps the `relatedArtifact` entries whose `label` equals it and
// extracts their `citation` field. Callers index the result (e.g. `(0)`) when a single value is wanted.
private val citationsOf: String => Column = { wantedLabel =>
  val matchingArtifacts = filter(col("relatedArtifact"), artifact => artifact("label") === wantedLabel)
  matchingArtifacts("citation")
}

val researchstudyMappings: List[Transformation] = List(
Custom(_
.select("fhir_id", "keyword", "study_id", "description", "contact", "category", "status", "title", "extension", "meta", "identifier")
.select("fhir_id", "keyword", "study_id", "description", "contact", "category", "status", "title", "extension", "meta", "identifier", "relatedArtifact")
.withColumn("keyword", extractKeywords(col("keyword")))
.withColumn(
"contact", transform(col("contact"), col => struct(col("telecom")(0)("system") as "type", col("telecom")(0)("value") as "value"))(0)
)
.withColumn("telecom", firstNonNull(transform(col("contact"), col => col("telecom")(0))))
.withColumn("access_authority", struct(col("telecom")("system") as "type", col("telecom")("value")))
.withColumn("contact_names", transform(filter(col("contact"), col => col("name").isNotNull), col => col("name")))
.withColumn("contact_extensions", flatten(col("contact")("extension")))
.withColumn("contact_institutions", transform(filter(col("contact_extensions"), col => col("url") === CONTACT_INSTITUTIONS_SD), col => col("valueString")))
.withColumn("contact_emails", transform(filter(col("contact_extensions"), col => col("url") === CONTACT_TYPE_SD), col => col("valueString")))
.withColumn("website", transform(filter(col("relatedArtifact"), col => col("label") === "StudyWebsite"), col => col("url")))
.withColumn("citation_statement", citationsOf("CitationStatement")(0))
.withColumn("selection_criteria", citationsOf("SelectionCriteria")(0))
.withColumn("funding_sources", citationsOf("FundingSource"))
.withColumn("expected_items", firstNonNull(filter(col("extension"), col => col("url") === EXPECTED_CONTENT_SD)))
.withColumn("expected_number_participants", filter(col("expected_items")("extension"), col => col("url") === "expectedNumberParticipants")(0)("valueInteger"))
.withColumn("expected_number_biospecimens", filter(col("expected_items")("extension"), col => col("url") === "expectedNumberBiospecimens")(0)("valueInteger"))
.withColumn("expected_number_files", filter(col("expected_items")("extension"), col => col("url") === "expectedNumberFiles")(0)("valueInteger"))
.withColumn("restricted_number_participants", filter(col("expected_items")("extension"), col => col("url") === "restrictedNumberParticipants")(0)("valueInteger"))
.withColumn("restricted_number_biospecimens", filter(col("expected_items")("extension"), col => col("url") === "restrictedNumberBiospecimens")(0)("valueInteger"))
.withColumn("restricted_number_files", filter(col("expected_items")("extension"), col => col("url") === "restrictedNumberFiles")(0)("valueInteger"))
.withColumn("principal_investigators", transform(firstNonNull(filter(col("extension"), col => col("url") === PRINCIPAL_INVESTIGATORS_SD))("extension"), col => col("valueString")))
.withColumn("data_categories", transform(filter(col("extension"), col => col("url") === DATA_CATEGORY_SD)("valueCoding"), col => when(
isnull(col("display")), col("code")
).otherwise(col("display"))))
.withColumn("study_designs", transform(filter(col("extension"), col => col("url") === STUDY_DESIGN_SD)("valueCoding"), col => when(
isnull(col("display")), col("code")
).otherwise(col("display"))))
.withColumn("data_collection_methods", transform(filter(col("extension"), col => col("url") === DATA_COLLECTION_METHODS_DS)("valueCoding"), col => when(
isnull(col("display")), col("code")
).otherwise(col("display"))))
.withColumn("domain", transform(col("category")("coding")(0), col => when(
isnull(col("display")), col("code")
).otherwise(col("display"))))
Expand All @@ -187,9 +213,10 @@ object Transformations {
filter(col, col => col("url") === "name")(0)("valueString") as "name",
filter(col, col => col("url") === "description")(0)("valueString") as "description"
)))
.withColumn("security", filter(col("meta")("security"), col => col("system") === SYSTEM_CONFIDENTIALITY)(0)("code"))
.withColumn("security", firstNonNull(transform(filter(col("extension"),
col => col("url") === RESTRICTED_SD), col => when(lower(col("valueBoolean")) === "true", lit("R")).otherwise(lit("U" )))))
),
Drop("extension", "category", "meta", "identifier", "data_sets_ext")
Drop("extension", "category", "meta", "identifier", "data_sets_ext", "telecom", "contact_extensions", "contact", "relatedArtifact", "expected_items")
)

val documentreferenceMappings: List[Transformation] = List(
Expand All @@ -199,12 +226,18 @@ object Transformations {
.select(columns.head, columns.tail: _*)
.withColumn("participant_id", regexp_extract(col("subject")("reference"), patientExtract, 1))
.withColumn("biospecimen_reference", regexp_extract(col("context")("related")(0)("reference"), specimenExtract, 1))
.withColumn("data_type_filtered",
.withColumn("data_type_cs",
filter(col("type")("coding"), col => col("system") === DOCUMENT_DATA_TYPE)(0)
)
.withColumn("data_type",
coalesce(col("data_type_filtered")("display"), col("data_type_filtered")("code")))
.withColumn("data_category", filter(col("category")(0)("coding"), col => col("system") === DOCUMENT_DATA_CATEGORY)(0)("code"))
// Prefer the human-readable `display` of the matched coding and fall back to its `code`
// when no display is set. The previous `when(isnull(display), code).otherwise(code)` returned
// `code` on both branches, so `display` was never used.
.withColumn("data_type", coalesce(col("data_type_cs")("display"), col("data_type_cs")("code")))
// First coding of the first category whose `system` is DOCUMENT_DATA_CATEGORY.
.withColumn("data_category_cs",
  filter(col("category")(0)("coding"), col => col("system") === DOCUMENT_DATA_CATEGORY)(0)
)
// Same display-then-code fallback as `data_type`.
.withColumn("data_category", coalesce(col("data_category_cs")("display"), col("data_category_cs")("code")))
.withColumn("content_exp", explode(col("content")))
.withColumn("file_size", retrieveSize(firstNonNull(filter(col("content_exp")("attachment")("extension"), col => col("url") === DOCUMENT_SIZE_S_D))("fileSize")))
.withColumn("ferload_url", col("content_exp")("attachment")("url"))
Expand Down
Loading

0 comments on commit 41ca3ad

Please sign in to comment.