From 6048c858ed5d132be5115c250ef444867324a140 Mon Sep 17 00:00:00 2001
From: Adrian
Date: Fri, 24 Nov 2023 16:26:02 -0500
Subject: [PATCH] fix: CQDG-475 remove release_id for partition
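
Normalized and es_index datasets are now partitioned by study_id alone;
only the raw Avro datasets keep release_id, which ConfigurationGenerator
appends explicitly (source.partitionBy :+ "release_id"). The prepare-index
job classes lose their releaseId constructor argument, PrepareIndex parses
one argument fewer, and extract() no longer filters on release_id. As a
sketch of the resulting call sites (hypothetical study id, assuming an
implicit ETL Configuration and Spark session are in scope):

    // After this patch: study ids only, no releaseId parameter.
    new StudyCentric(List("STU0000001")).run()
    new ParticipantCentric(List("STU0000001")).run()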
---
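With loadtype=OverWritePartition the normalized tables are rewritten per
study partition, and the partition directories lose their release_id level.
A minimal, self-contained Spark sketch of the new layout, under assumed
local paths and with spark-sql on the classpath (this is not the project's
writer, just an illustration):

    import org.apache.spark.sql.SparkSession

    object PartitionLayoutSketch extends App {
      val spark = SparkSession.builder().master("local[1]").getOrCreate()
      import spark.implicits._

      val patients = Seq(("STU0000001", "PRT0000001")).toDF("study_id", "participant_id")

      // Output directories now look like .../study_id=STU0000001/ with no
      // release_id=... level underneath.
      patients.write
        .mode("overwrite")
        .partitionBy("study_id")
        .parquet("/tmp/normalized/patient")

      spark.stop()
    }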
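
The extract() methods keep only the study membership filter. Reduced to its
essence as a standalone helper (hypothetical name, for illustration only):

    import org.apache.spark.sql.DataFrame
    import org.apache.spark.sql.functions.col

    // Before: .where(col("release_id") === releaseId)
    //           .where(col("study_id").isin(studyIds: _*))
    // After: a single study_id filter per dataset read.
    def filterByStudies(df: DataFrame, studyIds: List[String]): DataFrame =
      df.where(col("study_id").isin(studyIds: _*))
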
"study_id", - "release_id" + "study_id" ] path="/es_index/fhir/study_centric" readoptions {} @@ -1980,8 +1965,7 @@ datalake { keys=[] loadtype=OverWrite partitionby=[ - "study_id", - "release_id" + "study_id" ] path="/es_index/fhir/participant_centric" readoptions {} @@ -2004,8 +1988,7 @@ datalake { keys=[] loadtype=OverWrite partitionby=[ - "study_id", - "release_id" + "study_id" ] path="/es_index/fhir/file_centric" readoptions {} @@ -2028,8 +2011,7 @@ datalake { keys=[] loadtype=OverWrite partitionby=[ - "study_id", - "release_id" + "study_id" ] path="/es_index/fhir/biospecimen_centric" readoptions {} diff --git a/config/output/config/qa-cqdg.conf b/config/output/config/qa-cqdg.conf index c7ac797e..b00dceb4 100644 --- a/config/output/config/qa-cqdg.conf +++ b/config/output/config/qa-cqdg.conf @@ -1308,8 +1308,7 @@ datalake { keys=[] loadtype=OverWritePartition partitionby=[ - "study_id", - "release_id" + "study_id" ] path="/normalized/patient" readoptions {} @@ -1353,8 +1352,7 @@ datalake { keys=[] loadtype=OverWritePartition partitionby=[ - "study_id", - "release_id" + "study_id" ] path="/normalized/diagnosis" readoptions {} @@ -1398,8 +1396,7 @@ datalake { keys=[] loadtype=OverWritePartition partitionby=[ - "study_id", - "release_id" + "study_id" ] path="/normalized/cause_of_death" readoptions {} @@ -1443,8 +1440,7 @@ datalake { keys=[] loadtype=OverWritePartition partitionby=[ - "study_id", - "release_id" + "study_id" ] path="/normalized/disease_status" readoptions {} @@ -1488,8 +1484,7 @@ datalake { keys=[] loadtype=OverWritePartition partitionby=[ - "study_id", - "release_id" + "study_id" ] path="/normalized/phenotype" readoptions {} @@ -1533,8 +1528,7 @@ datalake { keys=[] loadtype=OverWritePartition partitionby=[ - "study_id", - "release_id" + "study_id" ] path="/normalized/biospecimen" readoptions {} @@ -1578,8 +1572,7 @@ datalake { keys=[] loadtype=OverWritePartition partitionby=[ - "study_id", - "release_id" + "study_id" ] path="/normalized/sample_registration" readoptions {} @@ -1623,8 +1616,7 @@ datalake { keys=[] loadtype=OverWritePartition partitionby=[ - "study_id", - "release_id" + "study_id" ] path="/normalized/research_study" readoptions {} @@ -1668,8 +1660,7 @@ datalake { keys=[] loadtype=OverWritePartition partitionby=[ - "study_id", - "release_id" + "study_id" ] path="/normalized/document_reference" readoptions {} @@ -1713,8 +1704,7 @@ datalake { keys=[] loadtype=OverWritePartition partitionby=[ - "study_id", - "release_id" + "study_id" ] path="/normalized/family_relationship" readoptions {} @@ -1758,8 +1748,7 @@ datalake { keys=[] loadtype=OverWritePartition partitionby=[ - "study_id", - "release_id" + "study_id" ] path="/normalized/tumor_normal_designation" readoptions {} @@ -1803,8 +1792,7 @@ datalake { keys=[] loadtype=OverWritePartition partitionby=[ - "study_id", - "release_id" + "study_id" ] path="/normalized/group" readoptions {} @@ -1848,8 +1836,7 @@ datalake { keys=[] loadtype=OverWritePartition partitionby=[ - "study_id", - "release_id" + "study_id" ] path="/normalized/task" readoptions {} @@ -1936,8 +1923,7 @@ datalake { keys=[] loadtype=OverWrite partitionby=[ - "study_id", - "release_id" + "study_id" ] path="/es_index/fhir/simple_participant" readoptions {} @@ -1956,8 +1942,7 @@ datalake { keys=[] loadtype=OverWrite partitionby=[ - "study_id", - "release_id" + "study_id" ] path="/es_index/fhir/study_centric" readoptions {} @@ -1980,8 +1965,7 @@ datalake { keys=[] loadtype=OverWrite partitionby=[ - "study_id", - "release_id" + "study_id" ] 
path="/es_index/fhir/participant_centric" readoptions {} @@ -2004,8 +1988,7 @@ datalake { keys=[] loadtype=OverWrite partitionby=[ - "study_id", - "release_id" + "study_id" ] path="/es_index/fhir/file_centric" readoptions {} @@ -2028,8 +2011,7 @@ datalake { keys=[] loadtype=OverWrite partitionby=[ - "study_id", - "release_id" + "study_id" ] path="/es_index/fhir/biospecimen_centric" readoptions {} diff --git a/config/src/main/scala/bio/ferlab/fhir/etl/config/ConfigurationGenerator.scala b/config/src/main/scala/bio/ferlab/fhir/etl/config/ConfigurationGenerator.scala index d7ac7f19..92f25cd3 100644 --- a/config/src/main/scala/bio/ferlab/fhir/etl/config/ConfigurationGenerator.scala +++ b/config/src/main/scala/bio/ferlab/fhir/etl/config/ConfigurationGenerator.scala @@ -17,7 +17,7 @@ object ConfigurationGenerator extends App { sources.map(ds => ds.copy(table = ds.table.map(t => TableConf(tableName, t.name)))) } - private val partitionByStudyIdAndReleaseId = List("study_id", "release_id") + private val partitionByStudyIdAndReleaseId = List("study_id") val sourceNames: Seq[SourceConfig] = Seq( SourceConfig("patient", None, partitionByStudyIdAndReleaseId), SourceConfig("condition", Some("diagnosis"), partitionByStudyIdAndReleaseId), @@ -47,7 +47,7 @@ object ConfigurationGenerator extends App { path = s"/fhir/$rawPath", format = AVRO, loadtype = OverWrite, - partitionby = source.partitionBy + partitionby = source.partitionBy :+ "release_id" ), DatasetConf( id = s"normalized_$tableName", diff --git a/prepare-index/src/main/scala/bio/ferlab/fhir/etl/PrepareIndex.scala b/prepare-index/src/main/scala/bio/ferlab/fhir/etl/PrepareIndex.scala index e4c0fd41..a717c6ff 100644 --- a/prepare-index/src/main/scala/bio/ferlab/fhir/etl/PrepareIndex.scala +++ b/prepare-index/src/main/scala/bio/ferlab/fhir/etl/PrepareIndex.scala @@ -6,7 +6,7 @@ import bio.ferlab.fhir.etl.centricTypes.{BiospecimenCentric, FileCentric, Partic object PrepareIndex extends SparkApp { println(s"ARGS: " + args.mkString("[", ", ", "]")) - val Array(_, _, jobName, releaseId, studyIds) = args + val Array(_, _, jobName, studyIds) = args implicit val (conf, _, spark) = init() @@ -16,25 +16,25 @@ object PrepareIndex extends SparkApp { jobName match { case "study_centric" => - new SimpleParticipant(releaseId, studyList).run() - new StudyCentric(releaseId, studyList).run() + new SimpleParticipant(studyList).run() + new StudyCentric(studyList).run() case "participant_centric" => - new StudyCentric(releaseId, studyList).run() - new SimpleParticipant(releaseId, studyList).run() - new ParticipantCentric(releaseId, studyList).run() + new StudyCentric(studyList).run() + new SimpleParticipant(studyList).run() + new ParticipantCentric(studyList).run() case "file_centric" => - new StudyCentric(releaseId, studyList).run() - new SimpleParticipant(releaseId, studyList).run() - new FileCentric(releaseId, studyList).run() + new StudyCentric(studyList).run() + new SimpleParticipant(studyList).run() + new FileCentric(studyList).run() case "biospecimen_centric" => - new StudyCentric(releaseId, studyList).run() - new SimpleParticipant(releaseId, studyList).run() - new BiospecimenCentric(releaseId, studyList).run() + new StudyCentric(studyList).run() + new SimpleParticipant(studyList).run() + new BiospecimenCentric(studyList).run() case "all" => - new StudyCentric(releaseId, studyList).run() - new SimpleParticipant(releaseId, studyList).run() - new ParticipantCentric(releaseId, studyList).run() - new FileCentric(releaseId, studyList).run() - new 
diff --git a/config/src/main/scala/bio/ferlab/fhir/etl/config/ConfigurationGenerator.scala b/config/src/main/scala/bio/ferlab/fhir/etl/config/ConfigurationGenerator.scala
index d7ac7f19..92f25cd3 100644
--- a/config/src/main/scala/bio/ferlab/fhir/etl/config/ConfigurationGenerator.scala
+++ b/config/src/main/scala/bio/ferlab/fhir/etl/config/ConfigurationGenerator.scala
@@ -17,7 +17,7 @@ object ConfigurationGenerator extends App {
     sources.map(ds => ds.copy(table = ds.table.map(t => TableConf(tableName, t.name))))
   }
 
-  private val partitionByStudyIdAndReleaseId = List("study_id", "release_id")
+  private val partitionByStudyIdAndReleaseId = List("study_id")
   val sourceNames: Seq[SourceConfig] = Seq(
     SourceConfig("patient", None, partitionByStudyIdAndReleaseId),
     SourceConfig("condition", Some("diagnosis"), partitionByStudyIdAndReleaseId),
@@ -47,7 +47,7 @@ object ConfigurationGenerator extends App {
         path = s"/fhir/$rawPath",
         format = AVRO,
         loadtype = OverWrite,
-        partitionby = source.partitionBy
+        partitionby = source.partitionBy :+ "release_id"
       ),
       DatasetConf(
         id = s"normalized_$tableName",
diff --git a/prepare-index/src/main/scala/bio/ferlab/fhir/etl/PrepareIndex.scala b/prepare-index/src/main/scala/bio/ferlab/fhir/etl/PrepareIndex.scala
index e4c0fd41..a717c6ff 100644
--- a/prepare-index/src/main/scala/bio/ferlab/fhir/etl/PrepareIndex.scala
+++ b/prepare-index/src/main/scala/bio/ferlab/fhir/etl/PrepareIndex.scala
@@ -6,7 +6,7 @@ import bio.ferlab.fhir.etl.centricTypes.{BiospecimenCentric, FileCentric, Partic
 object PrepareIndex extends SparkApp {
 
   println(s"ARGS: " + args.mkString("[", ", ", "]"))
-  val Array(_, _, jobName, releaseId, studyIds) = args
+  val Array(_, _, jobName, studyIds) = args
 
   implicit val (conf, _, spark) = init()
 
@@ -16,25 +16,25 @@ object PrepareIndex extends SparkApp {
 
   jobName match {
     case "study_centric" =>
-      new SimpleParticipant(releaseId, studyList).run()
-      new StudyCentric(releaseId, studyList).run()
+      new SimpleParticipant(studyList).run()
+      new StudyCentric(studyList).run()
     case "participant_centric" =>
-      new StudyCentric(releaseId, studyList).run()
-      new SimpleParticipant(releaseId, studyList).run()
-      new ParticipantCentric(releaseId, studyList).run()
+      new StudyCentric(studyList).run()
+      new SimpleParticipant(studyList).run()
+      new ParticipantCentric(studyList).run()
     case "file_centric" =>
-      new StudyCentric(releaseId, studyList).run()
-      new SimpleParticipant(releaseId, studyList).run()
-      new FileCentric(releaseId, studyList).run()
+      new StudyCentric(studyList).run()
+      new SimpleParticipant(studyList).run()
+      new FileCentric(studyList).run()
     case "biospecimen_centric" =>
-      new StudyCentric(releaseId, studyList).run()
-      new SimpleParticipant(releaseId, studyList).run()
-      new BiospecimenCentric(releaseId, studyList).run()
+      new StudyCentric(studyList).run()
+      new SimpleParticipant(studyList).run()
+      new BiospecimenCentric(studyList).run()
     case "all" =>
-      new StudyCentric(releaseId, studyList).run()
-      new SimpleParticipant(releaseId, studyList).run()
-      new ParticipantCentric(releaseId, studyList).run()
-      new FileCentric(releaseId, studyList).run()
-      new BiospecimenCentric(releaseId, studyList).run()
+      new StudyCentric(studyList).run()
+      new SimpleParticipant(studyList).run()
+      new ParticipantCentric(studyList).run()
+      new FileCentric(studyList).run()
+      new BiospecimenCentric(studyList).run()
   }
 }
diff --git a/prepare-index/src/main/scala/bio/ferlab/fhir/etl/centricTypes/BiospecimenCentric.scala b/prepare-index/src/main/scala/bio/ferlab/fhir/etl/centricTypes/BiospecimenCentric.scala
index fa24ec92..2b11fff5 100644
--- a/prepare-index/src/main/scala/bio/ferlab/fhir/etl/centricTypes/BiospecimenCentric.scala
+++ b/prepare-index/src/main/scala/bio/ferlab/fhir/etl/centricTypes/BiospecimenCentric.scala
@@ -9,7 +9,7 @@ import org.apache.spark.sql.{DataFrame, SparkSession}
 
 import java.time.LocalDateTime
 
-class BiospecimenCentric(releaseId: String, studyIds: List[String])(implicit configuration: Configuration) extends ETL {
+class BiospecimenCentric(studyIds: List[String])(implicit configuration: Configuration) extends ETL {
 
   override val mainDestination: DatasetConf = conf.getDataset("es_index_biospecimen_centric")
   val normalized_biospecimen: DatasetConf = conf.getDataset("normalized_biospecimen")
@@ -24,9 +24,7 @@ class BiospecimenCentric(releaseId: String, studyIds: List[String])(implicit con
                        currentRunDateTime: LocalDateTime = LocalDateTime.now())(implicit spark: SparkSession): Map[String, DataFrame] = {
     Seq(normalized_biospecimen, normalized_drs_document_reference, simple_participant, es_index_study_centric,
       normalized_sequencing_experiment, normalized_sample_registration, es_index_file_centric)
-      .map(ds => ds.id -> ds.read.where(col("release_id") === releaseId)
-        .where(col("study_id").isin(studyIds: _*))
-      ).toMap
+      .map(ds => ds.id -> ds.read.where(col("study_id").isin(studyIds: _*))).toMap
   }
 
   override def transform(data: Map[String, DataFrame],
diff --git a/prepare-index/src/main/scala/bio/ferlab/fhir/etl/centricTypes/FileCentric.scala b/prepare-index/src/main/scala/bio/ferlab/fhir/etl/centricTypes/FileCentric.scala
index ea16faf6..30639e6a 100644
--- a/prepare-index/src/main/scala/bio/ferlab/fhir/etl/centricTypes/FileCentric.scala
+++ b/prepare-index/src/main/scala/bio/ferlab/fhir/etl/centricTypes/FileCentric.scala
@@ -9,7 +9,7 @@ import org.apache.spark.sql.{DataFrame, SparkSession, functions}
 
 import java.time.LocalDateTime
 
-class FileCentric(releaseId: String, studyIds: List[String])(implicit configuration: Configuration) extends ETL {
+class FileCentric(studyIds: List[String])(implicit configuration: Configuration) extends ETL {
 
   override val mainDestination: DatasetConf = conf.getDataset("es_index_file_centric")
   val normalized_drs_document_reference: DatasetConf = conf.getDataset("normalized_document_reference")
@@ -22,9 +22,7 @@ class FileCentric(releaseId: String, studyIds: List[String])(implicit configurat
   override def extract(lastRunDateTime: LocalDateTime = minDateTime,
                        currentRunDateTime: LocalDateTime = LocalDateTime.now())(implicit spark: SparkSession): Map[String, DataFrame] = {
     Seq(normalized_drs_document_reference, normalized_biospecimen, simple_participant, es_index_study_centric, normalized_sequencing_experiment, normalized_sample_registration)
-      .map(ds => ds.id -> ds.read.where(col("release_id") === releaseId)
-        .where(col("study_id").isin(studyIds: _*))
-      ).toMap
+      .map(ds => ds.id -> ds.read.where(col("study_id").isin(studyIds: _*))).toMap
   }
 
   override def transform(data: Map[String, DataFrame],
diff --git a/prepare-index/src/main/scala/bio/ferlab/fhir/etl/centricTypes/ParticipantCentric.scala b/prepare-index/src/main/scala/bio/ferlab/fhir/etl/centricTypes/ParticipantCentric.scala
index b7e6c871..7c6c3f9e 100644
--- a/prepare-index/src/main/scala/bio/ferlab/fhir/etl/centricTypes/ParticipantCentric.scala
+++ b/prepare-index/src/main/scala/bio/ferlab/fhir/etl/centricTypes/ParticipantCentric.scala
@@ -9,7 +9,7 @@ import org.apache.spark.sql.{DataFrame, SparkSession}
 
 import java.time.LocalDateTime
 
-class ParticipantCentric(releaseId: String, studyIds: List[String])(implicit configuration: Configuration) extends ETL {
+class ParticipantCentric(studyIds: List[String])(implicit configuration: Configuration) extends ETL {
 
   override val mainDestination: DatasetConf = conf.getDataset("es_index_participant_centric")
   val simple_participant: DatasetConf = conf.getDataset("simple_participant")
@@ -22,9 +22,7 @@ class ParticipantCentric(releaseId: String, studyIds: List[String])(implicit con
   override def extract(lastRunDateTime: LocalDateTime = minDateTime,
                        currentRunDateTime: LocalDateTime = LocalDateTime.now())(implicit spark: SparkSession): Map[String, DataFrame] = {
     Seq(simple_participant, normalized_drs_document_reference, normalized_biospecimen, normalized_sequencing_experiment, normalized_sample_registration, es_index_study_centric)
-      .map(ds => ds.id -> ds.read.where(col("release_id") === releaseId)
-        .where(col("study_id").isin(studyIds: _*))
-      ).toMap
+      .map(ds => ds.id -> ds.read.where(col("study_id").isin(studyIds: _*))).toMap
   }
 
   override def transform(data: Map[String, DataFrame],
diff --git a/prepare-index/src/main/scala/bio/ferlab/fhir/etl/centricTypes/SimpleParticipant.scala b/prepare-index/src/main/scala/bio/ferlab/fhir/etl/centricTypes/SimpleParticipant.scala
index f1dbbe68..3573a4a6 100644
--- a/prepare-index/src/main/scala/bio/ferlab/fhir/etl/centricTypes/SimpleParticipant.scala
+++ b/prepare-index/src/main/scala/bio/ferlab/fhir/etl/centricTypes/SimpleParticipant.scala
@@ -9,7 +9,7 @@ import org.apache.spark.sql.{DataFrame, SparkSession}
 
 import java.time.LocalDateTime
 
-class SimpleParticipant(releaseId: String, studyIds: List[String])(implicit configuration: Configuration) extends ETL {
+class SimpleParticipant(studyIds: List[String])(implicit configuration: Configuration) extends ETL {
 
   override val mainDestination: DatasetConf = conf.getDataset("simple_participant")
   val normalized_patient: DatasetConf = conf.getDataset("normalized_patient")
@@ -31,8 +31,7 @@ class SimpleParticipant(releaseId: String, studyIds: List[String])(implicit conf
     (Seq(
       normalized_patient, normalized_phenotype, normalized_disease, normalized_disease_status,
       normalized_cause_of_death, normalized_group, normalized_family_relationship, normalized_researchstudy)
-      .map(ds => ds.id -> ds.read.where(col("release_id") === releaseId)
-        .where(col("study_id").isin(studyIds: _*))
+      .map(ds => ds.id -> ds.read.where(col("study_id").isin(studyIds: _*))
       ) ++ Seq(
       hpo_terms.id -> hpo_terms.read,
       mondo_terms.id -> mondo_terms.read,
diff --git a/prepare-index/src/main/scala/bio/ferlab/fhir/etl/centricTypes/StudyCentric.scala b/prepare-index/src/main/scala/bio/ferlab/fhir/etl/centricTypes/StudyCentric.scala
index 5c1a857f..874e11cc 100644
--- a/prepare-index/src/main/scala/bio/ferlab/fhir/etl/centricTypes/StudyCentric.scala
+++ b/prepare-index/src/main/scala/bio/ferlab/fhir/etl/centricTypes/StudyCentric.scala
@@ -9,7 +9,7 @@ import org.apache.spark.sql.functions.{col, transform => sparkTransform, _}
 
 import java.time.LocalDateTime
 
-class StudyCentric(releaseId: String, studyIds: List[String])(implicit configuration: Configuration) extends ETL {
+class StudyCentric(studyIds: List[String])(implicit configuration: Configuration) extends ETL {
 
   override val mainDestination: DatasetConf = conf.getDataset("es_index_study_centric")
   val normalized_researchstudy: DatasetConf = conf.getDataset("normalized_research_study")
@@ -29,9 +29,7 @@ class StudyCentric(releaseId: String, studyIds: List[String])(implicit configura
       normalized_researchstudy, normalized_drs_document_reference, normalized_patient, normalized_group,
       normalized_diagnosis, normalized_task, normalized_phenotype, normalized_sequencing_experiment,
       normalized_biospecimen, normalized_sample_registration)
-      .map(ds => ds.id -> ds.read.where(col("release_id") === releaseId)
-        .where(col("study_id").isin(studyIds: _*))
-      ).toMap
+      .map(ds => ds.id -> ds.read.where(col("study_id").isin(studyIds: _*))).toMap
   }
 
 }
diff --git a/prepare-index/src/test/scala/BiospecimenCentricSpec.scala b/prepare-index/src/test/scala/BiospecimenCentricSpec.scala
index 689f5048..b9de48ae 100644
--- a/prepare-index/src/test/scala/BiospecimenCentricSpec.scala
+++ b/prepare-index/src/test/scala/BiospecimenCentricSpec.scala
@@ -73,7 +73,7 @@ class BiospecimenCentricSpec extends AnyFlatSpec with Matchers with WithSparkSes
       ).toDF(),
     )
 
-    val output = new BiospecimenCentric("5", List("STU0000001"))(conf).transform(data)
+    val output = new BiospecimenCentric(List("STU0000001"))(conf).transform(data)
 
     output.keys should contain("es_index_biospecimen_centric")
 
diff --git a/prepare-index/src/test/scala/FileCentricSpec.scala b/prepare-index/src/test/scala/FileCentricSpec.scala
index 67507f6a..871b4d04 100644
--- a/prepare-index/src/test/scala/FileCentricSpec.scala
+++ b/prepare-index/src/test/scala/FileCentricSpec.scala
@@ -42,7 +42,7 @@ class FileCentricSpec extends AnyFlatSpec with Matchers with WithSparkSession {
       ).toDF(),
     )
 
-    val output = new FileCentric("5", List("STU0000001"))(conf).transform(data)
+    val output = new FileCentric(List("STU0000001"))(conf).transform(data)
 
     output.keys should contain("es_index_file_centric")
     val file_centric = output("es_index_file_centric").as[FILE_CENTRIC].collect()
diff --git a/prepare-index/src/test/scala/ParticipantCentricSpec.scala b/prepare-index/src/test/scala/ParticipantCentricSpec.scala
index db4babba..f365ccb6 100644
--- a/prepare-index/src/test/scala/ParticipantCentricSpec.scala
+++ b/prepare-index/src/test/scala/ParticipantCentricSpec.scala
@@ -37,7 +37,7 @@ class ParticipantCentricSpec extends AnyFlatSpec with Matchers with WithSparkSes
     "es_index_study_centric" -> Seq(STUDY_CENTRIC()).toDF(),
   )
 
-    val output = new ParticipantCentric("re_000001", List("SD_Z6MWD3H0"))(conf).transform(data)
+    val output = new ParticipantCentric(List("SD_Z6MWD3H0"))(conf).transform(data)
 
     output.keys should contain("es_index_participant_centric")
 
diff --git a/prepare-index/src/test/scala/SimpleParticipantSpec.scala b/prepare-index/src/test/scala/SimpleParticipantSpec.scala
index 08d4cfd5..21b01fe9 100644
--- a/prepare-index/src/test/scala/SimpleParticipantSpec.scala
+++ b/prepare-index/src/test/scala/SimpleParticipantSpec.scala
@@ -42,7 +42,7 @@ class SimpleParticipantSpec extends AnyFlatSpec with Matchers with WithSparkSess
     "icd_terms" -> read(getClass.getResource("/icd_terms.json").toString, "Json", Map(), None, None)
   )
 
-    val output = new SimpleParticipant("re_000001", List("SD_Z6MWD3H0"))(conf).transform(data)
+    val output = new SimpleParticipant(List("SD_Z6MWD3H0"))(conf).transform(data)
 
     output.keys should contain("simple_participant")
 
diff --git a/prepare-index/src/test/scala/StudyCentricSpec.scala b/prepare-index/src/test/scala/StudyCentricSpec.scala
index c35aece2..6d356797 100644
--- a/prepare-index/src/test/scala/StudyCentricSpec.scala
+++ b/prepare-index/src/test/scala/StudyCentricSpec.scala
@@ -65,7 +65,7 @@ class StudyCentricSpec extends AnyFlatSpec with Matchers with WithSparkSession {
     "duo_terms" -> read(getClass.getResource("/duo_terms.csv").toString, "csv", Map("header" -> "true"), None, None),
   )
 
-    val output = new StudyCentric("5", List("STU0000001"))(conf).transform(data)
+    val output = new StudyCentric(List("STU0000001"))(conf).transform(data)
 
     output.keys should contain("es_index_study_centric")