From a022ddb257640318e43b634cad457215e4a007df Mon Sep 17 00:00:00 2001
From: Manjunath Davanam
Date: Wed, 14 Jul 2021 17:02:48 +0530
Subject: [PATCH 01/55] Issue SB-24793 feat: Assessment data archival data
 product job implementation and testcase

---
 .../job/report/AssessmentArchivalJob.scala | 104 ++++++++++++++++++
 .../assessment_aggregator.csv              |   9 ++
 .../report/TestAssessmentArchivalJob.scala |  83 ++++++++++++++
 3 files changed, 196 insertions(+)
 create mode 100644 data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala
 create mode 100644 data-products/src/test/resources/assessment-archival/assessment_aggregator.csv
 create mode 100644 data-products/src/test/scala/org/sunbird/analytics/job/report/TestAssessmentArchivalJob.scala

diff --git a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala
new file mode 100644
index 000000000..da375e32e
--- /dev/null
+++ b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala
@@ -0,0 +1,104 @@
+package org.sunbird.analytics.job.report
+
+import org.apache.spark.SparkContext
+import org.apache.spark.sql.functions.{col, to_timestamp, weekofyear, year}
+import org.apache.spark.sql.types.StructType
+import org.apache.spark.sql.{DataFrame, SparkSession}
+import org.ekstep.analytics.framework.Level.INFO
+import org.ekstep.analytics.framework.conf.AppConf
+import org.ekstep.analytics.framework.util.DatasetUtil.extensions
+import org.ekstep.analytics.framework.util.{CommonUtil, JSONUtils, JobLogger}
+import org.ekstep.analytics.framework.{FrameworkContext, IJob, JobConfig}
+
+import java.util.concurrent.CompletableFuture
+import java.util.function.Supplier
+
+object AssessmentArchivalJob extends optional.Application with IJob with BaseReportsJob {
+  val cassandraUrl = "org.apache.spark.sql.cassandra"
+  private val assessmentAggDBSettings: Map[String, String] = Map("table" -> "assessment_aggregator", "keyspace" -> AppConf.getConfig("sunbird.courses.keyspace"), "cluster" -> "LMSCluster")
+  implicit val className: String = "org.sunbird.analytics.job.report.AssessmentArchivalJob"
+  private val partitionCols = List("batch_id", "year", "week_of_year")
+
+  case class BatchPartition(batch_id: String, year: Int, week_of_year: Int)
+
+  override def main(config: String)(implicit sc: Option[SparkContext], fc: Option[FrameworkContext]): Unit = {
+
+    implicit val className: String = "org.sunbird.analytics.job.report.AssessmentArchivalJob"
+    val jobName = "AssessmentArchivalJob"
+    JobLogger.init(jobName)
+    JobLogger.start(s"$jobName started executing", Option(Map("config" -> config, "model" -> jobName)))
+    implicit val jobConfig: JobConfig = JSONUtils.deserialize[JobConfig](config)
+    implicit val spark: SparkSession = openSparkSession(jobConfig)
+    implicit val frameworkContext: FrameworkContext = getReportingFrameworkContext()
+    val modelParams = jobConfig.modelParams.get
+    val truncateData: Boolean = modelParams.getOrElse("truncateData", "false").asInstanceOf[Boolean]
+    try {
+      val res = CommonUtil.time(archiveData(spark, fetchData, "", jobConfig))
+      if (truncateData) deleteRecords(spark, assessmentAggDBSettings.getOrElse("keyspace", "sunbird_courses"), assessmentAggDBSettings.getOrElse("table", "assessment_aggregator")) else JobLogger.log(s"Skipping the ${assessmentAggDBSettings.getOrElse("table", "assessment_aggregator")} truncate process", None, INFO)
JobLogger.end(s"$jobName completed execution", "SUCCESS", Option(Map("timeTaken" -> res._1, "total_archived_files" -> res._2.length))) + } finally { + frameworkContext.closeContext() + spark.close() + } + + + } + + // $COVERAGE-ON$ + def archiveData(sparkSession: SparkSession, fetchData: (SparkSession, Map[String, String], String, StructType) => DataFrame, url: String, jobConfig: JobConfig): Array[Map[String, Any]] = { + val assessmentData: DataFrame = getAssessmentData(sparkSession, fetchData) + .withColumn("updated_on", to_timestamp(col("updated_on"))) + .withColumn("year", year(col("updated_on"))) + .withColumn("week_of_year", weekofyear(col("updated_on"))) + + val archivedBatchList = assessmentData.groupBy(col("batch_id"), col("year"), col("week_of_year")).count().collect() + JobLogger.log(s"Total Batches to Archive By Year & Week ${archivedBatchList.length}", None, INFO) + + val batchesToArchive: Array[BatchPartition] = archivedBatchList.map(f => + BatchPartition(f.get(0).asInstanceOf[String], f.get(1).asInstanceOf[Int], f.get(2).asInstanceOf[Int])) + + val archivedBatchResult = for (batch <- batchesToArchive) yield { + val filteredDF = assessmentData + .filter(col("batch_id") === batch.batch_id && col("year") === batch.year && col("week_of_year") === batch.week_of_year) + syncToCloud(filteredDF.drop("year", "week_of_year"), batch, jobConfig) + } + CompletableFuture.allOf(archivedBatchResult: _*) + archivedBatchResult.map(f => f.join()) + } + + def getAssessmentData(spark: SparkSession, fetchData: (SparkSession, Map[String, String], String, StructType) => DataFrame): DataFrame = { + fetchData(spark, assessmentAggDBSettings, cassandraUrl, new StructType()) + } + + def deleteRecords(sparkSession: SparkSession, keyspace: String, table: String): Unit = { + sparkSession.sql(s"TRUNCATE TABLE $keyspace.$table") + JobLogger.log(s"The Job Cleared The Table Data SuccessFully, Please Execute The Compaction", None, INFO) + } + + def syncToCloud(archivedData: DataFrame, batch: BatchPartition, conf: JobConfig): CompletableFuture[Map[String, Any]] = { + CompletableFuture.supplyAsync(new Supplier[Map[String, Any]]() { + override def get(): Map[String, Any] = { + val res = CommonUtil.time(upload(archivedData, s"${batch.batch_id}-${batch.year}-${batch.week_of_year}", conf)) + val metrics = Map("batch_id" -> batch.batch_id, "year" -> batch.year, "week_of_year" -> batch.week_of_year, "time_taken" -> res._1, "total_records" -> archivedData.count()) + JobLogger.log(s"Data is archived for ", Some(metrics), INFO) + metrics + } + }) + } + + def upload(reportData: DataFrame, + fileName: String, + jobConfig: JobConfig): List[String] = { + val modelParams = jobConfig.modelParams.get + val reportPath: String = modelParams.getOrElse("reportPath", "archival-data/").asInstanceOf[String] + val container = AppConf.getConfig("cloud.container.reports") + val objectKey = AppConf.getConfig("course.metrics.cloud.objectKey") + val storageConfig = getStorageConfig( + container, + objectKey, + jobConfig) + JobLogger.log(s"Uploading reports to blob storage", None, INFO) + reportData.saveToBlobStore(storageConfig, "csv", s"$reportPath$fileName-${System.currentTimeMillis()}", Option(Map("header" -> "true")), None) + } + +} diff --git a/data-products/src/test/resources/assessment-archival/assessment_aggregator.csv b/data-products/src/test/resources/assessment-archival/assessment_aggregator.csv new file mode 100644 index 000000000..088edc97e --- /dev/null +++ 
b/data-products/src/test/resources/assessment-archival/assessment_aggregator.csv @@ -0,0 +1,9 @@ +content_id,attempt_id,user_id,course_id,batch_id,created_on,last_attempted_on,total_max_score,total_score,updated_on,grand_total,question +do_112835335135993856149,A3,user030,do_1125559882615357441175,1010,1971-09-22 02:10:53.444+0000,2019-09-06 09:59:51.000+0000,10,5,2019-09-06 09:59:51.000+0000,"10/2","[{id: 'q3', max_score: 2, score: 1, type: 'mmc', title: 'choose one', resvalues: [{'2': '{\"text\":\"Quantity\\n\"}'}], params: [{'1': '{\"text\":\"Space and Shape\\n\"}'}, {'2': '{\"text\":\"Quantity\\n\"}'}, {'3': '{\"text\":\"Reading Skills\\n\"}'}, {'4': '{\"text\":\"Uncertainity and Data\\n\"}'}, {'answer': '{\"correct\":[\"3\"]}'}], description: 'Description of the question', duration: 10}]" +do_112835335135993856149,A4,user021,do_2123101488779837441168,1001,1971-09-22 02:10:53.444+0000,2019-09-06 09:59:51.000+0000,20,4,2019-09-06 09:59:51.000+0000,"2/2","[{id: 'q3', max_score: 2, score: 1, type: 'mmc', title: 'choose one', resvalues: [{'2': '{\"text\":\"Quantity\\n\"}'}], params: [{'1': '{\"text\":\"Space and Shape\\n\"}'}, {'2': '{\"text\":\"Quantity\\n\"}'}, {'3': '{\"text\":\"Reading Skills\\n\"}'}, {'4': '{\"text\":\"Uncertainity and Data\\n\"}'}, {'answer': '{\"correct\":[\"3\"]}'}], description: 'Description of the question', duration: 10}]" +do_112835336280596480151,A4,user021,do_2123101488779837441168,1001,1971-09-22 02:10:53.444+0000,2019-09-06 09:58:51.000+0000,30,10,2019-09-06 09:59:51.000+0000,"4/4","[{id: 'q3', max_score: 2, score: 1, type: 'mmc', title: 'choose one', resvalues: [{'2': '{\"text\":\"Quantity\\n\"}'}], params: [{'1': '{\"text\":\"Space and Shape\\n\"}'}, {'2': '{\"text\":\"Quantity\\n\"}'}, {'3': '{\"text\":\"Reading Skills\\n\"}'}, {'4': '{\"text\":\"Uncertainity and Data\\n\"}'}, {'answer': '{\"correct\":[\"3\"]}'}], description: 'Description of the question', duration: 10}]" +do_112835336280596480151,A4,user021,do_2123101488779837441168,1001,1971-09-22 02:10:53.444+0000,2019-09-06 09:57:51.000+0000,30,8,2019-09-06 09:59:51.000+0000,"4/4","[{id: 'q3', max_score: 2, score: 1, type: 'mmc', title: 'choose one', resvalues: [{'2': '{\"text\":\"Quantity\\n\"}'}], params: [{'1': '{\"text\":\"Space and Shape\\n\"}'}, {'2': '{\"text\":\"Quantity\\n\"}'}, {'3': '{\"text\":\"Reading Skills\\n\"}'}, {'4': '{\"text\":\"Uncertainity and Data\\n\"}'}, {'answer': '{\"correct\":[\"3\"]}'}], description: 'Description of the question', duration: 10}]" +do_112832394979106816112,A1,user015,do_112695422838472704115,1005,1971-09-22 02:10:53.444+0000,2019-09-06 09:59:51.000+0000,10,5,2019-09-06 09:59:51.000+0000,"4/4","[{id: 'q3', max_score: 2, score: 1, type: 'mmc', title: 'choose one', resvalues: [{'2': '{\"text\":\"Quantity\\n\"}'}], params: [{'1': '{\"text\":\"Space and Shape\\n\"}'}, {'2': '{\"text\":\"Quantity\\n\"}'}, {'3': '{\"text\":\"Reading Skills\\n\"}'}, {'4': '{\"text\":\"Uncertainity and Data\\n\"}'}, {'answer': '{\"correct\":[\"3\"]}'}], description: 'Description of the question', duration: 10}]" +do_112832394979106816112,A2,user030,do_1125559882615357441175,1010,1971-09-22 02:10:53.444+0000,2019-09-06 09:59:51.000+0000,10,5,2019-09-06 09:59:51.000+0000,"4/4","[{id: 'q3', max_score: 2, score: 1, type: 'mmc', title: 'choose one', resvalues: [{'2': '{\"text\":\"Quantity\\n\"}'}], params: [{'1': '{\"text\":\"Space and Shape\\n\"}'}, {'2': '{\"text\":\"Quantity\\n\"}'}, {'3': '{\"text\":\"Reading Skills\\n\"}'}, {'4': '{\"text\":\"Uncertainity and Data\\n\"}'}, 
{'answer': '{\"correct\":[\"3\"]}'}], description: 'Description of the question', duration: 10}]" +do_112832394979106816112,A6,user026,do_1126458775024025601296,1006,1971-09-22 02:10:53.444+0000,2019-09-06 09:59:51.000+0000,30,10,2019-09-06 09:59:51.000+0000,"5/5","[{id: 'q3', max_score: 2, score: 1, type: 'mmc', title: 'choose one', resvalues: [{'2': '{\"text\":\"Quantity\\n\"}'}], params: [{'1': '{\"text\":\"Space and Shape\\n\"}'}, {'2': '{\"text\":\"Quantity\\n\"}'}, {'3': '{\"text\":\"Reading Skills\\n\"}'}, {'4': '{\"text\":\"Uncertainity and Data\\n\"}'}, {'answer': '{\"correct\":[\"3\"]}'}], description: 'Description of the question', duration: 10}]" +do_112832394979106816112,A6,user021,do_1126458775024025601296,1006,1971-09-22 02:10:53.444+0000,2019-09-06 09:59:51.000+0000,30,10,2019-09-06 09:59:51.000+0000,"6/6","[{id: 'q3', max_score: 2, score: 1, type: 'mmc', title: 'choose one', resvalues: [{'2': '{\"text\":\"Quantity\\n\"}'}], params: [{'1': '{\"text\":\"Space and Shape\\n\"}'}, {'2': '{\"text\":\"Quantity\\n\"}'}, {'3': '{\"text\":\"Reading Skills\\n\"}'}, {'4': '{\"text\":\"Uncertainity and Data\\n\"}'}, {'answer': '{\"correct\":[\"3\"]}'}], description: 'Description of the question', duration: 10}]" \ No newline at end of file diff --git a/data-products/src/test/scala/org/sunbird/analytics/job/report/TestAssessmentArchivalJob.scala b/data-products/src/test/scala/org/sunbird/analytics/job/report/TestAssessmentArchivalJob.scala new file mode 100644 index 000000000..f747d74f6 --- /dev/null +++ b/data-products/src/test/scala/org/sunbird/analytics/job/report/TestAssessmentArchivalJob.scala @@ -0,0 +1,83 @@ +package org.sunbird.analytics.job.report + + +import org.apache.spark.sql.functions.udf +import org.apache.spark.sql.types.{ArrayType, MapType, StringType, StructType} +import org.apache.spark.sql.{DataFrame, SparkSession} +import org.ekstep.analytics.framework.conf.AppConf +import org.ekstep.analytics.framework.util.{HadoopFileUtil, JSONUtils} +import org.ekstep.analytics.framework.{FrameworkContext, JobConfig} +import org.scalamock.scalatest.MockFactory + +import scala.collection.mutable + + +class TestAssessmentArchivalJob extends BaseReportSpec with MockFactory { + + var spark: SparkSession = _ + + var assessmentAggDF: DataFrame = _ + var reporterMock: BaseReportsJob = mock[BaseReportsJob] + val sunbirdCoursesKeyspace = "sunbird_courses" + + override def beforeAll(): Unit = { + super.beforeAll() + spark = getSparkSession(); + assessmentAggDF = spark + .read + .format("com.databricks.spark.csv") + .option("header", "true") + .load("src/test/resources/assessment-archival/assessment_aggregator.csv") + .cache() + } + + override def afterAll(): Unit = { + super.afterAll() + val objectKey = AppConf.getConfig("course.metrics.cloud.objectKey") + new HadoopFileUtil().delete(spark.sparkContext.hadoopConfiguration, objectKey + "collection-summary-reports-v2/") + } + + val convertMethod = udf((value: mutable.WrappedArray[String]) => { + if (null != value && value.nonEmpty) + value.toList.map(str => JSONUtils.deserialize(str)(manifest[Map[String, String]])).toArray + else null + }, new ArrayType(MapType(StringType, StringType), true)) + + it should "Should able to archive the batch data" in { + initializeDefaultMockData() + implicit val mockFc: FrameworkContext = mock[FrameworkContext] + val strConfig = """{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.AssessmentArchivalJob","modelParams":{"truncateData":false,"store":"local","sparkCassandraConnectionHost":"{{ 
core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Assessment Archival Job"}""".stripMargin + implicit val jobConfig: JobConfig = JSONUtils.deserialize[JobConfig](strConfig) + val reportData = AssessmentArchivalJob.archiveData(spark, reporterMock.fetchData, "/Users/manjunathdavanam/Documents/Projects.nosync/Sunbird/partition-data", jobConfig) + + val batch_1 = reportData.filter(x => x.getOrElse("batch_id", "").asInstanceOf[String] === "1010") + batch_1.foreach(res => res("year") === "2019") + batch_1.foreach(res => res("total_records") === "2") + batch_1.foreach(res => res("week_of_year") === "36") + + + val batch_2 = reportData.filter(x => x.getOrElse("batch_id", "").asInstanceOf[String] === "1001") + batch_2.foreach(res => res("year") === "2019") + batch_2.foreach(res => res("total_records") === "3") + batch_2.foreach(res => res("week_of_year") === "36") + + + val batch_3 = reportData.filter(x => x.getOrElse("batch_id", "").asInstanceOf[String] === "1005") + batch_3.foreach(res => res("year") === "2019") + batch_3.foreach(res => res("total_records") === "1") + batch_3.foreach(res => res("week_of_year") === "36") + + + val batch_4 = reportData.filter(x => x.getOrElse("batch_id", "").asInstanceOf[String] === "1006") + batch_4.foreach(res => res("year") === "2019") + batch_4.foreach(res => res("total_records") === "2") + batch_4.foreach(res => res("week_of_year") === "36") + + } + + def initializeDefaultMockData() { + (reporterMock.fetchData _) + .expects(spark, Map("table" -> "assessment_aggregator", "keyspace" -> sunbirdCoursesKeyspace, "cluster" -> "LMSCluster"), "org.apache.spark.sql.cassandra", new StructType()) + .returning(assessmentAggDF) + } +} \ No newline at end of file From fd2ac9c5650c3ca87c1370f2ce27313f3807b6a2 Mon Sep 17 00:00:00 2001 From: Manjunath Davanam Date: Wed, 14 Jul 2021 17:07:39 +0530 Subject: [PATCH 02/55] Issue SB-24793 feat: Assessment data archival data product job implementation and testcase --- .../sunbird/analytics/job/report/AssessmentArchivalJob.scala | 4 ++-- .../analytics/job/report/TestAssessmentArchivalJob.scala | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala index da375e32e..55ec1c68a 100644 --- a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala +++ b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala @@ -33,7 +33,7 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep val modelParams = jobConfig.modelParams.get val truncateData: Boolean = modelParams.getOrElse("truncateData", "false").asInstanceOf[Boolean] try { - val res = CommonUtil.time(archiveData(spark, fetchData, "", jobConfig)) + val res = CommonUtil.time(archiveData(spark, fetchData, jobConfig)) if (truncateData) deleteRecords(spark, assessmentAggDBSettings.getOrElse("keyspace", "sunbird_courses"), assessmentAggDBSettings.getOrElse("table", "assessment_aggregator")) else JobLogger.log(s"Skipping the ${assessmentAggDBSettings.getOrElse("table", "assessment_aggregator")} truncate process", None, INFO) JobLogger.end(s"$jobName completed execution", "SUCCESS", Option(Map("timeTaken" -> res._1, "total_archived_files" -> res._2.length))) } finally { @@ -45,7 +45,7 @@ object 
AssessmentArchivalJob extends optional.Application with IJob with BaseRep } // $COVERAGE-ON$ - def archiveData(sparkSession: SparkSession, fetchData: (SparkSession, Map[String, String], String, StructType) => DataFrame, url: String, jobConfig: JobConfig): Array[Map[String, Any]] = { + def archiveData(sparkSession: SparkSession, fetchData: (SparkSession, Map[String, String], String, StructType) => DataFrame, jobConfig: JobConfig): Array[Map[String, Any]] = { val assessmentData: DataFrame = getAssessmentData(sparkSession, fetchData) .withColumn("updated_on", to_timestamp(col("updated_on"))) .withColumn("year", year(col("updated_on"))) diff --git a/data-products/src/test/scala/org/sunbird/analytics/job/report/TestAssessmentArchivalJob.scala b/data-products/src/test/scala/org/sunbird/analytics/job/report/TestAssessmentArchivalJob.scala index f747d74f6..fd33988d9 100644 --- a/data-products/src/test/scala/org/sunbird/analytics/job/report/TestAssessmentArchivalJob.scala +++ b/data-products/src/test/scala/org/sunbird/analytics/job/report/TestAssessmentArchivalJob.scala @@ -34,7 +34,7 @@ class TestAssessmentArchivalJob extends BaseReportSpec with MockFactory { override def afterAll(): Unit = { super.afterAll() val objectKey = AppConf.getConfig("course.metrics.cloud.objectKey") - new HadoopFileUtil().delete(spark.sparkContext.hadoopConfiguration, objectKey + "collection-summary-reports-v2/") + new HadoopFileUtil().delete(spark.sparkContext.hadoopConfiguration, objectKey + "assessment-archival") } val convertMethod = udf((value: mutable.WrappedArray[String]) => { @@ -48,7 +48,7 @@ class TestAssessmentArchivalJob extends BaseReportSpec with MockFactory { implicit val mockFc: FrameworkContext = mock[FrameworkContext] val strConfig = """{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.AssessmentArchivalJob","modelParams":{"truncateData":false,"store":"local","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Assessment Archival Job"}""".stripMargin implicit val jobConfig: JobConfig = JSONUtils.deserialize[JobConfig](strConfig) - val reportData = AssessmentArchivalJob.archiveData(spark, reporterMock.fetchData, "/Users/manjunathdavanam/Documents/Projects.nosync/Sunbird/partition-data", jobConfig) + val reportData = AssessmentArchivalJob.archiveData(spark, reporterMock.fetchData, jobConfig) val batch_1 = reportData.filter(x => x.getOrElse("batch_id", "").asInstanceOf[String] === "1010") batch_1.foreach(res => res("year") === "2019") From b42df8fc546fadeb8a4bcc2ef1baeef801aecbb6 Mon Sep 17 00:00:00 2001 From: Manjunath Davanam Date: Wed, 14 Jul 2021 17:20:01 +0530 Subject: [PATCH 03/55] Issue SB-24793 feat: removed the hardcoded column names in groupBy --- .../sunbird/analytics/job/report/AssessmentArchivalJob.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala index 55ec1c68a..9e1cb6567 100644 --- a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala +++ b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala @@ -50,8 +50,7 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep .withColumn("updated_on", to_timestamp(col("updated_on"))) 
.withColumn("year", year(col("updated_on"))) .withColumn("week_of_year", weekofyear(col("updated_on"))) - - val archivedBatchList = assessmentData.groupBy(col("batch_id"), col("year"), col("week_of_year")).count().collect() + val archivedBatchList = assessmentData.groupBy(partitionCols.head, partitionCols.tail: _*).count().collect() JobLogger.log(s"Total Batches to Archive By Year & Week ${archivedBatchList.length}", None, INFO) val batchesToArchive: Array[BatchPartition] = archivedBatchList.map(f => From 4ca0b4cd9b8c2a2e5cca9b3768841533d61ad1a6 Mon Sep 17 00:00:00 2001 From: Manjunath Davanam Date: Wed, 14 Jul 2021 17:29:37 +0530 Subject: [PATCH 04/55] Issue SB-24793 feat: Assessment data archival data product job implementation and testcase --- .../job/report/AssessmentArchivalJob.scala | 4 ++-- .../.1001-2019-36-1626263593647.csv.crc | Bin 0 -> 24 bytes .../.1005-2019-36-1626263593672.csv.crc | Bin 0 -> 16 bytes .../.1006-2019-36-1626263593695.csv.crc | Bin 0 -> 20 bytes .../.1010-2019-36-1626263593639.csv.crc | Bin 0 -> 20 bytes .../archival-data/1001-2019-36-1626263593647.csv | 4 ++++ .../archival-data/1005-2019-36-1626263593672.csv | 2 ++ .../archival-data/1006-2019-36-1626263593695.csv | 3 +++ .../archival-data/1010-2019-36-1626263593639.csv | 3 +++ .../resources/reports/archival-data/Archive.zip | Bin 0 -> 2562 bytes 10 files changed, 14 insertions(+), 2 deletions(-) create mode 100644 data-products/src/test/resources/reports/archival-data/.1001-2019-36-1626263593647.csv.crc create mode 100644 data-products/src/test/resources/reports/archival-data/.1005-2019-36-1626263593672.csv.crc create mode 100644 data-products/src/test/resources/reports/archival-data/.1006-2019-36-1626263593695.csv.crc create mode 100644 data-products/src/test/resources/reports/archival-data/.1010-2019-36-1626263593639.csv.crc create mode 100644 data-products/src/test/resources/reports/archival-data/1001-2019-36-1626263593647.csv create mode 100644 data-products/src/test/resources/reports/archival-data/1005-2019-36-1626263593672.csv create mode 100644 data-products/src/test/resources/reports/archival-data/1006-2019-36-1626263593695.csv create mode 100644 data-products/src/test/resources/reports/archival-data/1010-2019-36-1626263593639.csv create mode 100644 data-products/src/test/resources/reports/archival-data/Archive.zip diff --git a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala index 9e1cb6567..17dc3a530 100644 --- a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala +++ b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala @@ -34,14 +34,14 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep val truncateData: Boolean = modelParams.getOrElse("truncateData", "false").asInstanceOf[Boolean] try { val res = CommonUtil.time(archiveData(spark, fetchData, jobConfig)) + val total_archived_files = res._2.length if (truncateData) deleteRecords(spark, assessmentAggDBSettings.getOrElse("keyspace", "sunbird_courses"), assessmentAggDBSettings.getOrElse("table", "assessment_aggregator")) else JobLogger.log(s"Skipping the ${assessmentAggDBSettings.getOrElse("table", "assessment_aggregator")} truncate process", None, INFO) - JobLogger.end(s"$jobName completed execution", "SUCCESS", Option(Map("timeTaken" -> res._1, "total_archived_files" -> res._2.length))) + JobLogger.end(s"$jobName 
completed execution", "SUCCESS", Option(Map("timeTaken" -> res._1, "total_archived_files" -> total_archived_files))) } finally { frameworkContext.closeContext() spark.close() } - } // $COVERAGE-ON$ diff --git a/data-products/src/test/resources/reports/archival-data/.1001-2019-36-1626263593647.csv.crc b/data-products/src/test/resources/reports/archival-data/.1001-2019-36-1626263593647.csv.crc new file mode 100644 index 0000000000000000000000000000000000000000..5930ba67fa6f3830983590c197ba242f21fd2a80 GIT binary patch literal 24 fcmYc;N@ieSU}88lpWTu_h&$=b6pP#of@RkLP$&o7 literal 0 HcmV?d00001 diff --git a/data-products/src/test/resources/reports/archival-data/.1005-2019-36-1626263593672.csv.crc b/data-products/src/test/resources/reports/archival-data/.1005-2019-36-1626263593672.csv.crc new file mode 100644 index 0000000000000000000000000000000000000000..52523c8c3e27811125e71048892dd2f89a64f881 GIT binary patch literal 16 XcmYc;N@ieSU}9JjwC{k9b$B8GByI%G literal 0 HcmV?d00001 diff --git a/data-products/src/test/resources/reports/archival-data/.1006-2019-36-1626263593695.csv.crc b/data-products/src/test/resources/reports/archival-data/.1006-2019-36-1626263593695.csv.crc new file mode 100644 index 0000000000000000000000000000000000000000..7ab5371d78e723ada6c815b1fefb3b03319eaf54 GIT binary patch literal 20 bcmYc;N@ieSU}D&sEO2~xu|uoO6^`QoJdFn2 literal 0 HcmV?d00001 diff --git a/data-products/src/test/resources/reports/archival-data/.1010-2019-36-1626263593639.csv.crc b/data-products/src/test/resources/reports/archival-data/.1010-2019-36-1626263593639.csv.crc new file mode 100644 index 0000000000000000000000000000000000000000..9280088be2cc077cae1a911d085e523b81d63cba GIT binary patch literal 20 ccmYc;N@ieSU}CuH$TI)tLeZXiYvLXP06t&`m;e9( literal 0 HcmV?d00001 diff --git a/data-products/src/test/resources/reports/archival-data/1001-2019-36-1626263593647.csv b/data-products/src/test/resources/reports/archival-data/1001-2019-36-1626263593647.csv new file mode 100644 index 000000000..efb3f4dd8 --- /dev/null +++ b/data-products/src/test/resources/reports/archival-data/1001-2019-36-1626263593647.csv @@ -0,0 +1,4 @@ +content_id,attempt_id,user_id,course_id,batch_id,created_on,last_attempted_on,total_max_score,total_score,updated_on,grand_total,question +do_112835335135993856149,A4,user021,do_2123101488779837441168,1001,1971-09-22 02:10:53.444+0000,2019-09-06 09:59:51.000+0000,20,4,2019-09-06T15:29:51.000+05:30,2/2,"[{id: 'q3', max_score: 2, score: 1, type: 'mmc', title: 'choose one', resvalues: [{'2': '{\"text\":\"Quantity\\n\"}'}], params: [{'1': '{\"text\":\"Space and Shape\\n\"}'}, {'2': '{\"text\":\"Quantity\\n\"}'}, {'3': '{\"text\":\"Reading Skills\\n\"}'}, {'4': '{\"text\":\"Uncertainity and Data\\n\"}'}, {'answer': '{\"correct\":[\"3\"]}'}], description: 'Description of the question', duration: 10}]" +do_112835336280596480151,A4,user021,do_2123101488779837441168,1001,1971-09-22 02:10:53.444+0000,2019-09-06 09:58:51.000+0000,30,10,2019-09-06T15:29:51.000+05:30,4/4,"[{id: 'q3', max_score: 2, score: 1, type: 'mmc', title: 'choose one', resvalues: [{'2': '{\"text\":\"Quantity\\n\"}'}], params: [{'1': '{\"text\":\"Space and Shape\\n\"}'}, {'2': '{\"text\":\"Quantity\\n\"}'}, {'3': '{\"text\":\"Reading Skills\\n\"}'}, {'4': '{\"text\":\"Uncertainity and Data\\n\"}'}, {'answer': '{\"correct\":[\"3\"]}'}], description: 'Description of the question', duration: 10}]" +do_112835336280596480151,A4,user021,do_2123101488779837441168,1001,1971-09-22 02:10:53.444+0000,2019-09-06 
09:57:51.000+0000,30,8,2019-09-06T15:29:51.000+05:30,4/4,"[{id: 'q3', max_score: 2, score: 1, type: 'mmc', title: 'choose one', resvalues: [{'2': '{\"text\":\"Quantity\\n\"}'}], params: [{'1': '{\"text\":\"Space and Shape\\n\"}'}, {'2': '{\"text\":\"Quantity\\n\"}'}, {'3': '{\"text\":\"Reading Skills\\n\"}'}, {'4': '{\"text\":\"Uncertainity and Data\\n\"}'}, {'answer': '{\"correct\":[\"3\"]}'}], description: 'Description of the question', duration: 10}]" diff --git a/data-products/src/test/resources/reports/archival-data/1005-2019-36-1626263593672.csv b/data-products/src/test/resources/reports/archival-data/1005-2019-36-1626263593672.csv new file mode 100644 index 000000000..43b7da208 --- /dev/null +++ b/data-products/src/test/resources/reports/archival-data/1005-2019-36-1626263593672.csv @@ -0,0 +1,2 @@ +content_id,attempt_id,user_id,course_id,batch_id,created_on,last_attempted_on,total_max_score,total_score,updated_on,grand_total,question +do_112832394979106816112,A1,user015,do_112695422838472704115,1005,1971-09-22 02:10:53.444+0000,2019-09-06 09:59:51.000+0000,10,5,2019-09-06T15:29:51.000+05:30,4/4,"[{id: 'q3', max_score: 2, score: 1, type: 'mmc', title: 'choose one', resvalues: [{'2': '{\"text\":\"Quantity\\n\"}'}], params: [{'1': '{\"text\":\"Space and Shape\\n\"}'}, {'2': '{\"text\":\"Quantity\\n\"}'}, {'3': '{\"text\":\"Reading Skills\\n\"}'}, {'4': '{\"text\":\"Uncertainity and Data\\n\"}'}, {'answer': '{\"correct\":[\"3\"]}'}], description: 'Description of the question', duration: 10}]" diff --git a/data-products/src/test/resources/reports/archival-data/1006-2019-36-1626263593695.csv b/data-products/src/test/resources/reports/archival-data/1006-2019-36-1626263593695.csv new file mode 100644 index 000000000..6d0881917 --- /dev/null +++ b/data-products/src/test/resources/reports/archival-data/1006-2019-36-1626263593695.csv @@ -0,0 +1,3 @@ +content_id,attempt_id,user_id,course_id,batch_id,created_on,last_attempted_on,total_max_score,total_score,updated_on,grand_total,question +do_112832394979106816112,A6,user026,do_1126458775024025601296,1006,1971-09-22 02:10:53.444+0000,2019-09-06 09:59:51.000+0000,30,10,2019-09-06T15:29:51.000+05:30,5/5,"[{id: 'q3', max_score: 2, score: 1, type: 'mmc', title: 'choose one', resvalues: [{'2': '{\"text\":\"Quantity\\n\"}'}], params: [{'1': '{\"text\":\"Space and Shape\\n\"}'}, {'2': '{\"text\":\"Quantity\\n\"}'}, {'3': '{\"text\":\"Reading Skills\\n\"}'}, {'4': '{\"text\":\"Uncertainity and Data\\n\"}'}, {'answer': '{\"correct\":[\"3\"]}'}], description: 'Description of the question', duration: 10}]" +do_112832394979106816112,A6,user021,do_1126458775024025601296,1006,1971-09-22 02:10:53.444+0000,2019-09-06 09:59:51.000+0000,30,10,2019-09-06T15:29:51.000+05:30,6/6,"[{id: 'q3', max_score: 2, score: 1, type: 'mmc', title: 'choose one', resvalues: [{'2': '{\"text\":\"Quantity\\n\"}'}], params: [{'1': '{\"text\":\"Space and Shape\\n\"}'}, {'2': '{\"text\":\"Quantity\\n\"}'}, {'3': '{\"text\":\"Reading Skills\\n\"}'}, {'4': '{\"text\":\"Uncertainity and Data\\n\"}'}, {'answer': '{\"correct\":[\"3\"]}'}], description: 'Description of the question', duration: 10}]" diff --git a/data-products/src/test/resources/reports/archival-data/1010-2019-36-1626263593639.csv b/data-products/src/test/resources/reports/archival-data/1010-2019-36-1626263593639.csv new file mode 100644 index 000000000..30ca494cd --- /dev/null +++ b/data-products/src/test/resources/reports/archival-data/1010-2019-36-1626263593639.csv @@ -0,0 +1,3 @@ 
+content_id,attempt_id,user_id,course_id,batch_id,created_on,last_attempted_on,total_max_score,total_score,updated_on,grand_total,question +do_112835335135993856149,A3,user030,do_1125559882615357441175,1010,1971-09-22 02:10:53.444+0000,2019-09-06 09:59:51.000+0000,10,5,2019-09-06T15:29:51.000+05:30,10/2,"[{id: 'q3', max_score: 2, score: 1, type: 'mmc', title: 'choose one', resvalues: [{'2': '{\"text\":\"Quantity\\n\"}'}], params: [{'1': '{\"text\":\"Space and Shape\\n\"}'}, {'2': '{\"text\":\"Quantity\\n\"}'}, {'3': '{\"text\":\"Reading Skills\\n\"}'}, {'4': '{\"text\":\"Uncertainity and Data\\n\"}'}, {'answer': '{\"correct\":[\"3\"]}'}], description: 'Description of the question', duration: 10}]" +do_112832394979106816112,A2,user030,do_1125559882615357441175,1010,1971-09-22 02:10:53.444+0000,2019-09-06 09:59:51.000+0000,10,5,2019-09-06T15:29:51.000+05:30,4/4,"[{id: 'q3', max_score: 2, score: 1, type: 'mmc', title: 'choose one', resvalues: [{'2': '{\"text\":\"Quantity\\n\"}'}], params: [{'1': '{\"text\":\"Space and Shape\\n\"}'}, {'2': '{\"text\":\"Quantity\\n\"}'}, {'3': '{\"text\":\"Reading Skills\\n\"}'}, {'4': '{\"text\":\"Uncertainity and Data\\n\"}'}, {'answer': '{\"correct\":[\"3\"]}'}], description: 'Description of the question', duration: 10}]" diff --git a/data-products/src/test/resources/reports/archival-data/Archive.zip b/data-products/src/test/resources/reports/archival-data/Archive.zip new file mode 100644 index 0000000000000000000000000000000000000000..7df887b6d57c259fb81688cb8efc7f956c9d102e GIT binary patch literal 2562 zcmbuBe>BtkAIG;5%kOH#O-5sW3^RtzMJd#1Vxh=2Nv1Tuex&4*t{t@L+Wn#e7}9G<9p6Mr}yi8&gb*_=lwkA{dm4#&zHTekT3)Q z=GRR8A_4H9r6URezyQhsJv3TRM<1<+)xj9)=o#tr3uB1I7#ZwCoe2tYa+Cy!B6}A- z*!)NEzIa(202F=?1OWJZ0ssrtF2Cn!2xZmumvp>rP06piwF@chAaq$)B77p#&eF4} zTByW&dC|B-LeX1DK{nPpBM`;$U)!_UnU#n4sgv!JSwW*-PqMN|HRp_O(PcMCO$*4ds=_I zss6G{Sr__@w=_)m3b3+cy405Ja#pXuSmPWvRTmIqF}X<8_iD&18{p2T*3^Xr25!5y z=TgUQUwu~@2k(c<%4525>M}XO|dm<&u5+|HLIp)F!_PWf{s+xY%TZv~7K4z|J7%bGgCpMa|@5 zmrQc;@EGV789Fs8H-b}`s$w@|map#jwDuvX3=dXO4kivt2N{d3^b_m*L}~d%nfIqr zq0zB-0y08mJcB}X(z)p{S{%FK9JixQW>iJwodW#5P>u8j3B?{TDuD>SKb!hRYt`Ejbq3?X!x zD@Wsuj{ps*2KE(ZjgfM4h))44!q5l3mxTL=NfqJe>QQb5?_@X68Lop%@T8s-m!rS~ zQf04o*DH0B*abPXdK0bL3k?P&v=2wBU**WlevDV6=^^&6rB z>^qQka|wLx0=Qda=LvttZnPD4tl?MKJN{pDX!4_>|Hhsm9%B+mF1z&FSyQ*yFU-~1 z=e(K=q%4j+Y{?8?-93nR4(lv2gY1=m`tv6?W}RgS#NQpE90Uu_8dTQIpAH$A|WF6i6lgP+F| zU&BCd$VMKAYgbVNkGVqdW2!Q#qxx>w-Rna#FUZtxtnMn8R-I3!;dh=GNR;BkpFpie zl6f3fQr+=S-a`;XV@ZQ=9tDl{qhu+Ew80V zijI+Uc`mE0n^H&DKIEZkBnwX!!?CtYG=yn(*-0Ez#5gMF1+7Iy&5RB7Sf4r5&pO%W zLwPH2*Th@9KRS8+RzWUucC>slDgF+b*7iuO9D6UrCfMP9cbS6z7M;702PcDniw(dwn#1NVNPN68_dKdicS)nbM^e zMCbiJC1(XCCsIGXUETEVT@XT!GgcP05OXXbpQYW literal 0 HcmV?d00001 From b6d373378e5fa53082132925f0abce371d73c750 Mon Sep 17 00:00:00 2001 From: Manjunath Davanam Date: Wed, 14 Jul 2021 17:29:59 +0530 Subject: [PATCH 05/55] Revert "Issue SB-24793 feat: Assessment data archival data product job implementation and testcase" This reverts commit 4ca0b4cd9b8c2a2e5cca9b3768841533d61ad1a6. 
--- .../job/report/AssessmentArchivalJob.scala | 4 ++-- .../.1001-2019-36-1626263593647.csv.crc | Bin 24 -> 0 bytes .../.1005-2019-36-1626263593672.csv.crc | Bin 16 -> 0 bytes .../.1006-2019-36-1626263593695.csv.crc | Bin 20 -> 0 bytes .../.1010-2019-36-1626263593639.csv.crc | Bin 20 -> 0 bytes .../archival-data/1001-2019-36-1626263593647.csv | 4 ---- .../archival-data/1005-2019-36-1626263593672.csv | 2 -- .../archival-data/1006-2019-36-1626263593695.csv | 3 --- .../archival-data/1010-2019-36-1626263593639.csv | 3 --- .../resources/reports/archival-data/Archive.zip | Bin 2562 -> 0 bytes 10 files changed, 2 insertions(+), 14 deletions(-) delete mode 100644 data-products/src/test/resources/reports/archival-data/.1001-2019-36-1626263593647.csv.crc delete mode 100644 data-products/src/test/resources/reports/archival-data/.1005-2019-36-1626263593672.csv.crc delete mode 100644 data-products/src/test/resources/reports/archival-data/.1006-2019-36-1626263593695.csv.crc delete mode 100644 data-products/src/test/resources/reports/archival-data/.1010-2019-36-1626263593639.csv.crc delete mode 100644 data-products/src/test/resources/reports/archival-data/1001-2019-36-1626263593647.csv delete mode 100644 data-products/src/test/resources/reports/archival-data/1005-2019-36-1626263593672.csv delete mode 100644 data-products/src/test/resources/reports/archival-data/1006-2019-36-1626263593695.csv delete mode 100644 data-products/src/test/resources/reports/archival-data/1010-2019-36-1626263593639.csv delete mode 100644 data-products/src/test/resources/reports/archival-data/Archive.zip diff --git a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala index 17dc3a530..9e1cb6567 100644 --- a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala +++ b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala @@ -34,14 +34,14 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep val truncateData: Boolean = modelParams.getOrElse("truncateData", "false").asInstanceOf[Boolean] try { val res = CommonUtil.time(archiveData(spark, fetchData, jobConfig)) - val total_archived_files = res._2.length if (truncateData) deleteRecords(spark, assessmentAggDBSettings.getOrElse("keyspace", "sunbird_courses"), assessmentAggDBSettings.getOrElse("table", "assessment_aggregator")) else JobLogger.log(s"Skipping the ${assessmentAggDBSettings.getOrElse("table", "assessment_aggregator")} truncate process", None, INFO) - JobLogger.end(s"$jobName completed execution", "SUCCESS", Option(Map("timeTaken" -> res._1, "total_archived_files" -> total_archived_files))) + JobLogger.end(s"$jobName completed execution", "SUCCESS", Option(Map("timeTaken" -> res._1, "total_archived_files" -> res._2.length))) } finally { frameworkContext.closeContext() spark.close() } + } // $COVERAGE-ON$ diff --git a/data-products/src/test/resources/reports/archival-data/.1001-2019-36-1626263593647.csv.crc b/data-products/src/test/resources/reports/archival-data/.1001-2019-36-1626263593647.csv.crc deleted file mode 100644 index 5930ba67fa6f3830983590c197ba242f21fd2a80..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 24 fcmYc;N@ieSU}88lpWTu_h&$=b6pP#of@RkLP$&o7 diff --git a/data-products/src/test/resources/reports/archival-data/.1005-2019-36-1626263593672.csv.crc 
b/data-products/src/test/resources/reports/archival-data/.1005-2019-36-1626263593672.csv.crc deleted file mode 100644 index 52523c8c3e27811125e71048892dd2f89a64f881..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 16 XcmYc;N@ieSU}9JjwC{k9b$B8GByI%G diff --git a/data-products/src/test/resources/reports/archival-data/.1006-2019-36-1626263593695.csv.crc b/data-products/src/test/resources/reports/archival-data/.1006-2019-36-1626263593695.csv.crc deleted file mode 100644 index 7ab5371d78e723ada6c815b1fefb3b03319eaf54..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 20 bcmYc;N@ieSU}D&sEO2~xu|uoO6^`QoJdFn2 diff --git a/data-products/src/test/resources/reports/archival-data/.1010-2019-36-1626263593639.csv.crc b/data-products/src/test/resources/reports/archival-data/.1010-2019-36-1626263593639.csv.crc deleted file mode 100644 index 9280088be2cc077cae1a911d085e523b81d63cba..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 20 ccmYc;N@ieSU}CuH$TI)tLeZXiYvLXP06t&`m;e9( diff --git a/data-products/src/test/resources/reports/archival-data/1001-2019-36-1626263593647.csv b/data-products/src/test/resources/reports/archival-data/1001-2019-36-1626263593647.csv deleted file mode 100644 index efb3f4dd8..000000000 --- a/data-products/src/test/resources/reports/archival-data/1001-2019-36-1626263593647.csv +++ /dev/null @@ -1,4 +0,0 @@ -content_id,attempt_id,user_id,course_id,batch_id,created_on,last_attempted_on,total_max_score,total_score,updated_on,grand_total,question -do_112835335135993856149,A4,user021,do_2123101488779837441168,1001,1971-09-22 02:10:53.444+0000,2019-09-06 09:59:51.000+0000,20,4,2019-09-06T15:29:51.000+05:30,2/2,"[{id: 'q3', max_score: 2, score: 1, type: 'mmc', title: 'choose one', resvalues: [{'2': '{\"text\":\"Quantity\\n\"}'}], params: [{'1': '{\"text\":\"Space and Shape\\n\"}'}, {'2': '{\"text\":\"Quantity\\n\"}'}, {'3': '{\"text\":\"Reading Skills\\n\"}'}, {'4': '{\"text\":\"Uncertainity and Data\\n\"}'}, {'answer': '{\"correct\":[\"3\"]}'}], description: 'Description of the question', duration: 10}]" -do_112835336280596480151,A4,user021,do_2123101488779837441168,1001,1971-09-22 02:10:53.444+0000,2019-09-06 09:58:51.000+0000,30,10,2019-09-06T15:29:51.000+05:30,4/4,"[{id: 'q3', max_score: 2, score: 1, type: 'mmc', title: 'choose one', resvalues: [{'2': '{\"text\":\"Quantity\\n\"}'}], params: [{'1': '{\"text\":\"Space and Shape\\n\"}'}, {'2': '{\"text\":\"Quantity\\n\"}'}, {'3': '{\"text\":\"Reading Skills\\n\"}'}, {'4': '{\"text\":\"Uncertainity and Data\\n\"}'}, {'answer': '{\"correct\":[\"3\"]}'}], description: 'Description of the question', duration: 10}]" -do_112835336280596480151,A4,user021,do_2123101488779837441168,1001,1971-09-22 02:10:53.444+0000,2019-09-06 09:57:51.000+0000,30,8,2019-09-06T15:29:51.000+05:30,4/4,"[{id: 'q3', max_score: 2, score: 1, type: 'mmc', title: 'choose one', resvalues: [{'2': '{\"text\":\"Quantity\\n\"}'}], params: [{'1': '{\"text\":\"Space and Shape\\n\"}'}, {'2': '{\"text\":\"Quantity\\n\"}'}, {'3': '{\"text\":\"Reading Skills\\n\"}'}, {'4': '{\"text\":\"Uncertainity and Data\\n\"}'}, {'answer': '{\"correct\":[\"3\"]}'}], description: 'Description of the question', duration: 10}]" diff --git a/data-products/src/test/resources/reports/archival-data/1005-2019-36-1626263593672.csv b/data-products/src/test/resources/reports/archival-data/1005-2019-36-1626263593672.csv deleted file mode 100644 index 43b7da208..000000000 --- 
a/data-products/src/test/resources/reports/archival-data/1005-2019-36-1626263593672.csv +++ /dev/null @@ -1,2 +0,0 @@ -content_id,attempt_id,user_id,course_id,batch_id,created_on,last_attempted_on,total_max_score,total_score,updated_on,grand_total,question -do_112832394979106816112,A1,user015,do_112695422838472704115,1005,1971-09-22 02:10:53.444+0000,2019-09-06 09:59:51.000+0000,10,5,2019-09-06T15:29:51.000+05:30,4/4,"[{id: 'q3', max_score: 2, score: 1, type: 'mmc', title: 'choose one', resvalues: [{'2': '{\"text\":\"Quantity\\n\"}'}], params: [{'1': '{\"text\":\"Space and Shape\\n\"}'}, {'2': '{\"text\":\"Quantity\\n\"}'}, {'3': '{\"text\":\"Reading Skills\\n\"}'}, {'4': '{\"text\":\"Uncertainity and Data\\n\"}'}, {'answer': '{\"correct\":[\"3\"]}'}], description: 'Description of the question', duration: 10}]" diff --git a/data-products/src/test/resources/reports/archival-data/1006-2019-36-1626263593695.csv b/data-products/src/test/resources/reports/archival-data/1006-2019-36-1626263593695.csv deleted file mode 100644 index 6d0881917..000000000 --- a/data-products/src/test/resources/reports/archival-data/1006-2019-36-1626263593695.csv +++ /dev/null @@ -1,3 +0,0 @@ -content_id,attempt_id,user_id,course_id,batch_id,created_on,last_attempted_on,total_max_score,total_score,updated_on,grand_total,question -do_112832394979106816112,A6,user026,do_1126458775024025601296,1006,1971-09-22 02:10:53.444+0000,2019-09-06 09:59:51.000+0000,30,10,2019-09-06T15:29:51.000+05:30,5/5,"[{id: 'q3', max_score: 2, score: 1, type: 'mmc', title: 'choose one', resvalues: [{'2': '{\"text\":\"Quantity\\n\"}'}], params: [{'1': '{\"text\":\"Space and Shape\\n\"}'}, {'2': '{\"text\":\"Quantity\\n\"}'}, {'3': '{\"text\":\"Reading Skills\\n\"}'}, {'4': '{\"text\":\"Uncertainity and Data\\n\"}'}, {'answer': '{\"correct\":[\"3\"]}'}], description: 'Description of the question', duration: 10}]" -do_112832394979106816112,A6,user021,do_1126458775024025601296,1006,1971-09-22 02:10:53.444+0000,2019-09-06 09:59:51.000+0000,30,10,2019-09-06T15:29:51.000+05:30,6/6,"[{id: 'q3', max_score: 2, score: 1, type: 'mmc', title: 'choose one', resvalues: [{'2': '{\"text\":\"Quantity\\n\"}'}], params: [{'1': '{\"text\":\"Space and Shape\\n\"}'}, {'2': '{\"text\":\"Quantity\\n\"}'}, {'3': '{\"text\":\"Reading Skills\\n\"}'}, {'4': '{\"text\":\"Uncertainity and Data\\n\"}'}, {'answer': '{\"correct\":[\"3\"]}'}], description: 'Description of the question', duration: 10}]" diff --git a/data-products/src/test/resources/reports/archival-data/1010-2019-36-1626263593639.csv b/data-products/src/test/resources/reports/archival-data/1010-2019-36-1626263593639.csv deleted file mode 100644 index 30ca494cd..000000000 --- a/data-products/src/test/resources/reports/archival-data/1010-2019-36-1626263593639.csv +++ /dev/null @@ -1,3 +0,0 @@ -content_id,attempt_id,user_id,course_id,batch_id,created_on,last_attempted_on,total_max_score,total_score,updated_on,grand_total,question -do_112835335135993856149,A3,user030,do_1125559882615357441175,1010,1971-09-22 02:10:53.444+0000,2019-09-06 09:59:51.000+0000,10,5,2019-09-06T15:29:51.000+05:30,10/2,"[{id: 'q3', max_score: 2, score: 1, type: 'mmc', title: 'choose one', resvalues: [{'2': '{\"text\":\"Quantity\\n\"}'}], params: [{'1': '{\"text\":\"Space and Shape\\n\"}'}, {'2': '{\"text\":\"Quantity\\n\"}'}, {'3': '{\"text\":\"Reading Skills\\n\"}'}, {'4': '{\"text\":\"Uncertainity and Data\\n\"}'}, {'answer': '{\"correct\":[\"3\"]}'}], description: 'Description of the question', duration: 10}]" 
-do_112832394979106816112,A2,user030,do_1125559882615357441175,1010,1971-09-22 02:10:53.444+0000,2019-09-06 09:59:51.000+0000,10,5,2019-09-06T15:29:51.000+05:30,4/4,"[{id: 'q3', max_score: 2, score: 1, type: 'mmc', title: 'choose one', resvalues: [{'2': '{\"text\":\"Quantity\\n\"}'}], params: [{'1': '{\"text\":\"Space and Shape\\n\"}'}, {'2': '{\"text\":\"Quantity\\n\"}'}, {'3': '{\"text\":\"Reading Skills\\n\"}'}, {'4': '{\"text\":\"Uncertainity and Data\\n\"}'}, {'answer': '{\"correct\":[\"3\"]}'}], description: 'Description of the question', duration: 10}]" diff --git a/data-products/src/test/resources/reports/archival-data/Archive.zip b/data-products/src/test/resources/reports/archival-data/Archive.zip deleted file mode 100644 index 7df887b6d57c259fb81688cb8efc7f956c9d102e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2562 zcmbuBe>BtkAIG;5%kOH#O-5sW3^RtzMJd#1Vxh=2Nv1Tuex&4*t{t@L+Wn#e7}9G<9p6Mr}yi8&gb*_=lwkA{dm4#&zHTekT3)Q z=GRR8A_4H9r6URezyQhsJv3TRM<1<+)xj9)=o#tr3uB1I7#ZwCoe2tYa+Cy!B6}A- z*!)NEzIa(202F=?1OWJZ0ssrtF2Cn!2xZmumvp>rP06piwF@chAaq$)B77p#&eF4} zTByW&dC|B-LeX1DK{nPpBM`;$U)!_UnU#n4sgv!JSwW*-PqMN|HRp_O(PcMCO$*4ds=_I zss6G{Sr__@w=_)m3b3+cy405Ja#pXuSmPWvRTmIqF}X<8_iD&18{p2T*3^Xr25!5y z=TgUQUwu~@2k(c<%4525>M}XO|dm<&u5+|HLIp)F!_PWf{s+xY%TZv~7K4z|J7%bGgCpMa|@5 zmrQc;@EGV789Fs8H-b}`s$w@|map#jwDuvX3=dXO4kivt2N{d3^b_m*L}~d%nfIqr zq0zB-0y08mJcB}X(z)p{S{%FK9JixQW>iJwodW#5P>u8j3B?{TDuD>SKb!hRYt`Ejbq3?X!x zD@Wsuj{ps*2KE(ZjgfM4h))44!q5l3mxTL=NfqJe>QQb5?_@X68Lop%@T8s-m!rS~ zQf04o*DH0B*abPXdK0bL3k?P&v=2wBU**WlevDV6=^^&6rB z>^qQka|wLx0=Qda=LvttZnPD4tl?MKJN{pDX!4_>|Hhsm9%B+mF1z&FSyQ*yFU-~1 z=e(K=q%4j+Y{?8?-93nR4(lv2gY1=m`tv6?W}RgS#NQpE90Uu_8dTQIpAH$A|WF6i6lgP+F| zU&BCd$VMKAYgbVNkGVqdW2!Q#qxx>w-Rna#FUZtxtnMn8R-I3!;dh=GNR;BkpFpie zl6f3fQr+=S-a`;XV@ZQ=9tDl{qhu+Ew80V zijI+Uc`mE0n^H&DKIEZkBnwX!!?CtYG=yn(*-0Ez#5gMF1+7Iy&5RB7Sf4r5&pO%W zLwPH2*Th@9KRS8+RzWUucC>slDgF+b*7iuO9D6UrCfMP9cbS6z7M;702PcDniw(dwn#1NVNPN68_dKdicS)nbM^e zMCbiJC1(XCCsIGXUETEVT@XT!GgcP05OXXbpQYW From f4ca1cba32887e03b2e6f61e01f71349c5efac08 Mon Sep 17 00:00:00 2001 From: Manjunath Davanam Date: Wed, 14 Jul 2021 17:31:55 +0530 Subject: [PATCH 06/55] Issue SB-24793 feat: Assessment data archival data product job implementation and testcase --- .../sunbird/analytics/job/report/AssessmentArchivalJob.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala index 9e1cb6567..607b648ca 100644 --- a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala +++ b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala @@ -34,8 +34,9 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep val truncateData: Boolean = modelParams.getOrElse("truncateData", "false").asInstanceOf[Boolean] try { val res = CommonUtil.time(archiveData(spark, fetchData, jobConfig)) + val total_archived_files = res._2.length if (truncateData) deleteRecords(spark, assessmentAggDBSettings.getOrElse("keyspace", "sunbird_courses"), assessmentAggDBSettings.getOrElse("table", "assessment_aggregator")) else JobLogger.log(s"Skipping the ${assessmentAggDBSettings.getOrElse("table", "assessment_aggregator")} truncate process", None, INFO) - JobLogger.end(s"$jobName completed execution", "SUCCESS", Option(Map("timeTaken" -> res._1, 
"total_archived_files" -> res._2.length))) + JobLogger.end(s"$jobName completed execution", "SUCCESS", Option(Map("timeTaken" -> res._1, "total_archived_files" -> total_archived_files))) } finally { frameworkContext.closeContext() spark.close() From acd9a11c2fe2d6e7aaf2e942a24c72f6dc98b8f6 Mon Sep 17 00:00:00 2001 From: Manjunath Davanam Date: Wed, 14 Jul 2021 18:03:12 +0530 Subject: [PATCH 07/55] Issue SB-24793 feat: Assessment data archival cassandra connection and enabling the code coverage --- .../analytics/job/report/AssessmentArchivalJob.scala | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala index 607b648ca..b32109774 100644 --- a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala +++ b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala @@ -1,6 +1,8 @@ package org.sunbird.analytics.job.report +import com.datastax.spark.connector.cql.CassandraConnectorConf import org.apache.spark.SparkContext +import org.apache.spark.sql.cassandra.CassandraSparkSessionFunctions import org.apache.spark.sql.functions.{col, to_timestamp, weekofyear, year} import org.apache.spark.sql.types.StructType import org.apache.spark.sql.{DataFrame, SparkSession} @@ -21,6 +23,7 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep case class BatchPartition(batch_id: String, year: Int, week_of_year: Int) + // $COVERAGE-OFF$ Disabling scoverage for main and execute method override def main(config: String)(implicit sc: Option[SparkContext], fc: Option[FrameworkContext]): Unit = { implicit val className: String = "org.sunbird.analytics.job.report.AssessmentArchivalJob" @@ -32,6 +35,7 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep implicit val frameworkContext: FrameworkContext = getReportingFrameworkContext() val modelParams = jobConfig.modelParams.get val truncateData: Boolean = modelParams.getOrElse("truncateData", "false").asInstanceOf[Boolean] + init() try { val res = CommonUtil.time(archiveData(spark, fetchData, jobConfig)) val total_archived_files = res._2.length @@ -45,6 +49,10 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep } + def init()(implicit spark: SparkSession, fc: FrameworkContext, config: JobConfig) { + spark.setCassandraConf("LMSCluster", CassandraConnectorConf.ConnectionHostParam.option(AppConf.getConfig("sunbird.courses.cluster.host"))) + } + // $COVERAGE-ON$ def archiveData(sparkSession: SparkSession, fetchData: (SparkSession, Map[String, String], String, StructType) => DataFrame, jobConfig: JobConfig): Array[Map[String, Any]] = { val assessmentData: DataFrame = getAssessmentData(sparkSession, fetchData) From 9d7525684220ffa3efa7805317c366f6562e2af5 Mon Sep 17 00:00:00 2001 From: Manjunath Davanam Date: Fri, 16 Jul 2021 13:55:43 +0530 Subject: [PATCH 08/55] Issue SB-24793 feat: Assessment data archival question data serialisation issue fix --- .../collection/BaseCollectionExhaustJob.scala | 11 +++ .../job/report/AssessmentArchivalJob.scala | 75 +++++++++++-------- 2 files changed, 56 insertions(+), 30 deletions(-) diff --git a/data-products/src/main/scala/org/sunbird/analytics/exhaust/collection/BaseCollectionExhaustJob.scala b/data-products/src/main/scala/org/sunbird/analytics/exhaust/collection/BaseCollectionExhaustJob.scala index 
9e2d1b786..d5e07e170 100644 --- a/data-products/src/main/scala/org/sunbird/analytics/exhaust/collection/BaseCollectionExhaustJob.scala +++ b/data-products/src/main/scala/org/sunbird/analytics/exhaust/collection/BaseCollectionExhaustJob.scala @@ -1,6 +1,7 @@ package org.sunbird.analytics.exhaust.collection import com.datastax.spark.connector.cql.CassandraConnectorConf +import com.google.gson.Gson import org.apache.spark.SparkContext import org.apache.spark.sql.cassandra._ import org.apache.spark.sql.functions._ @@ -488,6 +489,7 @@ trait BaseCollectionExhaustJob extends BaseReportsJob with IJob with OnDemandExh } object UDFUtils extends Serializable { + val gson = new Gson() def toDecryptFun(str: String): String = { DecryptUtil.decryptData(str) } @@ -509,8 +511,17 @@ object UDFUtils extends Serializable { sanitizedStr; } + def toParseFun(array: AnyRef): String = { + import scala.collection.JavaConverters._ + val str = gson.toJson(array) + val sanitizedStr = str.replace("\\n", "").replace("\\", "").replace("\"", "'"); + sanitizedStr; + } + val toJSON = udf[String, AnyRef](toJSONFun) + val parseResult = udf[String, AnyRef](toParseFun) + def extractFromArrayStringFun(board: String): String = { try { val str = JSONUtils.deserialize[AnyRef](board); diff --git a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala index b32109774..2e442d686 100644 --- a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala +++ b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala @@ -1,9 +1,9 @@ package org.sunbird.analytics.job.report import com.datastax.spark.connector.cql.CassandraConnectorConf -import org.apache.spark.SparkContext +import org.apache.spark.{SparkContext, sql} import org.apache.spark.sql.cassandra.CassandraSparkSessionFunctions -import org.apache.spark.sql.functions.{col, to_timestamp, weekofyear, year} +import org.apache.spark.sql.functions.{col, explode_outer, to_timestamp, weekofyear, year} import org.apache.spark.sql.types.StructType import org.apache.spark.sql.{DataFrame, SparkSession} import org.ekstep.analytics.framework.Level.INFO @@ -11,9 +11,9 @@ import org.ekstep.analytics.framework.conf.AppConf import org.ekstep.analytics.framework.util.DatasetUtil.extensions import org.ekstep.analytics.framework.util.{CommonUtil, JSONUtils, JobLogger} import org.ekstep.analytics.framework.{FrameworkContext, IJob, JobConfig} +import org.sunbird.analytics.exhaust.collection.UDFUtils -import java.util.concurrent.CompletableFuture -import java.util.function.Supplier +import java.util.concurrent.atomic.AtomicInteger object AssessmentArchivalJob extends optional.Application with IJob with BaseReportsJob { val cassandraUrl = "org.apache.spark.sql.cassandra" @@ -49,29 +49,45 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep } - def init()(implicit spark: SparkSession, fc: FrameworkContext, config: JobConfig) { + def init()(implicit spark: SparkSession, fc: FrameworkContext, config: JobConfig): Unit = { spark.setCassandraConf("LMSCluster", CassandraConnectorConf.ConnectionHostParam.option(AppConf.getConfig("sunbird.courses.cluster.host"))) } // $COVERAGE-ON$ def archiveData(sparkSession: SparkSession, fetchData: (SparkSession, Map[String, String], String, StructType) => DataFrame, jobConfig: JobConfig): Array[Map[String, Any]] = { val assessmentData: DataFrame = 
getAssessmentData(sparkSession, fetchData)
-      .withColumn("updated_on", to_timestamp(col("updated_on")))
+    val updatedData = assessmentData.withColumn("updated_on", to_timestamp(col("updated_on")))
       .withColumn("year", year(col("updated_on")))
       .withColumn("week_of_year", weekofyear(col("updated_on")))
-
-    val archivedBatchList = assessmentData.groupBy(partitionCols.head, partitionCols.tail: _*).count().collect()
-    JobLogger.log(s"Total Batches to Archive By Year & Week ${archivedBatchList.length}", None, INFO)
-
+      // re-serialise the 'question' collection into a sanitised JSON string so it survives the CSV write
+      .withColumn("question", UDFUtils.parseResult(col("question")))
+    val archivedBatchList = updatedData.groupBy(partitionCols.head, partitionCols.tail: _*).count().collect()
+    // counts down the batches that are still pending, for progress logging
+    val archivedBatchCount = new AtomicInteger(archivedBatchList.length)
+    JobLogger.log(s"Total Batches to Archive By Year & Week $archivedBatchCount", None, INFO)
     val batchesToArchive: Array[BatchPartition] = archivedBatchList.map(f =>
       BatchPartition(f.get(0).asInstanceOf[String], f.get(1).asInstanceOf[Int], f.get(2).asInstanceOf[Int]))
-
-    val archivedBatchResult = for (batch <- batchesToArchive) yield {
-      val filteredDF = assessmentData
+    for (batch <- batchesToArchive) yield {
+      val filteredDF = updatedData
        .filter(col("batch_id") === batch.batch_id && col("year") === batch.year && col("week_of_year") === batch.week_of_year)
-      syncToCloud(filteredDF.drop("year", "week_of_year"), batch, jobConfig)
+      upload(filteredDF.drop("year", "week_of_year"), batch, jobConfig)
+      val metrics = Map("batch_id" -> batch.batch_id, "year" -> batch.year, "week_of_year" -> batch.week_of_year, "pending_batches" -> archivedBatchCount.getAndDecrement(), "total_records" -> filteredDF.count())
+      JobLogger.log("Data archived for the batch; the pending batch count is in the metrics", Some(metrics), INFO)
+      assessmentData.unpersist()
+      metrics
     }
-    CompletableFuture.allOf(archivedBatchResult: _*)
-    archivedBatchResult.map(f => f.join())
   }
 
   def getAssessmentData(spark: SparkSession, fetchData: (SparkSession, Map[String, String], String, StructType) => DataFrame): DataFrame = {
@@ -79,34 +95,33 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep
   }
 
   def deleteRecords(sparkSession: SparkSession, keyspace: String, table: String): Unit = {
-    sparkSession.sql(s"TRUNCATE TABLE $keyspace.$table")
+    //sparkSession.sql(s"TRUNCATE TABLE $keyspace.$table")
     JobLogger.log("The job cleared the table data successfully; please run compaction on the table", None, INFO)
   }
 
-  def syncToCloud(archivedData: DataFrame, batch: BatchPartition, conf: JobConfig): CompletableFuture[Map[String, Any]] = {
-    CompletableFuture.supplyAsync(new Supplier[Map[String, Any]]() {
-      override def get(): Map[String, Any] = {
-        val res = CommonUtil.time(upload(archivedData, s"${batch.batch_id}-${batch.year}-${batch.week_of_year}", conf))
-        val metrics = Map("batch_id" -> batch.batch_id, "year" -> batch.year,
"week_of_year" -> batch.week_of_year, "time_taken" -> res._1, "total_records" -> archivedData.count()) - JobLogger.log(s"Data is archived for ", Some(metrics), INFO) - metrics - } - }) - } - - def upload(reportData: DataFrame, - fileName: String, + // def syncToCloud(archivedData: DataFrame, batch: BatchPartition, conf: JobConfig): CompletableFuture[Map[String, Any]] = { + // CompletableFuture.supplyAsync(new Supplier[Map[String, Any]]() { + // override def get(): Map[String, Any] = { + // val res = CommonUtil.time(upload(archivedData, s"${batch.batch_id}-${batch.year}-${batch.week_of_year}", conf)) + // Map("batch_id" -> batch.batch_id, "year" -> batch.year, "week_of_year" -> batch.week_of_year, "time_taken" -> res._1, "total_records" -> archivedData.count()) + // } + // }) + // } + + def upload(archivedData: DataFrame, + batch: BatchPartition, jobConfig: JobConfig): List[String] = { val modelParams = jobConfig.modelParams.get val reportPath: String = modelParams.getOrElse("reportPath", "archival-data/").asInstanceOf[String] val container = AppConf.getConfig("cloud.container.reports") val objectKey = AppConf.getConfig("course.metrics.cloud.objectKey") + val fileName = s"${batch.batch_id}-${batch.year}-${batch.week_of_year}" val storageConfig = getStorageConfig( container, objectKey, jobConfig) JobLogger.log(s"Uploading reports to blob storage", None, INFO) - reportData.saveToBlobStore(storageConfig, "csv", s"$reportPath$fileName-${System.currentTimeMillis()}", Option(Map("header" -> "true")), None) + archivedData.saveToBlobStore(storageConfig, "csv", s"$reportPath$fileName-${System.currentTimeMillis()}", Option(Map("header" -> "true")), None) } } From 3bb90fbd46df0a2fb0c23c2497c789f1c460f999 Mon Sep 17 00:00:00 2001 From: Manjunath Davanam Date: Fri, 16 Jul 2021 16:12:03 +0530 Subject: [PATCH 09/55] Issue SB-24793 feat: Assessment data archival question data serialisation issue fix --- ...4873034675814445-2021-16-1626431620003.csv.crc | Bin 0 -> 128 bytes ...4873034675814445-2021-17-1626431618052.csv.crc | Bin 0 -> 72 bytes ...4873034675814445-2021-20-1626431619056.csv.crc | Bin 0 -> 128 bytes .../.batch-001-2021-24-1626431616708.csv.crc | Bin 0 -> 44 bytes ...01324873034675814445-2021-16-1626431620003.csv | 3 +++ ...01324873034675814445-2021-17-1626431618052.csv | 2 ++ ...01324873034675814445-2021-20-1626431619056.csv | 3 +++ .../batch-001-2021-24-1626431616708.csv | 6 ++++++ 8 files changed, 14 insertions(+) create mode 100644 data-products/src/test/resources/reports/archival-data/.01324873034675814445-2021-16-1626431620003.csv.crc create mode 100644 data-products/src/test/resources/reports/archival-data/.01324873034675814445-2021-17-1626431618052.csv.crc create mode 100644 data-products/src/test/resources/reports/archival-data/.01324873034675814445-2021-20-1626431619056.csv.crc create mode 100644 data-products/src/test/resources/reports/archival-data/.batch-001-2021-24-1626431616708.csv.crc create mode 100644 data-products/src/test/resources/reports/archival-data/01324873034675814445-2021-16-1626431620003.csv create mode 100644 data-products/src/test/resources/reports/archival-data/01324873034675814445-2021-17-1626431618052.csv create mode 100644 data-products/src/test/resources/reports/archival-data/01324873034675814445-2021-20-1626431619056.csv create mode 100644 data-products/src/test/resources/reports/archival-data/batch-001-2021-24-1626431616708.csv diff --git a/data-products/src/test/resources/reports/archival-data/.01324873034675814445-2021-16-1626431620003.csv.crc 
b/data-products/src/test/resources/reports/archival-data/.01324873034675814445-2021-16-1626431620003.csv.crc new file mode 100644 index 0000000000000000000000000000000000000000..8f9090eaaf482663a9daac60c710145372f005e0 GIT binary patch literal 128 zcmV-`0Du2ua$^7h00IC9j$CjdkR|9o!C{J&V9|29)Z?+_+(QF_aUTQ=Kco#=?{Z6D z$$UDDOe(dEI#=mCH~Si~{YqtaUy(;3#M^bGYHThVa{M9EG`Fr&up{)pOg@RUc6{j% i4M{HKd5G6+jXGN0X)Whh1>f3GC(pk)FX;-c~5gL^4`WG4X?V>k=glw_@QBpj|8g19= i=G$>NEYpeHp^H|&c)T9i42;abw$Ea+FQ|FWC-IG->OT+w literal 0 HcmV?d00001 diff --git a/data-products/src/test/resources/reports/archival-data/.batch-001-2021-24-1626431616708.csv.crc b/data-products/src/test/resources/reports/archival-data/.batch-001-2021-24-1626431616708.csv.crc new file mode 100644 index 0000000000000000000000000000000000000000..536ec8d1cfbec1321ad9acbf077be2525a78515f GIT binary patch literal 44 zcmYc;N@ieSU}BKDFJRSKYJK{tWL?am#K7=0xA>CRtK?kUBwky#Ik2zz;+4N108|ta AF#rGn literal 0 HcmV?d00001 diff --git a/data-products/src/test/resources/reports/archival-data/01324873034675814445-2021-16-1626431620003.csv b/data-products/src/test/resources/reports/archival-data/01324873034675814445-2021-16-1626431620003.csv new file mode 100644 index 000000000..4e427aeb7 --- /dev/null +++ b/data-products/src/test/resources/reports/archival-data/01324873034675814445-2021-16-1626431620003.csv @@ -0,0 +1,3 @@ +user_id,course_id,batch_id,content_id,attempt_id,created_on,grand_total,last_attempted_on,question,total_max_score,total_score,updated_on +423cd838-46d0-4e68-921c-c839eaf78b0c,do_31324384463607398417453,01324873034675814445,do_31322465112051712011308,e0f219a2e7e4ec7213cf9dcce998f376,2021-04-25T22:35:55.039+05:30,5.0/5.0,2021-04-25T22:34:12.662+05:30,"[{\"id\":\"do_31322467627417600011838\",\"assess_ts\":\"2021-04-25T22:34:49.852+05:30\",\"max_score\":1.0,\"score\":1.0,\"type\":\"mcq\",\"title\":\"कक्षा में बच्चों के लिए सीखने का वातावरण बनाने के दृष्टिकोण से कौन सा कथन सत्य है ?nnxa0n\",\"resvalues\":[{\"3\":\"{\\\"text\\\":\\\"कक्षा शिक्षण के समय बच्चों को छोटे छोटे समूह में मिलकर कार्य करने के अवसर देने चाहिए।xa0xa0\\\\n\\\"}\"}],\"params\":[{\"1\":\"{\\\"text\\\":\\\"बच्चों को आपस में बातचीत नहीं करने देनी चाहिए।xa0xa0\\\\n\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"सीखना एक मानसिक प्रक्रिया का परिणाम है, अतः सीखने में वातावरण की कोई भूमिका नहीं होती है।xa0xa0\\\\n\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"कक्षा शिक्षण के समय बच्चों को छोटे छोटे समूह में मिलकर कार्य करने के अवसर देने चाहिए।xa0xa0\\\\n\\\"}\"},{\"4\":\"{\\\"text\\\":\\\"बच्चों को सिर्फ निपुण व्यस्क द्वारा बताया गया कार्य ही करना चाहिए।xa0xa0\\\\n\\\"}\"},{\"answer\":\"{\\\"correct\\\":[\\\"3\\\"]}\"}],\"duration\":15.000000000000000000},{\"id\":\"do_31322467296029081611930\",\"assess_ts\":\"2021-04-25T22:35:12.393+05:30\",\"max_score\":1.0,\"score\":1.0,\"type\":\"mcq\",\"title\":\"न्यूरो प्लास्टिसिटी की समझ का आप कक्षा-कक्ष में क्रियान्वयन के लिए किन विकल्पों का चुनाव करेंगे ?nnxa0n\",\"resvalues\":[{\"4\":\"{\\\"text\\\":\\\"विकल्प पहला और दूसरा दोनों का कक्षा- कक्ष मेंxa0क्रियान्वयन के लिए उपयोगxa0करेंगे।\\\\n\\\"}\"}],\"params\":[{\"1\":\"{\\\"text\\\":\\\"तनाव मुक्त वातावरण में न्यूरोन्सxa0 के बीच सुगमता से सम्बन्ध स्थापित स्थापित होता है, इसीलिए भयमुक्त वातावरणxa0 निर्माण करेंगे।xa0xa0\\\\n\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"नई दक्षताओंxa0 के विकास से मस्तिष्क की जटिल बनावट प्रभावित होती है, इसीलिए विविध अनुभवों से गुजरने का मौका बच्चों को देंगे।xa0xa0xa0\\\\n\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"विकल्प पहला और दूसरा दोनों का कक्षा- कक्ष में क्रियान्वयन के लिए उपयोग नहीं 
करेंगे।xa0\\\\n\\\"}\"},{\"4\":\"{\\\"text\\\":\\\"विकल्प पहला और दूसरा दोनों का कक्षा- कक्ष मेंxa0क्रियान्वयन के लिए उपयोगxa0करेंगे।\\\\n\\\"}\"},{\"answer\":\"{\\\"correct\\\":[\\\"4\\\"]}\"}],\"duration\":20.000000000000000000},{\"id\":\"do_31322468187007385611840\",\"assess_ts\":\"2021-04-25T22:34:12.662+05:30\",\"max_score\":1.0,\"score\":1.0,\"type\":\"mcq\",\"title\":\"सीखने का मतलब है -xa0nnxa0n\",\"resvalues\":[{\"4\":\"{\\\"text\\\":\\\"उपरोक्त सभी।xa0\\\\n\\\"}\"}],\"params\":[{\"1\":\"{\\\"text\\\":\\\"पूर्व ज्ञान में वृद्धि, पुष्टि, या बदलाव।xa0xa0\\\\n\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"नए निष्कर्ष पर पहुंचना एवं दूसरों को भी समझा पाना।xa0xa0\\\\n\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"सीखी हुई बात से सम्बंधित कार्य कर पाना।xa0xa0\\\\n\\\"}\"},{\"4\":\"{\\\"text\\\":\\\"उपरोक्त सभी।xa0\\\\n\\\"}\"},{\"answer\":\"{\\\"correct\\\":[\\\"4\\\"]}\"}],\"duration\":24.000000000000000000},{\"id\":\"do_31322467929121587211839\",\"assess_ts\":\"2021-04-25T22:34:32.156+05:30\",\"max_score\":1.0,\"score\":1.0,\"type\":\"mcq\",\"title\":\"निम्न मेंxa0 से कौन सा कथन सत्य है ?nnxa0n\",\"resvalues\":[{\"3\":\"{\\\"text\\\":\\\"सीखने ज्ञान के बुनयादी ढांचे में बदलाव है।xa0xa0\\\\n\\\"}\"}],\"params\":[{\"1\":\"{\\\"text\\\":\\\"बच्चे कोरे कागज़ की तरह होते हैं।xa0xa0\\\\n\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"जो कुछ भी सीखा है वह व्यवहार में नज़र आना अनिवार्य है।xa0xa0\\\\n\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"सीखने ज्ञान के बुनयादी ढांचे में बदलाव है।xa0xa0\\\\n\\\"}\"},{\"4\":\"{\\\"text\\\":\\\"बच्चे ने याद कर लिया है तो इसका अर्थ है की उन्होंने सीख लिया है।xa0xa0xa0xa0\\\\n\\\"}\"},{\"answer\":\"{\\\"correct\\\":[\\\"3\\\"]}\"}],\"duration\":15.000000000000000000},{\"id\":\"do_31322466636939264011837\",\"assess_ts\":\"2021-04-25T22:35:32.689+05:30\",\"max_score\":1.0,\"score\":1.0,\"type\":\"mcq\",\"title\":\"सार्थकxa0और गुणवत्तापूर्ण प्रेरणा का बच्चों के सीखने से क्या सम्बन्ध है ?n\",\"resvalues\":[{\"4\":\"{\\\"text\\\":\\\"इससेxa0 बच्चों के सीखने की प्रक्रिया को रोचक बनाए रखने में मदद है।xa0xa0\\\\n\\\"}\"}],\"params\":[{\"1\":\"{\\\"text\\\":\\\"इससे बच्चों के सीखने पर कोई प्रभाव नहीxa0पड़ता।xa0xa0\\\\n\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"इससे बच्चों में नकारात्मक दृष्टिकोण को बढ़ावा मिलता है।xa0xa0\\\\n\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"इससे बच्चे हर चीज़ के लिए बड़ों पर निर्भर हो जाते हैंxa0\\\\n\\\"}\"},{\"4\":\"{\\\"text\\\":\\\"इससेxa0 बच्चों के सीखने की प्रक्रिया को रोचक बनाए रखने में मदद है।xa0xa0\\\\n\\\"}\"},{\"answer\":\"{\\\"correct\\\":[\\\"4\\\"]}\"}],\"duration\":18.000000000000000000}]",5.0,5.0,2021-04-25T22:35:55.039+05:30 +423cd838-46d0-4e68-921c-c839eaf78b0c,do_31324384463607398417453,01324873034675814445,do_31322465112051712011308,fc59c394456bf0b0e3e331e844eda4ff,2021-04-22T23:51:55.987+05:30,5.0/5.0,2021-04-22T23:49:03.064+05:30,"[{\"id\":\"do_31322467627417600011838\",\"assess_ts\":\"2021-04-22T23:50:27.875+05:30\",\"max_score\":1.0,\"score\":1.0,\"type\":\"mcq\",\"title\":\"कक्षा में बच्चों के लिए सीखने का वातावरण बनाने के दृष्टिकोण से कौन सा कथन सत्य है ?nnxa0n\",\"resvalues\":[{\"3\":\"{\\\"text\\\":\\\"कक्षा शिक्षण के समय बच्चों को छोटे छोटे समूह में मिलकर कार्य करने के अवसर देने चाहिए।xa0xa0\\\\n\\\"}\"}],\"params\":[{\"1\":\"{\\\"text\\\":\\\"बच्चों को आपस में बातचीत नहीं करने देनी चाहिए।xa0xa0\\\\n\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"सीखना एक मानसिक प्रक्रिया का परिणाम है, अतः सीखने में वातावरण की कोई भूमिका नहीं होती है।xa0xa0\\\\n\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"कक्षा शिक्षण के समय बच्चों को छोटे छोटे समूह में मिलकर कार्य करने के अवसर देने 
चाहिए।xa0xa0\\\\n\\\"}\"},{\"4\":\"{\\\"text\\\":\\\"बच्चों को सिर्फ निपुण व्यस्क द्वारा बताया गया कार्य ही करना चाहिए।xa0xa0\\\\n\\\"}\"},{\"answer\":\"{\\\"correct\\\":[\\\"3\\\"]}\"}],\"duration\":47.000000000000000000},{\"id\":\"do_31322467296029081611930\",\"assess_ts\":\"2021-04-22T23:51:15.737+05:30\",\"max_score\":1.0,\"score\":1.0,\"type\":\"mcq\",\"title\":\"न्यूरो प्लास्टिसिटी की समझ का आप कक्षा-कक्ष में क्रियान्वयन के लिए किन विकल्पों का चुनाव करेंगे ?nnxa0n\",\"resvalues\":[{\"4\":\"{\\\"text\\\":\\\"विकल्प पहला और दूसरा दोनों का कक्षा- कक्ष मेंxa0क्रियान्वयन के लिए उपयोगxa0करेंगे।\\\\n\\\"}\"}],\"params\":[{\"1\":\"{\\\"text\\\":\\\"तनाव मुक्त वातावरण में न्यूरोन्सxa0 के बीच सुगमता से सम्बन्ध स्थापित स्थापित होता है, इसीलिए भयमुक्त वातावरणxa0 निर्माण करेंगे।xa0xa0\\\\n\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"नई दक्षताओंxa0 के विकास से मस्तिष्क की जटिल बनावट प्रभावित होती है, इसीलिए विविध अनुभवों से गुजरने का मौका बच्चों को देंगे।xa0xa0xa0\\\\n\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"विकल्प पहला और दूसरा दोनों का कक्षा- कक्ष में क्रियान्वयन के लिए उपयोग नहीं करेंगे।xa0\\\\n\\\"}\"},{\"4\":\"{\\\"text\\\":\\\"विकल्प पहला और दूसरा दोनों का कक्षा- कक्ष मेंxa0क्रियान्वयन के लिए उपयोगxa0करेंगे।\\\\n\\\"}\"},{\"answer\":\"{\\\"correct\\\":[\\\"4\\\"]}\"}],\"duration\":43.000000000000000000},{\"id\":\"do_31322468187007385611840\",\"assess_ts\":\"2021-04-22T23:49:03.064+05:30\",\"max_score\":1.0,\"score\":1.0,\"type\":\"mcq\",\"title\":\"सीखने का मतलब है -xa0nnxa0n\",\"resvalues\":[{\"4\":\"{\\\"text\\\":\\\"उपरोक्त सभी।xa0\\\\n\\\"}\"}],\"params\":[{\"1\":\"{\\\"text\\\":\\\"पूर्व ज्ञान में वृद्धि, पुष्टि, या बदलाव।xa0xa0\\\\n\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"नए निष्कर्ष पर पहुंचना एवं दूसरों को भी समझा पाना।xa0xa0\\\\n\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"सीखी हुई बात से सम्बंधित कार्य कर पाना।xa0xa0\\\\n\\\"}\"},{\"4\":\"{\\\"text\\\":\\\"उपरोक्त सभी।xa0\\\\n\\\"}\"},{\"answer\":\"{\\\"correct\\\":[\\\"4\\\"]}\"}],\"duration\":34.000000000000000000},{\"id\":\"do_31322467929121587211839\",\"assess_ts\":\"2021-04-22T23:49:36.796+05:30\",\"max_score\":1.0,\"score\":1.0,\"type\":\"mcq\",\"title\":\"निम्न मेंxa0 से कौन सा कथन सत्य है ?nnxa0n\",\"resvalues\":[{\"3\":\"{\\\"text\\\":\\\"सीखने ज्ञान के बुनयादी ढांचे में बदलाव है।xa0xa0\\\\n\\\"}\"}],\"params\":[{\"1\":\"{\\\"text\\\":\\\"बच्चे कोरे कागज़ की तरह होते हैं।xa0xa0\\\\n\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"जो कुछ भी सीखा है वह व्यवहार में नज़र आना अनिवार्य है।xa0xa0\\\\n\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"सीखने ज्ञान के बुनयादी ढांचे में बदलाव है।xa0xa0\\\\n\\\"}\"},{\"4\":\"{\\\"text\\\":\\\"बच्चे ने याद कर लिया है तो इसका अर्थ है की उन्होंने सीख लिया है।xa0xa0xa0xa0\\\\n\\\"}\"},{\"answer\":\"{\\\"correct\\\":[\\\"3\\\"]}\"}],\"duration\":27.000000000000000000},{\"id\":\"do_31322466636939264011837\",\"assess_ts\":\"2021-04-22T23:51:34.768+05:30\",\"max_score\":1.0,\"score\":1.0,\"type\":\"mcq\",\"title\":\"सार्थकxa0और गुणवत्तापूर्ण प्रेरणा का बच्चों के सीखने से क्या सम्बन्ध है ?n\",\"resvalues\":[{\"4\":\"{\\\"text\\\":\\\"इससेxa0 बच्चों के सीखने की प्रक्रिया को रोचक बनाए रखने में मदद है।xa0xa0\\\\n\\\"}\"}],\"params\":[{\"1\":\"{\\\"text\\\":\\\"इससे बच्चों के सीखने पर कोई प्रभाव नहीxa0पड़ता।xa0xa0\\\\n\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"इससे बच्चों में नकारात्मक दृष्टिकोण को बढ़ावा मिलता है।xa0xa0\\\\n\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"इससे बच्चे हर चीज़ के लिए बड़ों पर निर्भर हो जाते हैंxa0\\\\n\\\"}\"},{\"4\":\"{\\\"text\\\":\\\"इससेxa0 बच्चों के सीखने की प्रक्रिया को रोचक बनाए रखने में मदद 
है।xa0xa0\\\\n\\\"}\"},{\"answer\":\"{\\\"correct\\\":[\\\"4\\\"]}\"}],\"duration\":16.000000000000000000}]",5.0,5.0,2021-04-22T23:51:55.987+05:30 diff --git a/data-products/src/test/resources/reports/archival-data/01324873034675814445-2021-17-1626431618052.csv b/data-products/src/test/resources/reports/archival-data/01324873034675814445-2021-17-1626431618052.csv new file mode 100644 index 000000000..b8660f222 --- /dev/null +++ b/data-products/src/test/resources/reports/archival-data/01324873034675814445-2021-17-1626431618052.csv @@ -0,0 +1,2 @@ +user_id,course_id,batch_id,content_id,attempt_id,created_on,grand_total,last_attempted_on,question,total_max_score,total_score,updated_on +423cd838-46d0-4e68-921c-c839eaf78b0c,do_31324384463607398417453,01324873034675814445,do_31322465112051712011308,ecc950daac1d39176546ffe667f35905,2021-04-26T20:52:20.316+05:30,5.0/5.0,2021-04-26T20:51:06.824+05:30,"[{\"id\":\"do_31322467627417600011838\",\"assess_ts\":\"2021-04-26T20:51:46.107+05:30\",\"max_score\":1.0,\"score\":1.0,\"type\":\"mcq\",\"title\":\"कक्षा में बच्चों के लिए सीखने का वातावरण बनाने के दृष्टिकोण से कौन सा कथन सत्य है ?nnxa0n\",\"resvalues\":[{\"3\":\"{\\\"text\\\":\\\"कक्षा शिक्षण के समय बच्चों को छोटे छोटे समूह में मिलकर कार्य करने के अवसर देने चाहिए।xa0xa0\\\\n\\\"}\"}],\"params\":[{\"1\":\"{\\\"text\\\":\\\"बच्चों को आपस में बातचीत नहीं करने देनी चाहिए।xa0xa0\\\\n\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"सीखना एक मानसिक प्रक्रिया का परिणाम है, अतः सीखने में वातावरण की कोई भूमिका नहीं होती है।xa0xa0\\\\n\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"कक्षा शिक्षण के समय बच्चों को छोटे छोटे समूह में मिलकर कार्य करने के अवसर देने चाहिए।xa0xa0\\\\n\\\"}\"},{\"4\":\"{\\\"text\\\":\\\"बच्चों को सिर्फ निपुण व्यस्क द्वारा बताया गया कार्य ही करना चाहिए।xa0xa0\\\\n\\\"}\"},{\"answer\":\"{\\\"correct\\\":[\\\"3\\\"]}\"}],\"duration\":14.000000000000000000},{\"id\":\"do_31322467296029081611930\",\"assess_ts\":\"2021-04-26T20:51:57.183+05:30\",\"max_score\":1.0,\"score\":1.0,\"type\":\"mcq\",\"title\":\"न्यूरो प्लास्टिसिटी की समझ का आप कक्षा-कक्ष में क्रियान्वयन के लिए किन विकल्पों का चुनाव करेंगे ?nnxa0n\",\"resvalues\":[{\"4\":\"{\\\"text\\\":\\\"विकल्प पहला और दूसरा दोनों का कक्षा- कक्ष मेंxa0क्रियान्वयन के लिए उपयोगxa0करेंगे।\\\\n\\\"}\"}],\"params\":[{\"1\":\"{\\\"text\\\":\\\"तनाव मुक्त वातावरण में न्यूरोन्सxa0 के बीच सुगमता से सम्बन्ध स्थापित स्थापित होता है, इसीलिए भयमुक्त वातावरणxa0 निर्माण करेंगे।xa0xa0\\\\n\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"नई दक्षताओंxa0 के विकास से मस्तिष्क की जटिल बनावट प्रभावित होती है, इसीलिए विविध अनुभवों से गुजरने का मौका बच्चों को देंगे।xa0xa0xa0\\\\n\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"विकल्प पहला और दूसरा दोनों का कक्षा- कक्ष में क्रियान्वयन के लिए उपयोग नहीं करेंगे।xa0\\\\n\\\"}\"},{\"4\":\"{\\\"text\\\":\\\"विकल्प पहला और दूसरा दोनों का कक्षा- कक्ष मेंxa0क्रियान्वयन के लिए उपयोगxa0करेंगे।\\\\n\\\"}\"},{\"answer\":\"{\\\"correct\\\":[\\\"4\\\"]}\"}],\"duration\":9.000000000000000000},{\"id\":\"do_31322468187007385611840\",\"assess_ts\":\"2021-04-26T20:51:18.581+05:30\",\"max_score\":1.0,\"score\":1.0,\"type\":\"mcq\",\"title\":\"सीखने का मतलब है -xa0nnxa0n\",\"resvalues\":[{\"4\":\"{\\\"text\\\":\\\"उपरोक्त सभी।xa0\\\\n\\\"}\"}],\"params\":[{\"1\":\"{\\\"text\\\":\\\"पूर्व ज्ञान में वृद्धि, पुष्टि, या बदलाव।xa0xa0\\\\n\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"नए निष्कर्ष पर पहुंचना एवं दूसरों को भी समझा पाना।xa0xa0\\\\n\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"सीखी हुई बात से सम्बंधित कार्य कर पाना।xa0xa0\\\\n\\\"}\"},{\"4\":\"{\\\"text\\\":\\\"उपरोक्त 
सभी।xa0\\\\n\\\"}\"},{\"answer\":\"{\\\"correct\\\":[\\\"4\\\"]}\"}],\"duration\":18.000000000000000000},{\"id\":\"do_31322467929121587211839\",\"assess_ts\":\"2021-04-26T20:51:29.948+05:30\",\"max_score\":1.0,\"score\":1.0,\"type\":\"mcq\",\"title\":\"निम्न मेंxa0 से कौन सा कथन सत्य है ?nnxa0n\",\"resvalues\":[{\"3\":\"{\\\"text\\\":\\\"सीखने ज्ञान के बुनयादी ढांचे में बदलाव है।xa0xa0\\\\n\\\"}\"}],\"params\":[{\"1\":\"{\\\"text\\\":\\\"बच्चे कोरे कागज़ की तरह होते हैं।xa0xa0\\\\n\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"जो कुछ भी सीखा है वह व्यवहार में नज़र आना अनिवार्य है।xa0xa0\\\\n\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"सीखने ज्ञान के बुनयादी ढांचे में बदलाव है।xa0xa0\\\\n\\\"}\"},{\"4\":\"{\\\"text\\\":\\\"बच्चे ने याद कर लिया है तो इसका अर्थ है की उन्होंने सीख लिया है।xa0xa0xa0xa0\\\\n\\\"}\"},{\"answer\":\"{\\\"correct\\\":[\\\"3\\\"]}\"}],\"duration\":9.000000000000000000},{\"id\":\"do_31322466636939264011837\",\"assess_ts\":\"2021-04-26T20:52:08.031+05:30\",\"max_score\":1.0,\"score\":1.0,\"type\":\"mcq\",\"title\":\"सार्थकxa0और गुणवत्तापूर्ण प्रेरणा का बच्चों के सीखने से क्या सम्बन्ध है ?n\",\"resvalues\":[{\"4\":\"{\\\"text\\\":\\\"इससेxa0 बच्चों के सीखने की प्रक्रिया को रोचक बनाए रखने में मदद है।xa0xa0\\\\n\\\"}\"}],\"params\":[{\"1\":\"{\\\"text\\\":\\\"इससे बच्चों के सीखने पर कोई प्रभाव नहीxa0पड़ता।xa0xa0\\\\n\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"इससे बच्चों में नकारात्मक दृष्टिकोण को बढ़ावा मिलता है।xa0xa0\\\\n\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"इससे बच्चे हर चीज़ के लिए बड़ों पर निर्भर हो जाते हैंxa0\\\\n\\\"}\"},{\"4\":\"{\\\"text\\\":\\\"इससेxa0 बच्चों के सीखने की प्रक्रिया को रोचक बनाए रखने में मदद है।xa0xa0\\\\n\\\"}\"},{\"answer\":\"{\\\"correct\\\":[\\\"4\\\"]}\"}],\"duration\":8.000000000000000000}]",5.0,5.0,2021-04-26T20:52:20.316+05:30 diff --git a/data-products/src/test/resources/reports/archival-data/01324873034675814445-2021-20-1626431619056.csv b/data-products/src/test/resources/reports/archival-data/01324873034675814445-2021-20-1626431619056.csv new file mode 100644 index 000000000..aeedd4b5b --- /dev/null +++ b/data-products/src/test/resources/reports/archival-data/01324873034675814445-2021-20-1626431619056.csv @@ -0,0 +1,3 @@ +user_id,course_id,batch_id,content_id,attempt_id,created_on,grand_total,last_attempted_on,question,total_max_score,total_score,updated_on +423cd838-46d0-4e68-921c-c839eaf78b0c,do_31324384463607398417453,01324873034675814445,do_31322465112051712011308,cf6b5389e6cc1eda8c6830408c730613,2021-05-19T22:10:50.844+05:30,5.0/5.0,2021-05-19T22:10:44.271+05:30,"[{\"id\":\"do_31322467627417600011838\",\"assess_ts\":\"2021-04-22T23:50:27.875+05:30\",\"max_score\":1.0,\"score\":1.0,\"title\":\"कक्षा में बच्चों के लिए सीखने का वातावरण बनाने के दृष्टिकोण से कौन सा कथन सत्य है ?nnxa0n\",\"resvalues\":[{\"3\":\"{\\\"text\\\":\\\"कक्षा शिक्षण के समय बच्चों को छोटे छोटे समूह में मिलकर कार्य करने के अवसर देने चाहिए।xa0xa0\\\\n\\\"}\"}],\"params\":[{\"1\":\"{\\\"text\\\":\\\"बच्चों को आपस में बातचीत नहीं करने देनी चाहिए।xa0xa0\\\\n\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"सीखना एक मानसिक प्रक्रिया का परिणाम है, अतः सीखने में वातावरण की कोई भूमिका नहीं होती है।xa0xa0\\\\n\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"कक्षा शिक्षण के समय बच्चों को छोटे छोटे समूह में मिलकर कार्य करने के अवसर देने चाहिए।xa0xa0\\\\n\\\"}\"},{\"4\":\"{\\\"text\\\":\\\"बच्चों को सिर्फ निपुण व्यस्क द्वारा बताया गया कार्य ही करना 
चाहिए।xa0xa0\\\\n\\\"}\"},{\"answer\":\"{\\\"correct\\\":[\\\"3\\\"]}\"}],\"duration\":47.000000000000000000},{\"id\":\"do_31322467296029081611930\",\"assess_ts\":\"2021-04-22T23:51:15.737+05:30\",\"max_score\":1.0,\"score\":1.0,\"title\":\"न्यूरो प्लास्टिसिटी की समझ का आप कक्षा-कक्ष में क्रियान्वयन के लिए किन विकल्पों का चुनाव करेंगे ?nnxa0n\",\"resvalues\":[{\"4\":\"{\\\"text\\\":\\\"विकल्प पहला और दूसरा दोनों का कक्षा- कक्ष मेंxa0क्रियान्वयन के लिए उपयोगxa0करेंगे।\\\\n\\\"}\"}],\"params\":[{\"1\":\"{\\\"text\\\":\\\"तनाव मुक्त वातावरण में न्यूरोन्सxa0 के बीच सुगमता से सम्बन्ध स्थापित स्थापित होता है, इसीलिए भयमुक्त वातावरणxa0 निर्माण करेंगे।xa0xa0\\\\n\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"नई दक्षताओंxa0 के विकास से मस्तिष्क की जटिल बनावट प्रभावित होती है, इसीलिए विविध अनुभवों से गुजरने का मौका बच्चों को देंगे।xa0xa0xa0\\\\n\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"विकल्प पहला और दूसरा दोनों का कक्षा- कक्ष में क्रियान्वयन के लिए उपयोग नहीं करेंगे।xa0\\\\n\\\"}\"},{\"4\":\"{\\\"text\\\":\\\"विकल्प पहला और दूसरा दोनों का कक्षा- कक्ष मेंxa0क्रियान्वयन के लिए उपयोगxa0करेंगे।\\\\n\\\"}\"},{\"answer\":\"{\\\"correct\\\":[\\\"4\\\"]}\"}],\"duration\":43.000000000000000000},{\"id\":\"do_31322468187007385611840\",\"assess_ts\":\"2021-04-22T23:49:03.064+05:30\",\"max_score\":1.0,\"score\":1.0,\"title\":\"सीखने का मतलब है -xa0nnxa0n\",\"resvalues\":[{\"4\":\"{\\\"text\\\":\\\"उपरोक्त सभी।xa0\\\\n\\\"}\"}],\"params\":[{\"1\":\"{\\\"text\\\":\\\"पूर्व ज्ञान में वृद्धि, पुष्टि, या बदलाव।xa0xa0\\\\n\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"नए निष्कर्ष पर पहुंचना एवं दूसरों को भी समझा पाना।xa0xa0\\\\n\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"सीखी हुई बात से सम्बंधित कार्य कर पाना।xa0xa0\\\\n\\\"}\"},{\"4\":\"{\\\"text\\\":\\\"उपरोक्त सभी।xa0\\\\n\\\"}\"},{\"answer\":\"{\\\"correct\\\":[\\\"4\\\"]}\"}],\"duration\":34.000000000000000000},{\"id\":\"do_31322467929121587211839\",\"assess_ts\":\"2021-04-22T23:49:36.796+05:30\",\"max_score\":1.0,\"score\":1.0,\"title\":\"निम्न मेंxa0 से कौन सा कथन सत्य है ?nnxa0n\",\"resvalues\":[{\"3\":\"{\\\"text\\\":\\\"सीखने ज्ञान के बुनयादी ढांचे में बदलाव है।xa0xa0\\\\n\\\"}\"}],\"params\":[{\"1\":\"{\\\"text\\\":\\\"बच्चे कोरे कागज़ की तरह होते हैं।xa0xa0\\\\n\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"जो कुछ भी सीखा है वह व्यवहार में नज़र आना अनिवार्य है।xa0xa0\\\\n\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"सीखने ज्ञान के बुनयादी ढांचे में बदलाव है।xa0xa0\\\\n\\\"}\"},{\"4\":\"{\\\"text\\\":\\\"बच्चे ने याद कर लिया है तो इसका अर्थ है की उन्होंने सीख लिया है।xa0xa0xa0xa0\\\\n\\\"}\"},{\"answer\":\"{\\\"correct\\\":[\\\"3\\\"]}\"}],\"duration\":27.000000000000000000},{\"id\":\"do_31322466636939264011837\",\"assess_ts\":\"2021-04-22T23:51:34.768+05:30\",\"max_score\":1.0,\"score\":1.0,\"title\":\"सार्थकxa0और गुणवत्तापूर्ण प्रेरणा का बच्चों के सीखने से क्या सम्बन्ध है ?n\",\"resvalues\":[{\"4\":\"{\\\"text\\\":\\\"इससेxa0 बच्चों के सीखने की प्रक्रिया को रोचक बनाए रखने में मदद है।xa0xa0\\\\n\\\"}\"}],\"params\":[{\"1\":\"{\\\"text\\\":\\\"इससे बच्चों के सीखने पर कोई प्रभाव नहीxa0पड़ता।xa0xa0\\\\n\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"इससे बच्चों में नकारात्मक दृष्टिकोण को बढ़ावा मिलता है।xa0xa0\\\\n\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"इससे बच्चे हर चीज़ के लिए बड़ों पर निर्भर हो जाते हैंxa0\\\\n\\\"}\"},{\"4\":\"{\\\"text\\\":\\\"इससेxa0 बच्चों के सीखने की प्रक्रिया को रोचक बनाए रखने में मदद है।xa0xa0\\\\n\\\"}\"},{\"answer\":\"{\\\"correct\\\":[\\\"4\\\"]}\"}],\"duration\":16.000000000000000000}]",5.0,5.0,2021-05-19T22:10:50.844+05:30 
+423cd838-46d0-4e68-921c-c839eaf78b0c,do_31324384463607398417453,01324873034675814445,do_3132247563133255681780,218ef326d5da6f349d3a57e2275c5190,2021-05-19T22:40:25.506+05:30,3.0/5.0,2021-05-19T22:40:23.965+05:30,"[{\"id\":\"do_31322467627417600011838\",\"assess_ts\":\"2021-04-26T21:03:03.235+05:30\",\"max_score\":1.0,\"score\":0.0,\"title\":\"कक्षा में बच्चों के लिए सीखने का वातावरण बनाने के दृष्टिकोण से कौन सा कथन सत्य है ?nnxa0n\",\"resvalues\":[{\"2\":\"{\\\"text\\\":\\\"सीखना एक मानसिक प्रक्रिया का परिणाम है, अतः सीखने में वातावरण की कोई भूमिका नहीं होती है।xa0xa0\\\\n\\\"}\"}],\"params\":[{\"1\":\"{\\\"text\\\":\\\"बच्चों को आपस में बातचीत नहीं करने देनी चाहिए।xa0xa0\\\\n\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"सीखना एक मानसिक प्रक्रिया का परिणाम है, अतः सीखने में वातावरण की कोई भूमिका नहीं होती है।xa0xa0\\\\n\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"कक्षा शिक्षण के समय बच्चों को छोटे छोटे समूह में मिलकर कार्य करने के अवसर देने चाहिए।xa0xa0\\\\n\\\"}\"},{\"4\":\"{\\\"text\\\":\\\"बच्चों को सिर्फ निपुण व्यस्क द्वारा बताया गया कार्य ही करना चाहिए।xa0xa0\\\\n\\\"}\"},{\"answer\":\"{\\\"correct\\\":[\\\"3\\\"]}\"}],\"duration\":4.000000000000000000},{\"id\":\"do_31322467296029081611930\",\"assess_ts\":\"2021-04-26T21:03:12.796+05:30\",\"max_score\":1.0,\"score\":1.0,\"title\":\"न्यूरो प्लास्टिसिटी की समझ का आप कक्षा-कक्ष में क्रियान्वयन के लिए किन विकल्पों का चुनाव करेंगे ?nnxa0n\",\"resvalues\":[{\"4\":\"{\\\"text\\\":\\\"विकल्प पहला और दूसरा दोनों का कक्षा- कक्ष मेंxa0क्रियान्वयन के लिए उपयोगxa0करेंगे।\\\\n\\\"}\"}],\"params\":[{\"1\":\"{\\\"text\\\":\\\"तनाव मुक्त वातावरण में न्यूरोन्सxa0 के बीच सुगमता से सम्बन्ध स्थापित स्थापित होता है, इसीलिए भयमुक्त वातावरणxa0 निर्माण करेंगे।xa0xa0\\\\n\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"नई दक्षताओंxa0 के विकास से मस्तिष्क की जटिल बनावट प्रभावित होती है, इसीलिए विविध अनुभवों से गुजरने का मौका बच्चों को देंगे।xa0xa0xa0\\\\n\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"विकल्प पहला और दूसरा दोनों का कक्षा- कक्ष में क्रियान्वयन के लिए उपयोग नहीं करेंगे।xa0\\\\n\\\"}\"},{\"4\":\"{\\\"text\\\":\\\"विकल्प पहला और दूसरा दोनों का कक्षा- कक्ष मेंxa0क्रियान्वयन के लिए उपयोगxa0करेंगे।\\\\n\\\"}\"},{\"answer\":\"{\\\"correct\\\":[\\\"4\\\"]}\"}],\"duration\":10.000000000000000000},{\"id\":\"do_31322468187007385611840\",\"assess_ts\":\"2021-04-26T21:02:57.005+05:30\",\"max_score\":1.0,\"score\":1.0,\"title\":\"सीखने का मतलब है -xa0nnxa0n\",\"resvalues\":[{\"4\":\"{\\\"text\\\":\\\"उपरोक्त सभी।xa0\\\\n\\\"}\"}],\"params\":[{\"1\":\"{\\\"text\\\":\\\"पूर्व ज्ञान में वृद्धि, पुष्टि, या बदलाव।xa0xa0\\\\n\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"नए निष्कर्ष पर पहुंचना एवं दूसरों को भी समझा पाना।xa0xa0\\\\n\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"सीखी हुई बात से सम्बंधित कार्य कर पाना।xa0xa0\\\\n\\\"}\"},{\"4\":\"{\\\"text\\\":\\\"उपरोक्त सभी।xa0\\\\n\\\"}\"},{\"answer\":\"{\\\"correct\\\":[\\\"4\\\"]}\"}],\"duration\":5.000000000000000000},{\"id\":\"do_31322467929121587211839\",\"assess_ts\":\"2021-04-26T21:02:59.120+05:30\",\"max_score\":1.0,\"score\":0.0,\"title\":\"निम्न मेंxa0 से कौन सा कथन सत्य है ?nnxa0n\",\"resvalues\":[],\"params\":[{\"1\":\"{\\\"text\\\":\\\"बच्चे कोरे कागज़ की तरह होते हैं।xa0xa0\\\\n\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"जो कुछ भी सीखा है वह व्यवहार में नज़र आना अनिवार्य है।xa0xa0\\\\n\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"सीखने ज्ञान के बुनयादी ढांचे में बदलाव है।xa0xa0\\\\n\\\"}\"},{\"4\":\"{\\\"text\\\":\\\"बच्चे ने याद कर लिया है तो इसका अर्थ है की उन्होंने सीख लिया 
है।xa0xa0xa0xa0\\\\n\\\"}\"},{\"answer\":\"{\\\"correct\\\":[\\\"3\\\"]}\"}],\"duration\":2.000000000000000000},{\"id\":\"do_31322466636939264011837\",\"assess_ts\":\"2021-04-26T21:03:21.172+05:30\",\"max_score\":1.0,\"score\":1.0,\"title\":\"सार्थकxa0और गुणवत्तापूर्ण प्रेरणा का बच्चों के सीखने से क्या सम्बन्ध है ?n\",\"resvalues\":[{\"4\":\"{\\\"text\\\":\\\"इससेxa0 बच्चों के सीखने की प्रक्रिया को रोचक बनाए रखने में मदद है।xa0xa0\\\\n\\\"}\"}],\"params\":[{\"1\":\"{\\\"text\\\":\\\"इससे बच्चों के सीखने पर कोई प्रभाव नहीxa0पड़ता।xa0xa0\\\\n\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"इससे बच्चों में नकारात्मक दृष्टिकोण को बढ़ावा मिलता है।xa0xa0\\\\n\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"इससे बच्चे हर चीज़ के लिए बड़ों पर निर्भर हो जाते हैंxa0\\\\n\\\"}\"},{\"4\":\"{\\\"text\\\":\\\"इससेxa0 बच्चों के सीखने की प्रक्रिया को रोचक बनाए रखने में मदद है।xa0xa0\\\\n\\\"}\"},{\"answer\":\"{\\\"correct\\\":[\\\"4\\\"]}\"}],\"duration\":8.000000000000000000}]",5.0,3.0,2021-05-19T22:40:25.506+05:30 diff --git a/data-products/src/test/resources/reports/archival-data/batch-001-2021-24-1626431616708.csv b/data-products/src/test/resources/reports/archival-data/batch-001-2021-24-1626431616708.csv new file mode 100644 index 000000000..b5fb6d7b3 --- /dev/null +++ b/data-products/src/test/resources/reports/archival-data/batch-001-2021-24-1626431616708.csv @@ -0,0 +1,6 @@ +user_id,course_id,batch_id,content_id,attempt_id,created_on,grand_total,last_attempted_on,question,total_max_score,total_score,updated_on +user-008,do_11306040245271756813015,batch-001,do_112876961957437440179,attempat-001,"",10,"",[],10.0,10.0,2021-06-17T05:30:00.000+05:30 +user-010,do_11306040245271756813015,batch-001,do_11307593493010022418,attempat-001,"",15,"",[],15.0,15.0,2021-06-17T05:30:00.000+05:30 +user-002,do_1130928636168192001667,batch-001,do_1128870328040161281204,attempat-001,"",10,"",[],10.0,10.0,2021-06-17T05:30:00.000+05:30 +user-001,do_1130928636168192001667,batch-001,do_1128870328040161281204,attempat-001,"",20,"","[{\"id\":\"do_213019475454476288155\",\"assess_ts\":\"2020-06-18T23:45:56.490+05:30\",\"max_score\":1.0,\"score\":1.0,\"type\":\"mcq\",\"title\":\"testQuestiontextandformula\",\"resvalues\":[{\"1\":\"{\\\"text\\\":\\\"A=\\\\\\\\\\\\\\\\pi r^2\\\\n\\\"}\"}],\"params\":[{\"1\":\"{\\\"text\\\":\\\"A=\\\\\\\\\\\\\\\\pi r^2\\\\n\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"no\\\\n\\\"}\"},{\"answer\":\"{\\\"correct\\\":[\\\"1\\\"]}\"}],\"description\":\"testQuestiontextandformula\",\"duration\":1.000000000000000000},{\"id\":\"do_213019970118279168165\",\"assess_ts\":\"2020-06-18T23:45:56.490+05:30\",\"max_score\":1.0,\"score\":1.0,\"type\":\"mcq\",\"title\":\"test with formula\",\"resvalues\":[{\"1\":\"{\\\"text\\\":\\\"1\\\\nA=\\\\\\\\\\\\\\\\pi r^2A=\\\\\\\\\\\\\\\\pi r^2\\\\n\\\"}\"}],\"params\":[{\"1\":\"{\\\"text\\\":\\\"1\\\\nA=\\\\\\\\\\\\\\\\pi r^2A=\\\\\\\\\\\\\\\\pi r^2\\\\n\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\\n\\\"}\"},{\"answer\":\"{\\\"correct\\\":[\\\"1\\\"]}\"}],\"description\":\"\",\"duration\":1.000000000000000000},{\"id\":\"do_213019972814823424168\",\"assess_ts\":\"2020-06-18T23:45:56.490+05:30\",\"max_score\":1.0,\"score\":0.33,\"type\":\"mtf\",\"title\":\"Copy of - Match the following:\\\\n\\\\nx=\\\\\\\\frac{-b\\\\\\\\pm\\\\\\\\sqrt{b^2-4ac}}{2a}\\\\nArrange the following equations in correct order.\\\\n\",\"resvalues\":[{\"lhs\":\"[{\\\"1\\\":\\\"{\\\\\\\"text\\\\\\\":\\\\\\\"A=\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\pi 
r^2\\\\\\\\n\\\\\\\"}\\\"},{\\\"2\\\":\\\"{\\\\\\\"text\\\\\\\":\\\\\\\"\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\frac{4}{3}\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\pi r^3\\\\\\\\n\\\\\\\"}\\\"},{\\\"3\\\":\\\"{\\\\\\\"text\\\\\\\":\\\\\\\"a^n\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\times a^m=a^{n+m}\\\\\\\\n\\\\\\\"}\\\"}]\"},{\"rhs\":\"[{\\\"1\\\":\\\"{\\\\\\\"text\\\\\\\":\\\\\\\"Volume of sphere\\\\\\\\n\\\\\\\"}\\\"},{\\\"2\\\":\\\"{\\\\\\\"text\\\\\\\":\\\\\\\"Area of Circle\\\\\\\\n\\\\\\\"}\\\"},{\\\"3\\\":\\\"{\\\\\\\"text\\\\\\\":\\\\\\\"Product Rule\\\\\\\\n\\\\\\\"}\\\"}]\"}],\"params\":[{\"lhs\":\"[{\\\"1\\\":\\\"{\\\\\\\"text\\\\\\\":\\\\\\\"A=\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\pi r^2\\\\\\\\n\\\\\\\"}\\\"},{\\\"2\\\":\\\"{\\\\\\\"text\\\\\\\":\\\\\\\"\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\frac{4}{3}\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\pi r^3\\\\\\\\n\\\\\\\"}\\\"},{\\\"3\\\":\\\"{\\\\\\\"text\\\\\\\":\\\\\\\"a^n\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\times a^m=a^{n+m}\\\\\\\\n\\\\\\\"}\\\"}]\"},{\"rhs\":\"[{\\\"1\\\":\\\"{\\\\\\\"text\\\\\\\":\\\\\\\"Volume of sphere\\\\\\\\n\\\\\\\"}\\\"},{\\\"2\\\":\\\"{\\\\\\\"text\\\\\\\":\\\\\\\"Product Rule\\\\\\\\n\\\\\\\"}\\\"},{\\\"3\\\":\\\"{\\\\\\\"text\\\\\\\":\\\\\\\"Area of Circle\\\\\\\\n\\\\\\\"}\\\"}]\"},{\"answer\":\"{\\\"lhs\\\":[\\\"1\\\",\\\"2\\\",\\\"3\\\"],\\\"rhs\\\":[\\\"3\\\",\\\"1\\\",\\\"2\\\"]}\"}],\"description\":\"\",\"duration\":2.000000000000000000},{\"id\":\"do_2130256513760624641171\",\"assess_ts\":\"2020-06-18T23:45:56.490+05:30\",\"max_score\":10.0,\"score\":10.0,\"type\":\"mcq\",\"title\":\"2 +2 is..? mark ia 10\\\\n\",\"resvalues\":[{\"1\":\"{\\\"text\\\":\\\"4\\\\n\\\"}\"}],\"params\":[{\"1\":\"{\\\"text\\\":\\\"4\\\\n\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"3\\\\n\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"8\\\\n\\\"}\"},{\"4\":\"{\\\"text\\\":\\\"10\\\\n\\\"}\"},{\"answer\":\"{\\\"correct\\\":[\\\"1\\\"]}\"}],\"description\":\"\",\"duration\":12.000000000000000000}]",20.0,20.0,2021-06-17T05:30:00.000+05:30 +user-003,do_1130928636168192001667,batch-001,do_112876961957437440179,attempat-001,"",10,"",[],10.0,10.0,2021-06-17T05:30:00.000+05:30 From 358b96a13612b20b4e5c18a8b16f10f10c8ab894 Mon Sep 17 00:00:00 2001 From: Manjunath Davanam Date: Fri, 16 Jul 2021 16:13:18 +0530 Subject: [PATCH 10/55] Issue SB-24793 feat: Assessment data archival question data serialisation issue fix --- .../job/report/AssessmentArchivalJob.scala | 32 ++++++------------- 1 file changed, 9 insertions(+), 23 deletions(-) diff --git a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala index 2e442d686..46f0e6912 100644 --- a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala +++ b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala @@ -1,9 +1,9 @@ package org.sunbird.analytics.job.report import com.datastax.spark.connector.cql.CassandraConnectorConf -import org.apache.spark.{SparkContext, sql} +import org.apache.spark.SparkContext import org.apache.spark.sql.cassandra.CassandraSparkSessionFunctions -import org.apache.spark.sql.functions.{col, explode_outer, to_timestamp, weekofyear, year} +import org.apache.spark.sql.functions._ import org.apache.spark.sql.types.StructType import org.apache.spark.sql.{DataFrame, SparkSession} import org.ekstep.analytics.framework.Level.INFO @@ -11,13 +11,12 @@ import org.ekstep.analytics.framework.conf.AppConf import 
org.ekstep.analytics.framework.util.DatasetUtil.extensions
 import org.ekstep.analytics.framework.util.{CommonUtil, JSONUtils, JobLogger}
 import org.ekstep.analytics.framework.{FrameworkContext, IJob, JobConfig}
-import org.sunbird.analytics.exhaust.collection.UDFUtils

 import java.util.concurrent.atomic.AtomicInteger

 object AssessmentArchivalJob extends optional.Application with IJob with BaseReportsJob {
   val cassandraUrl = "org.apache.spark.sql.cassandra"
-  private val assessmentAggDBSettings: Map[String, String] = Map("table" -> "assessment_aggregator", "keyspace" -> AppConf.getConfig("sunbird.courses.keyspace"), "cluster" -> "LMSCluster")
+  private val assessmentAggDBSettings: Map[String, String] = Map("table" -> "assessment_aggregator_temp2", "keyspace" -> AppConf.getConfig("sunbird.courses.keyspace"), "cluster" -> "LMSCluster")
   implicit val className: String = "org.sunbird.analytics.job.report.AssessmentArchivalJob"
   private val partitionCols = List("batch_id", "year", "week_of_year")

@@ -32,6 +31,7 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep
     JobLogger.start(s"$jobName started executing", Option(Map("config" -> config, "model" -> jobName)))
     implicit val jobConfig: JobConfig = JSONUtils.deserialize[JobConfig](config)
     implicit val spark: SparkSession = openSparkSession(jobConfig)
+    implicit val frameworkContext: FrameworkContext = getReportingFrameworkContext()
     val modelParams = jobConfig.modelParams.get
     val truncateData: Boolean = modelParams.getOrElse("truncateData", "false").asInstanceOf[Boolean]

@@ -55,32 +55,18 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep

   // $COVERAGE-ON$
   def archiveData(sparkSession: SparkSession, fetchData: (SparkSession, Map[String, String], String, StructType) => DataFrame, jobConfig: JobConfig): Array[Map[String, Any]] = {
-    val assessmentData: DataFrame = getAssessmentData(sparkSession, fetchData)
-    print("assessmentData" + assessmentData.show(false))
-    print("assessmentData.printSchema()" + assessmentData.printSchema())
-    val updatedData = assessmentData.withColumn("updated_on", to_timestamp(col("updated_on")))
+    val assessmentData: DataFrame = getAssessmentData(sparkSession)
+      .withColumn("updated_on", to_timestamp(col("updated_on")))
       .withColumn("year", year(col("updated_on")))
       .withColumn("week_of_year", weekofyear(col("updated_on")))
-      .withColumn("question", UDFUtils.parseResult(col("question")))
-//      .withColumn("questiondata",explode_outer(col("question")))
-//      .withColumn("questionresponse", UDFUtils.toJSON(col("questiondata.resvalues")))
-//      .withColumn("questionoption", UDFUtils.toJSON(col("questiondata.params")))
-    //.withColumn("question", UDFUtils.toJSON(col("question")))
-    // assessmentData.coalesce(1)
-    //   .write
-    //   .partitionBy(partitionCols:_*)
-    //   .mode("overwrite")
-    //   .format("com.databricks.spark.csv")
-    //   .option("header", "true")
-    //   .save(AppConf.getConfig("save_path"))
-    print("updatedData" + updatedData.show(false))
-    val archivedBatchList = updatedData.groupBy(partitionCols.head, partitionCols.tail: _*).count().collect()
+      .withColumn("question", to_json(col("question")))
+    val archivedBatchList = assessmentData.groupBy(partitionCols.head, partitionCols.tail: _*).count().collect()
     val archivedBatchCount = new AtomicInteger(archivedBatchList.length)
     JobLogger.log(s"Total Batches to Archive By Year & Week $archivedBatchCount", None, INFO)
     val batchesToArchive: Array[BatchPartition] = archivedBatchList.map(f =>
       BatchPartition(f.get(0).asInstanceOf[String], f.get(1).asInstanceOf[Int], f.get(2).asInstanceOf[Int]))
     for (batch <- batchesToArchive) yield {
-      val filteredDF = updatedData
+      val filteredDF = assessmentData
         .filter(col("batch_id") === batch.batch_id && col("year") === batch.year && col("week_of_year") === batch.week_of_year)
       upload(filteredDF.drop("year", "week_of_year"), batch, jobConfig)
       val metrics = Map("batch_id" -> batch.batch_id, "year" -> batch.year, "week_of_year" -> batch.week_of_year, "pending_batches" -> archivedBatchCount.getAndDecrement(), "total_records" -> filteredDF.count())
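The fix in PATCH 10 above removes the debug output and serialises the nested question column with to_json, since a CSV sink cannot carry array or struct columns directly. A minimal sketch of that flattening follows, assuming a local SparkSession; QuestionSerialisationSketch, attempt_id and the toy rows are illustrative, not the job's own names or data.

// Sketch of flattening a nested column to a JSON string before a CSV write.
// Assumption: local SparkSession; the toy array-of-struct column mirrors the
// shape problem of the question data, not its real schema.
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.{col, to_json}

object QuestionSerialisationSketch extends App {
  val spark = SparkSession.builder().master("local[*]").appName("to-json-sketch").getOrCreate()
  import spark.implicits._

  val attempts = Seq(
    ("attempt-001", Seq(("q1", 1.0), ("q2", 0.0))),
    ("attempt-002", Seq(("q1", 1.0)))
  ).toDF("attempt_id", "question") // question: array<struct<_1:string,_2:double>>

  // After to_json the column is a plain string, safe for a CSV sink.
  val flattened = attempts.withColumn("question", to_json(col("question")))
  flattened.show(false)
  spark.stop()
}

A matching from_json call with the original schema restores the column later, which keeps the archived CSVs round-trippable.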
From e46e0a17ca761a1808a692a81be95a609c8f3e81 Mon Sep 17 00:00:00 2001
From: Manjunath Davanam
Date: Fri, 16 Jul 2021 16:16:01 +0530
Subject: [PATCH 11/55] Issue SB-24793 feat: Assessment data archival question data serialisation issue fix

---
 .../sunbird/analytics/job/report/AssessmentArchivalJob.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala
index 46f0e6912..cacb233c0 100644
--- a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala
+++ b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala
@@ -55,7 +55,7 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep

   // $COVERAGE-ON$
   def archiveData(sparkSession: SparkSession, fetchData: (SparkSession, Map[String, String], String, StructType) => DataFrame, jobConfig: JobConfig): Array[Map[String, Any]] = {
-    val assessmentData: DataFrame = getAssessmentData(sparkSession)
+    val assessmentData: DataFrame = getAssessmentData(sparkSession, fetchData)
       .withColumn("updated_on", to_timestamp(col("updated_on")))
       .withColumn("year", year(col("updated_on")))
       .withColumn("week_of_year", weekofyear(col("updated_on")))

From 423f9773d8c65b517ea52a1c132f266ac593e083 Mon Sep 17 00:00:00 2001
From: Manjunath Davanam
Date: Fri, 16 Jul 2021 16:20:25 +0530
Subject: [PATCH 12/55] Issue SB-24793 feat: Assessment data archival question data serialisation issue fix

---
 .../sunbird/analytics/job/report/AssessmentArchivalJob.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala
index cacb233c0..991cefb5c 100644
--- a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala
+++ b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala
@@ -16,7 +16,7 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep

 object AssessmentArchivalJob extends optional.Application with IJob with BaseReportsJob {
   val cassandraUrl = "org.apache.spark.sql.cassandra"
-  private val assessmentAggDBSettings: Map[String, String] = Map("table" -> "assessment_aggregator_temp2", "keyspace" -> AppConf.getConfig("sunbird.courses.keyspace"), "cluster" -> "LMSCluster")
+  private val assessmentAggDBSettings: Map[String, String] = Map("table" -> "assessment_aggregator", "keyspace" -> AppConf.getConfig("sunbird.courses.keyspace"), "cluster" -> "LMSCluster")
   implicit val className: String = "org.sunbird.analytics.job.report.AssessmentArchivalJob"
   private val partitionCols = List("batch_id", "year", "week_of_year")

From 9cea5eb67ab12d5552e216a8a3148eaa4fcbd218
Mon Sep 17 00:00:00 2001 From: Manjunath Davanam Date: Fri, 16 Jul 2021 16:58:53 +0530 Subject: [PATCH 13/55] Issue SB-24793 feat: removing the unwanted csv files --- ...4873034675814445-2021-16-1626431620003.csv.crc | Bin 128 -> 0 bytes ...4873034675814445-2021-17-1626431618052.csv.crc | Bin 72 -> 0 bytes ...4873034675814445-2021-20-1626431619056.csv.crc | Bin 128 -> 0 bytes .../.batch-001-2021-24-1626431616708.csv.crc | Bin 44 -> 0 bytes ...01324873034675814445-2021-16-1626431620003.csv | 3 --- ...01324873034675814445-2021-17-1626431618052.csv | 2 -- ...01324873034675814445-2021-20-1626431619056.csv | 3 --- .../batch-001-2021-24-1626431616708.csv | 6 ------ 8 files changed, 14 deletions(-) delete mode 100644 data-products/src/test/resources/reports/archival-data/.01324873034675814445-2021-16-1626431620003.csv.crc delete mode 100644 data-products/src/test/resources/reports/archival-data/.01324873034675814445-2021-17-1626431618052.csv.crc delete mode 100644 data-products/src/test/resources/reports/archival-data/.01324873034675814445-2021-20-1626431619056.csv.crc delete mode 100644 data-products/src/test/resources/reports/archival-data/.batch-001-2021-24-1626431616708.csv.crc delete mode 100644 data-products/src/test/resources/reports/archival-data/01324873034675814445-2021-16-1626431620003.csv delete mode 100644 data-products/src/test/resources/reports/archival-data/01324873034675814445-2021-17-1626431618052.csv delete mode 100644 data-products/src/test/resources/reports/archival-data/01324873034675814445-2021-20-1626431619056.csv delete mode 100644 data-products/src/test/resources/reports/archival-data/batch-001-2021-24-1626431616708.csv diff --git a/data-products/src/test/resources/reports/archival-data/.01324873034675814445-2021-16-1626431620003.csv.crc b/data-products/src/test/resources/reports/archival-data/.01324873034675814445-2021-16-1626431620003.csv.crc deleted file mode 100644 index 8f9090eaaf482663a9daac60c710145372f005e0..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 128 zcmV-`0Du2ua$^7h00IC9j$CjdkR|9o!C{J&V9|29)Z?+_+(QF_aUTQ=Kco#=?{Z6D z$$UDDOe(dEI#=mCH~Si~{YqtaUy(;3#M^bGYHThVa{M9EG`Fr&up{)pOg@RUc6{j% i4M{HKd5G6+jXGN0X)Whh1>f3GC(pk)FX;-c~5gL^4`WG4X?V>k=glw_@QBpj|8g19= i=G$>NEYpeHp^H|&c)T9i42;abw$Ea+FQ|FWC-IG->OT+w diff --git a/data-products/src/test/resources/reports/archival-data/.batch-001-2021-24-1626431616708.csv.crc b/data-products/src/test/resources/reports/archival-data/.batch-001-2021-24-1626431616708.csv.crc deleted file mode 100644 index 536ec8d1cfbec1321ad9acbf077be2525a78515f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 44 zcmYc;N@ieSU}BKDFJRSKYJK{tWL?am#K7=0xA>CRtK?kUBwky#Ik2zz;+4N108|ta AF#rGn diff --git a/data-products/src/test/resources/reports/archival-data/01324873034675814445-2021-16-1626431620003.csv b/data-products/src/test/resources/reports/archival-data/01324873034675814445-2021-16-1626431620003.csv deleted file mode 100644 index 4e427aeb7..000000000 --- a/data-products/src/test/resources/reports/archival-data/01324873034675814445-2021-16-1626431620003.csv +++ /dev/null @@ -1,3 +0,0 @@ -user_id,course_id,batch_id,content_id,attempt_id,created_on,grand_total,last_attempted_on,question,total_max_score,total_score,updated_on 
-423cd838-46d0-4e68-921c-c839eaf78b0c,do_31324384463607398417453,01324873034675814445,do_31322465112051712011308,e0f219a2e7e4ec7213cf9dcce998f376,2021-04-25T22:35:55.039+05:30,5.0/5.0,2021-04-25T22:34:12.662+05:30,"[{\"id\":\"do_31322467627417600011838\",\"assess_ts\":\"2021-04-25T22:34:49.852+05:30\",\"max_score\":1.0,\"score\":1.0,\"type\":\"mcq\",\"title\":\"कक्षा में बच्चों के लिए सीखने का वातावरण बनाने के दृष्टिकोण से कौन सा कथन सत्य है ?nnxa0n\",\"resvalues\":[{\"3\":\"{\\\"text\\\":\\\"कक्षा शिक्षण के समय बच्चों को छोटे छोटे समूह में मिलकर कार्य करने के अवसर देने चाहिए।xa0xa0\\\\n\\\"}\"}],\"params\":[{\"1\":\"{\\\"text\\\":\\\"बच्चों को आपस में बातचीत नहीं करने देनी चाहिए।xa0xa0\\\\n\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"सीखना एक मानसिक प्रक्रिया का परिणाम है, अतः सीखने में वातावरण की कोई भूमिका नहीं होती है।xa0xa0\\\\n\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"कक्षा शिक्षण के समय बच्चों को छोटे छोटे समूह में मिलकर कार्य करने के अवसर देने चाहिए।xa0xa0\\\\n\\\"}\"},{\"4\":\"{\\\"text\\\":\\\"बच्चों को सिर्फ निपुण व्यस्क द्वारा बताया गया कार्य ही करना चाहिए।xa0xa0\\\\n\\\"}\"},{\"answer\":\"{\\\"correct\\\":[\\\"3\\\"]}\"}],\"duration\":15.000000000000000000},{\"id\":\"do_31322467296029081611930\",\"assess_ts\":\"2021-04-25T22:35:12.393+05:30\",\"max_score\":1.0,\"score\":1.0,\"type\":\"mcq\",\"title\":\"न्यूरो प्लास्टिसिटी की समझ का आप कक्षा-कक्ष में क्रियान्वयन के लिए किन विकल्पों का चुनाव करेंगे ?nnxa0n\",\"resvalues\":[{\"4\":\"{\\\"text\\\":\\\"विकल्प पहला और दूसरा दोनों का कक्षा- कक्ष मेंxa0क्रियान्वयन के लिए उपयोगxa0करेंगे।\\\\n\\\"}\"}],\"params\":[{\"1\":\"{\\\"text\\\":\\\"तनाव मुक्त वातावरण में न्यूरोन्सxa0 के बीच सुगमता से सम्बन्ध स्थापित स्थापित होता है, इसीलिए भयमुक्त वातावरणxa0 निर्माण करेंगे।xa0xa0\\\\n\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"नई दक्षताओंxa0 के विकास से मस्तिष्क की जटिल बनावट प्रभावित होती है, इसीलिए विविध अनुभवों से गुजरने का मौका बच्चों को देंगे।xa0xa0xa0\\\\n\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"विकल्प पहला और दूसरा दोनों का कक्षा- कक्ष में क्रियान्वयन के लिए उपयोग नहीं करेंगे।xa0\\\\n\\\"}\"},{\"4\":\"{\\\"text\\\":\\\"विकल्प पहला और दूसरा दोनों का कक्षा- कक्ष मेंxa0क्रियान्वयन के लिए उपयोगxa0करेंगे।\\\\n\\\"}\"},{\"answer\":\"{\\\"correct\\\":[\\\"4\\\"]}\"}],\"duration\":20.000000000000000000},{\"id\":\"do_31322468187007385611840\",\"assess_ts\":\"2021-04-25T22:34:12.662+05:30\",\"max_score\":1.0,\"score\":1.0,\"type\":\"mcq\",\"title\":\"सीखने का मतलब है -xa0nnxa0n\",\"resvalues\":[{\"4\":\"{\\\"text\\\":\\\"उपरोक्त सभी।xa0\\\\n\\\"}\"}],\"params\":[{\"1\":\"{\\\"text\\\":\\\"पूर्व ज्ञान में वृद्धि, पुष्टि, या बदलाव।xa0xa0\\\\n\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"नए निष्कर्ष पर पहुंचना एवं दूसरों को भी समझा पाना।xa0xa0\\\\n\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"सीखी हुई बात से सम्बंधित कार्य कर पाना।xa0xa0\\\\n\\\"}\"},{\"4\":\"{\\\"text\\\":\\\"उपरोक्त सभी।xa0\\\\n\\\"}\"},{\"answer\":\"{\\\"correct\\\":[\\\"4\\\"]}\"}],\"duration\":24.000000000000000000},{\"id\":\"do_31322467929121587211839\",\"assess_ts\":\"2021-04-25T22:34:32.156+05:30\",\"max_score\":1.0,\"score\":1.0,\"type\":\"mcq\",\"title\":\"निम्न मेंxa0 से कौन सा कथन सत्य है ?nnxa0n\",\"resvalues\":[{\"3\":\"{\\\"text\\\":\\\"सीखने ज्ञान के बुनयादी ढांचे में बदलाव है।xa0xa0\\\\n\\\"}\"}],\"params\":[{\"1\":\"{\\\"text\\\":\\\"बच्चे कोरे कागज़ की तरह होते हैं।xa0xa0\\\\n\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"जो कुछ भी सीखा है वह व्यवहार में नज़र आना अनिवार्य है।xa0xa0\\\\n\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"सीखने ज्ञान के बुनयादी ढांचे में बदलाव है।xa0xa0\\\\n\\\"}\"},{\"4\":\"{\\\"text\\\":\\\"बच्चे ने याद 
कर लिया है तो इसका अर्थ है की उन्होंने सीख लिया है।xa0xa0xa0xa0\\\\n\\\"}\"},{\"answer\":\"{\\\"correct\\\":[\\\"3\\\"]}\"}],\"duration\":15.000000000000000000},{\"id\":\"do_31322466636939264011837\",\"assess_ts\":\"2021-04-25T22:35:32.689+05:30\",\"max_score\":1.0,\"score\":1.0,\"type\":\"mcq\",\"title\":\"सार्थकxa0और गुणवत्तापूर्ण प्रेरणा का बच्चों के सीखने से क्या सम्बन्ध है ?n\",\"resvalues\":[{\"4\":\"{\\\"text\\\":\\\"इससेxa0 बच्चों के सीखने की प्रक्रिया को रोचक बनाए रखने में मदद है।xa0xa0\\\\n\\\"}\"}],\"params\":[{\"1\":\"{\\\"text\\\":\\\"इससे बच्चों के सीखने पर कोई प्रभाव नहीxa0पड़ता।xa0xa0\\\\n\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"इससे बच्चों में नकारात्मक दृष्टिकोण को बढ़ावा मिलता है।xa0xa0\\\\n\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"इससे बच्चे हर चीज़ के लिए बड़ों पर निर्भर हो जाते हैंxa0\\\\n\\\"}\"},{\"4\":\"{\\\"text\\\":\\\"इससेxa0 बच्चों के सीखने की प्रक्रिया को रोचक बनाए रखने में मदद है।xa0xa0\\\\n\\\"}\"},{\"answer\":\"{\\\"correct\\\":[\\\"4\\\"]}\"}],\"duration\":18.000000000000000000}]",5.0,5.0,2021-04-25T22:35:55.039+05:30 -423cd838-46d0-4e68-921c-c839eaf78b0c,do_31324384463607398417453,01324873034675814445,do_31322465112051712011308,fc59c394456bf0b0e3e331e844eda4ff,2021-04-22T23:51:55.987+05:30,5.0/5.0,2021-04-22T23:49:03.064+05:30,"[{\"id\":\"do_31322467627417600011838\",\"assess_ts\":\"2021-04-22T23:50:27.875+05:30\",\"max_score\":1.0,\"score\":1.0,\"type\":\"mcq\",\"title\":\"कक्षा में बच्चों के लिए सीखने का वातावरण बनाने के दृष्टिकोण से कौन सा कथन सत्य है ?nnxa0n\",\"resvalues\":[{\"3\":\"{\\\"text\\\":\\\"कक्षा शिक्षण के समय बच्चों को छोटे छोटे समूह में मिलकर कार्य करने के अवसर देने चाहिए।xa0xa0\\\\n\\\"}\"}],\"params\":[{\"1\":\"{\\\"text\\\":\\\"बच्चों को आपस में बातचीत नहीं करने देनी चाहिए।xa0xa0\\\\n\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"सीखना एक मानसिक प्रक्रिया का परिणाम है, अतः सीखने में वातावरण की कोई भूमिका नहीं होती है।xa0xa0\\\\n\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"कक्षा शिक्षण के समय बच्चों को छोटे छोटे समूह में मिलकर कार्य करने के अवसर देने चाहिए।xa0xa0\\\\n\\\"}\"},{\"4\":\"{\\\"text\\\":\\\"बच्चों को सिर्फ निपुण व्यस्क द्वारा बताया गया कार्य ही करना चाहिए।xa0xa0\\\\n\\\"}\"},{\"answer\":\"{\\\"correct\\\":[\\\"3\\\"]}\"}],\"duration\":47.000000000000000000},{\"id\":\"do_31322467296029081611930\",\"assess_ts\":\"2021-04-22T23:51:15.737+05:30\",\"max_score\":1.0,\"score\":1.0,\"type\":\"mcq\",\"title\":\"न्यूरो प्लास्टिसिटी की समझ का आप कक्षा-कक्ष में क्रियान्वयन के लिए किन विकल्पों का चुनाव करेंगे ?nnxa0n\",\"resvalues\":[{\"4\":\"{\\\"text\\\":\\\"विकल्प पहला और दूसरा दोनों का कक्षा- कक्ष मेंxa0क्रियान्वयन के लिए उपयोगxa0करेंगे।\\\\n\\\"}\"}],\"params\":[{\"1\":\"{\\\"text\\\":\\\"तनाव मुक्त वातावरण में न्यूरोन्सxa0 के बीच सुगमता से सम्बन्ध स्थापित स्थापित होता है, इसीलिए भयमुक्त वातावरणxa0 निर्माण करेंगे।xa0xa0\\\\n\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"नई दक्षताओंxa0 के विकास से मस्तिष्क की जटिल बनावट प्रभावित होती है, इसीलिए विविध अनुभवों से गुजरने का मौका बच्चों को देंगे।xa0xa0xa0\\\\n\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"विकल्प पहला और दूसरा दोनों का कक्षा- कक्ष में क्रियान्वयन के लिए उपयोग नहीं करेंगे।xa0\\\\n\\\"}\"},{\"4\":\"{\\\"text\\\":\\\"विकल्प पहला और दूसरा दोनों का कक्षा- कक्ष मेंxa0क्रियान्वयन के लिए उपयोगxa0करेंगे।\\\\n\\\"}\"},{\"answer\":\"{\\\"correct\\\":[\\\"4\\\"]}\"}],\"duration\":43.000000000000000000},{\"id\":\"do_31322468187007385611840\",\"assess_ts\":\"2021-04-22T23:49:03.064+05:30\",\"max_score\":1.0,\"score\":1.0,\"type\":\"mcq\",\"title\":\"सीखने का मतलब है -xa0nnxa0n\",\"resvalues\":[{\"4\":\"{\\\"text\\\":\\\"उपरोक्त 
सभी।xa0\\\\n\\\"}\"}],\"params\":[{\"1\":\"{\\\"text\\\":\\\"पूर्व ज्ञान में वृद्धि, पुष्टि, या बदलाव।xa0xa0\\\\n\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"नए निष्कर्ष पर पहुंचना एवं दूसरों को भी समझा पाना।xa0xa0\\\\n\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"सीखी हुई बात से सम्बंधित कार्य कर पाना।xa0xa0\\\\n\\\"}\"},{\"4\":\"{\\\"text\\\":\\\"उपरोक्त सभी।xa0\\\\n\\\"}\"},{\"answer\":\"{\\\"correct\\\":[\\\"4\\\"]}\"}],\"duration\":34.000000000000000000},{\"id\":\"do_31322467929121587211839\",\"assess_ts\":\"2021-04-22T23:49:36.796+05:30\",\"max_score\":1.0,\"score\":1.0,\"type\":\"mcq\",\"title\":\"निम्न मेंxa0 से कौन सा कथन सत्य है ?nnxa0n\",\"resvalues\":[{\"3\":\"{\\\"text\\\":\\\"सीखने ज्ञान के बुनयादी ढांचे में बदलाव है।xa0xa0\\\\n\\\"}\"}],\"params\":[{\"1\":\"{\\\"text\\\":\\\"बच्चे कोरे कागज़ की तरह होते हैं।xa0xa0\\\\n\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"जो कुछ भी सीखा है वह व्यवहार में नज़र आना अनिवार्य है।xa0xa0\\\\n\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"सीखने ज्ञान के बुनयादी ढांचे में बदलाव है।xa0xa0\\\\n\\\"}\"},{\"4\":\"{\\\"text\\\":\\\"बच्चे ने याद कर लिया है तो इसका अर्थ है की उन्होंने सीख लिया है।xa0xa0xa0xa0\\\\n\\\"}\"},{\"answer\":\"{\\\"correct\\\":[\\\"3\\\"]}\"}],\"duration\":27.000000000000000000},{\"id\":\"do_31322466636939264011837\",\"assess_ts\":\"2021-04-22T23:51:34.768+05:30\",\"max_score\":1.0,\"score\":1.0,\"type\":\"mcq\",\"title\":\"सार्थकxa0और गुणवत्तापूर्ण प्रेरणा का बच्चों के सीखने से क्या सम्बन्ध है ?n\",\"resvalues\":[{\"4\":\"{\\\"text\\\":\\\"इससेxa0 बच्चों के सीखने की प्रक्रिया को रोचक बनाए रखने में मदद है।xa0xa0\\\\n\\\"}\"}],\"params\":[{\"1\":\"{\\\"text\\\":\\\"इससे बच्चों के सीखने पर कोई प्रभाव नहीxa0पड़ता।xa0xa0\\\\n\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"इससे बच्चों में नकारात्मक दृष्टिकोण को बढ़ावा मिलता है।xa0xa0\\\\n\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"इससे बच्चे हर चीज़ के लिए बड़ों पर निर्भर हो जाते हैंxa0\\\\n\\\"}\"},{\"4\":\"{\\\"text\\\":\\\"इससेxa0 बच्चों के सीखने की प्रक्रिया को रोचक बनाए रखने में मदद है।xa0xa0\\\\n\\\"}\"},{\"answer\":\"{\\\"correct\\\":[\\\"4\\\"]}\"}],\"duration\":16.000000000000000000}]",5.0,5.0,2021-04-22T23:51:55.987+05:30 diff --git a/data-products/src/test/resources/reports/archival-data/01324873034675814445-2021-17-1626431618052.csv b/data-products/src/test/resources/reports/archival-data/01324873034675814445-2021-17-1626431618052.csv deleted file mode 100644 index b8660f222..000000000 --- a/data-products/src/test/resources/reports/archival-data/01324873034675814445-2021-17-1626431618052.csv +++ /dev/null @@ -1,2 +0,0 @@ -user_id,course_id,batch_id,content_id,attempt_id,created_on,grand_total,last_attempted_on,question,total_max_score,total_score,updated_on -423cd838-46d0-4e68-921c-c839eaf78b0c,do_31324384463607398417453,01324873034675814445,do_31322465112051712011308,ecc950daac1d39176546ffe667f35905,2021-04-26T20:52:20.316+05:30,5.0/5.0,2021-04-26T20:51:06.824+05:30,"[{\"id\":\"do_31322467627417600011838\",\"assess_ts\":\"2021-04-26T20:51:46.107+05:30\",\"max_score\":1.0,\"score\":1.0,\"type\":\"mcq\",\"title\":\"कक्षा में बच्चों के लिए सीखने का वातावरण बनाने के दृष्टिकोण से कौन सा कथन सत्य है ?nnxa0n\",\"resvalues\":[{\"3\":\"{\\\"text\\\":\\\"कक्षा शिक्षण के समय बच्चों को छोटे छोटे समूह में मिलकर कार्य करने के अवसर देने चाहिए।xa0xa0\\\\n\\\"}\"}],\"params\":[{\"1\":\"{\\\"text\\\":\\\"बच्चों को आपस में बातचीत नहीं करने देनी चाहिए।xa0xa0\\\\n\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"सीखना एक मानसिक प्रक्रिया का परिणाम है, अतः सीखने में वातावरण की कोई भूमिका नहीं होती है।xa0xa0\\\\n\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"कक्षा शिक्षण 
के समय बच्चों को छोटे छोटे समूह में मिलकर कार्य करने के अवसर देने चाहिए।xa0xa0\\\\n\\\"}\"},{\"4\":\"{\\\"text\\\":\\\"बच्चों को सिर्फ निपुण व्यस्क द्वारा बताया गया कार्य ही करना चाहिए।xa0xa0\\\\n\\\"}\"},{\"answer\":\"{\\\"correct\\\":[\\\"3\\\"]}\"}],\"duration\":14.000000000000000000},{\"id\":\"do_31322467296029081611930\",\"assess_ts\":\"2021-04-26T20:51:57.183+05:30\",\"max_score\":1.0,\"score\":1.0,\"type\":\"mcq\",\"title\":\"न्यूरो प्लास्टिसिटी की समझ का आप कक्षा-कक्ष में क्रियान्वयन के लिए किन विकल्पों का चुनाव करेंगे ?nnxa0n\",\"resvalues\":[{\"4\":\"{\\\"text\\\":\\\"विकल्प पहला और दूसरा दोनों का कक्षा- कक्ष मेंxa0क्रियान्वयन के लिए उपयोगxa0करेंगे।\\\\n\\\"}\"}],\"params\":[{\"1\":\"{\\\"text\\\":\\\"तनाव मुक्त वातावरण में न्यूरोन्सxa0 के बीच सुगमता से सम्बन्ध स्थापित स्थापित होता है, इसीलिए भयमुक्त वातावरणxa0 निर्माण करेंगे।xa0xa0\\\\n\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"नई दक्षताओंxa0 के विकास से मस्तिष्क की जटिल बनावट प्रभावित होती है, इसीलिए विविध अनुभवों से गुजरने का मौका बच्चों को देंगे।xa0xa0xa0\\\\n\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"विकल्प पहला और दूसरा दोनों का कक्षा- कक्ष में क्रियान्वयन के लिए उपयोग नहीं करेंगे।xa0\\\\n\\\"}\"},{\"4\":\"{\\\"text\\\":\\\"विकल्प पहला और दूसरा दोनों का कक्षा- कक्ष मेंxa0क्रियान्वयन के लिए उपयोगxa0करेंगे।\\\\n\\\"}\"},{\"answer\":\"{\\\"correct\\\":[\\\"4\\\"]}\"}],\"duration\":9.000000000000000000},{\"id\":\"do_31322468187007385611840\",\"assess_ts\":\"2021-04-26T20:51:18.581+05:30\",\"max_score\":1.0,\"score\":1.0,\"type\":\"mcq\",\"title\":\"सीखने का मतलब है -xa0nnxa0n\",\"resvalues\":[{\"4\":\"{\\\"text\\\":\\\"उपरोक्त सभी।xa0\\\\n\\\"}\"}],\"params\":[{\"1\":\"{\\\"text\\\":\\\"पूर्व ज्ञान में वृद्धि, पुष्टि, या बदलाव।xa0xa0\\\\n\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"नए निष्कर्ष पर पहुंचना एवं दूसरों को भी समझा पाना।xa0xa0\\\\n\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"सीखी हुई बात से सम्बंधित कार्य कर पाना।xa0xa0\\\\n\\\"}\"},{\"4\":\"{\\\"text\\\":\\\"उपरोक्त सभी।xa0\\\\n\\\"}\"},{\"answer\":\"{\\\"correct\\\":[\\\"4\\\"]}\"}],\"duration\":18.000000000000000000},{\"id\":\"do_31322467929121587211839\",\"assess_ts\":\"2021-04-26T20:51:29.948+05:30\",\"max_score\":1.0,\"score\":1.0,\"type\":\"mcq\",\"title\":\"निम्न मेंxa0 से कौन सा कथन सत्य है ?nnxa0n\",\"resvalues\":[{\"3\":\"{\\\"text\\\":\\\"सीखने ज्ञान के बुनयादी ढांचे में बदलाव है।xa0xa0\\\\n\\\"}\"}],\"params\":[{\"1\":\"{\\\"text\\\":\\\"बच्चे कोरे कागज़ की तरह होते हैं।xa0xa0\\\\n\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"जो कुछ भी सीखा है वह व्यवहार में नज़र आना अनिवार्य है।xa0xa0\\\\n\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"सीखने ज्ञान के बुनयादी ढांचे में बदलाव है।xa0xa0\\\\n\\\"}\"},{\"4\":\"{\\\"text\\\":\\\"बच्चे ने याद कर लिया है तो इसका अर्थ है की उन्होंने सीख लिया है।xa0xa0xa0xa0\\\\n\\\"}\"},{\"answer\":\"{\\\"correct\\\":[\\\"3\\\"]}\"}],\"duration\":9.000000000000000000},{\"id\":\"do_31322466636939264011837\",\"assess_ts\":\"2021-04-26T20:52:08.031+05:30\",\"max_score\":1.0,\"score\":1.0,\"type\":\"mcq\",\"title\":\"सार्थकxa0और गुणवत्तापूर्ण प्रेरणा का बच्चों के सीखने से क्या सम्बन्ध है ?n\",\"resvalues\":[{\"4\":\"{\\\"text\\\":\\\"इससेxa0 बच्चों के सीखने की प्रक्रिया को रोचक बनाए रखने में मदद है।xa0xa0\\\\n\\\"}\"}],\"params\":[{\"1\":\"{\\\"text\\\":\\\"इससे बच्चों के सीखने पर कोई प्रभाव नहीxa0पड़ता।xa0xa0\\\\n\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"इससे बच्चों में नकारात्मक दृष्टिकोण को बढ़ावा मिलता है।xa0xa0\\\\n\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"इससे बच्चे हर चीज़ के लिए बड़ों पर निर्भर हो जाते हैंxa0\\\\n\\\"}\"},{\"4\":\"{\\\"text\\\":\\\"इससेxa0 बच्चों के सीखने की प्रक्रिया को 
रोचक बनाए रखने में मदद है।xa0xa0\\\\n\\\"}\"},{\"answer\":\"{\\\"correct\\\":[\\\"4\\\"]}\"}],\"duration\":8.000000000000000000}]",5.0,5.0,2021-04-26T20:52:20.316+05:30 diff --git a/data-products/src/test/resources/reports/archival-data/01324873034675814445-2021-20-1626431619056.csv b/data-products/src/test/resources/reports/archival-data/01324873034675814445-2021-20-1626431619056.csv deleted file mode 100644 index aeedd4b5b..000000000 --- a/data-products/src/test/resources/reports/archival-data/01324873034675814445-2021-20-1626431619056.csv +++ /dev/null @@ -1,3 +0,0 @@ -user_id,course_id,batch_id,content_id,attempt_id,created_on,grand_total,last_attempted_on,question,total_max_score,total_score,updated_on -423cd838-46d0-4e68-921c-c839eaf78b0c,do_31324384463607398417453,01324873034675814445,do_31322465112051712011308,cf6b5389e6cc1eda8c6830408c730613,2021-05-19T22:10:50.844+05:30,5.0/5.0,2021-05-19T22:10:44.271+05:30,"[{\"id\":\"do_31322467627417600011838\",\"assess_ts\":\"2021-04-22T23:50:27.875+05:30\",\"max_score\":1.0,\"score\":1.0,\"title\":\"कक्षा में बच्चों के लिए सीखने का वातावरण बनाने के दृष्टिकोण से कौन सा कथन सत्य है ?nnxa0n\",\"resvalues\":[{\"3\":\"{\\\"text\\\":\\\"कक्षा शिक्षण के समय बच्चों को छोटे छोटे समूह में मिलकर कार्य करने के अवसर देने चाहिए।xa0xa0\\\\n\\\"}\"}],\"params\":[{\"1\":\"{\\\"text\\\":\\\"बच्चों को आपस में बातचीत नहीं करने देनी चाहिए।xa0xa0\\\\n\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"सीखना एक मानसिक प्रक्रिया का परिणाम है, अतः सीखने में वातावरण की कोई भूमिका नहीं होती है।xa0xa0\\\\n\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"कक्षा शिक्षण के समय बच्चों को छोटे छोटे समूह में मिलकर कार्य करने के अवसर देने चाहिए।xa0xa0\\\\n\\\"}\"},{\"4\":\"{\\\"text\\\":\\\"बच्चों को सिर्फ निपुण व्यस्क द्वारा बताया गया कार्य ही करना चाहिए।xa0xa0\\\\n\\\"}\"},{\"answer\":\"{\\\"correct\\\":[\\\"3\\\"]}\"}],\"duration\":47.000000000000000000},{\"id\":\"do_31322467296029081611930\",\"assess_ts\":\"2021-04-22T23:51:15.737+05:30\",\"max_score\":1.0,\"score\":1.0,\"title\":\"न्यूरो प्लास्टिसिटी की समझ का आप कक्षा-कक्ष में क्रियान्वयन के लिए किन विकल्पों का चुनाव करेंगे ?nnxa0n\",\"resvalues\":[{\"4\":\"{\\\"text\\\":\\\"विकल्प पहला और दूसरा दोनों का कक्षा- कक्ष मेंxa0क्रियान्वयन के लिए उपयोगxa0करेंगे।\\\\n\\\"}\"}],\"params\":[{\"1\":\"{\\\"text\\\":\\\"तनाव मुक्त वातावरण में न्यूरोन्सxa0 के बीच सुगमता से सम्बन्ध स्थापित स्थापित होता है, इसीलिए भयमुक्त वातावरणxa0 निर्माण करेंगे।xa0xa0\\\\n\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"नई दक्षताओंxa0 के विकास से मस्तिष्क की जटिल बनावट प्रभावित होती है, इसीलिए विविध अनुभवों से गुजरने का मौका बच्चों को देंगे।xa0xa0xa0\\\\n\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"विकल्प पहला और दूसरा दोनों का कक्षा- कक्ष में क्रियान्वयन के लिए उपयोग नहीं करेंगे।xa0\\\\n\\\"}\"},{\"4\":\"{\\\"text\\\":\\\"विकल्प पहला और दूसरा दोनों का कक्षा- कक्ष मेंxa0क्रियान्वयन के लिए उपयोगxa0करेंगे।\\\\n\\\"}\"},{\"answer\":\"{\\\"correct\\\":[\\\"4\\\"]}\"}],\"duration\":43.000000000000000000},{\"id\":\"do_31322468187007385611840\",\"assess_ts\":\"2021-04-22T23:49:03.064+05:30\",\"max_score\":1.0,\"score\":1.0,\"title\":\"सीखने का मतलब है -xa0nnxa0n\",\"resvalues\":[{\"4\":\"{\\\"text\\\":\\\"उपरोक्त सभी।xa0\\\\n\\\"}\"}],\"params\":[{\"1\":\"{\\\"text\\\":\\\"पूर्व ज्ञान में वृद्धि, पुष्टि, या बदलाव।xa0xa0\\\\n\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"नए निष्कर्ष पर पहुंचना एवं दूसरों को भी समझा पाना।xa0xa0\\\\n\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"सीखी हुई बात से सम्बंधित कार्य कर पाना।xa0xa0\\\\n\\\"}\"},{\"4\":\"{\\\"text\\\":\\\"उपरोक्त 
सभी।xa0\\\\n\\\"}\"},{\"answer\":\"{\\\"correct\\\":[\\\"4\\\"]}\"}],\"duration\":34.000000000000000000},{\"id\":\"do_31322467929121587211839\",\"assess_ts\":\"2021-04-22T23:49:36.796+05:30\",\"max_score\":1.0,\"score\":1.0,\"title\":\"निम्न मेंxa0 से कौन सा कथन सत्य है ?nnxa0n\",\"resvalues\":[{\"3\":\"{\\\"text\\\":\\\"सीखने ज्ञान के बुनयादी ढांचे में बदलाव है।xa0xa0\\\\n\\\"}\"}],\"params\":[{\"1\":\"{\\\"text\\\":\\\"बच्चे कोरे कागज़ की तरह होते हैं।xa0xa0\\\\n\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"जो कुछ भी सीखा है वह व्यवहार में नज़र आना अनिवार्य है।xa0xa0\\\\n\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"सीखने ज्ञान के बुनयादी ढांचे में बदलाव है।xa0xa0\\\\n\\\"}\"},{\"4\":\"{\\\"text\\\":\\\"बच्चे ने याद कर लिया है तो इसका अर्थ है की उन्होंने सीख लिया है।xa0xa0xa0xa0\\\\n\\\"}\"},{\"answer\":\"{\\\"correct\\\":[\\\"3\\\"]}\"}],\"duration\":27.000000000000000000},{\"id\":\"do_31322466636939264011837\",\"assess_ts\":\"2021-04-22T23:51:34.768+05:30\",\"max_score\":1.0,\"score\":1.0,\"title\":\"सार्थकxa0और गुणवत्तापूर्ण प्रेरणा का बच्चों के सीखने से क्या सम्बन्ध है ?n\",\"resvalues\":[{\"4\":\"{\\\"text\\\":\\\"इससेxa0 बच्चों के सीखने की प्रक्रिया को रोचक बनाए रखने में मदद है।xa0xa0\\\\n\\\"}\"}],\"params\":[{\"1\":\"{\\\"text\\\":\\\"इससे बच्चों के सीखने पर कोई प्रभाव नहीxa0पड़ता।xa0xa0\\\\n\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"इससे बच्चों में नकारात्मक दृष्टिकोण को बढ़ावा मिलता है।xa0xa0\\\\n\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"इससे बच्चे हर चीज़ के लिए बड़ों पर निर्भर हो जाते हैंxa0\\\\n\\\"}\"},{\"4\":\"{\\\"text\\\":\\\"इससेxa0 बच्चों के सीखने की प्रक्रिया को रोचक बनाए रखने में मदद है।xa0xa0\\\\n\\\"}\"},{\"answer\":\"{\\\"correct\\\":[\\\"4\\\"]}\"}],\"duration\":16.000000000000000000}]",5.0,5.0,2021-05-19T22:10:50.844+05:30 -423cd838-46d0-4e68-921c-c839eaf78b0c,do_31324384463607398417453,01324873034675814445,do_3132247563133255681780,218ef326d5da6f349d3a57e2275c5190,2021-05-19T22:40:25.506+05:30,3.0/5.0,2021-05-19T22:40:23.965+05:30,"[{\"id\":\"do_31322467627417600011838\",\"assess_ts\":\"2021-04-26T21:03:03.235+05:30\",\"max_score\":1.0,\"score\":0.0,\"title\":\"कक्षा में बच्चों के लिए सीखने का वातावरण बनाने के दृष्टिकोण से कौन सा कथन सत्य है ?nnxa0n\",\"resvalues\":[{\"2\":\"{\\\"text\\\":\\\"सीखना एक मानसिक प्रक्रिया का परिणाम है, अतः सीखने में वातावरण की कोई भूमिका नहीं होती है।xa0xa0\\\\n\\\"}\"}],\"params\":[{\"1\":\"{\\\"text\\\":\\\"बच्चों को आपस में बातचीत नहीं करने देनी चाहिए।xa0xa0\\\\n\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"सीखना एक मानसिक प्रक्रिया का परिणाम है, अतः सीखने में वातावरण की कोई भूमिका नहीं होती है।xa0xa0\\\\n\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"कक्षा शिक्षण के समय बच्चों को छोटे छोटे समूह में मिलकर कार्य करने के अवसर देने चाहिए।xa0xa0\\\\n\\\"}\"},{\"4\":\"{\\\"text\\\":\\\"बच्चों को सिर्फ निपुण व्यस्क द्वारा बताया गया कार्य ही करना चाहिए।xa0xa0\\\\n\\\"}\"},{\"answer\":\"{\\\"correct\\\":[\\\"3\\\"]}\"}],\"duration\":4.000000000000000000},{\"id\":\"do_31322467296029081611930\",\"assess_ts\":\"2021-04-26T21:03:12.796+05:30\",\"max_score\":1.0,\"score\":1.0,\"title\":\"न्यूरो प्लास्टिसिटी की समझ का आप कक्षा-कक्ष में क्रियान्वयन के लिए किन विकल्पों का चुनाव करेंगे ?nnxa0n\",\"resvalues\":[{\"4\":\"{\\\"text\\\":\\\"विकल्प पहला और दूसरा दोनों का कक्षा- कक्ष मेंxa0क्रियान्वयन के लिए उपयोगxa0करेंगे।\\\\n\\\"}\"}],\"params\":[{\"1\":\"{\\\"text\\\":\\\"तनाव मुक्त वातावरण में न्यूरोन्सxa0 के बीच सुगमता से सम्बन्ध स्थापित स्थापित होता है, इसीलिए भयमुक्त वातावरणxa0 निर्माण करेंगे।xa0xa0\\\\n\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"नई दक्षताओंxa0 के विकास से मस्तिष्क की जटिल बनावट प्रभावित 
होती है, इसीलिए विविध अनुभवों से गुजरने का मौका बच्चों को देंगे।xa0xa0xa0\\\\n\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"विकल्प पहला और दूसरा दोनों का कक्षा- कक्ष में क्रियान्वयन के लिए उपयोग नहीं करेंगे।xa0\\\\n\\\"}\"},{\"4\":\"{\\\"text\\\":\\\"विकल्प पहला और दूसरा दोनों का कक्षा- कक्ष मेंxa0क्रियान्वयन के लिए उपयोगxa0करेंगे।\\\\n\\\"}\"},{\"answer\":\"{\\\"correct\\\":[\\\"4\\\"]}\"}],\"duration\":10.000000000000000000},{\"id\":\"do_31322468187007385611840\",\"assess_ts\":\"2021-04-26T21:02:57.005+05:30\",\"max_score\":1.0,\"score\":1.0,\"title\":\"सीखने का मतलब है -xa0nnxa0n\",\"resvalues\":[{\"4\":\"{\\\"text\\\":\\\"उपरोक्त सभी।xa0\\\\n\\\"}\"}],\"params\":[{\"1\":\"{\\\"text\\\":\\\"पूर्व ज्ञान में वृद्धि, पुष्टि, या बदलाव।xa0xa0\\\\n\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"नए निष्कर्ष पर पहुंचना एवं दूसरों को भी समझा पाना।xa0xa0\\\\n\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"सीखी हुई बात से सम्बंधित कार्य कर पाना।xa0xa0\\\\n\\\"}\"},{\"4\":\"{\\\"text\\\":\\\"उपरोक्त सभी।xa0\\\\n\\\"}\"},{\"answer\":\"{\\\"correct\\\":[\\\"4\\\"]}\"}],\"duration\":5.000000000000000000},{\"id\":\"do_31322467929121587211839\",\"assess_ts\":\"2021-04-26T21:02:59.120+05:30\",\"max_score\":1.0,\"score\":0.0,\"title\":\"निम्न मेंxa0 से कौन सा कथन सत्य है ?nnxa0n\",\"resvalues\":[],\"params\":[{\"1\":\"{\\\"text\\\":\\\"बच्चे कोरे कागज़ की तरह होते हैं।xa0xa0\\\\n\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"जो कुछ भी सीखा है वह व्यवहार में नज़र आना अनिवार्य है।xa0xa0\\\\n\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"सीखने ज्ञान के बुनयादी ढांचे में बदलाव है।xa0xa0\\\\n\\\"}\"},{\"4\":\"{\\\"text\\\":\\\"बच्चे ने याद कर लिया है तो इसका अर्थ है की उन्होंने सीख लिया है।xa0xa0xa0xa0\\\\n\\\"}\"},{\"answer\":\"{\\\"correct\\\":[\\\"3\\\"]}\"}],\"duration\":2.000000000000000000},{\"id\":\"do_31322466636939264011837\",\"assess_ts\":\"2021-04-26T21:03:21.172+05:30\",\"max_score\":1.0,\"score\":1.0,\"title\":\"सार्थकxa0और गुणवत्तापूर्ण प्रेरणा का बच्चों के सीखने से क्या सम्बन्ध है ?n\",\"resvalues\":[{\"4\":\"{\\\"text\\\":\\\"इससेxa0 बच्चों के सीखने की प्रक्रिया को रोचक बनाए रखने में मदद है।xa0xa0\\\\n\\\"}\"}],\"params\":[{\"1\":\"{\\\"text\\\":\\\"इससे बच्चों के सीखने पर कोई प्रभाव नहीxa0पड़ता।xa0xa0\\\\n\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"इससे बच्चों में नकारात्मक दृष्टिकोण को बढ़ावा मिलता है।xa0xa0\\\\n\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"इससे बच्चे हर चीज़ के लिए बड़ों पर निर्भर हो जाते हैंxa0\\\\n\\\"}\"},{\"4\":\"{\\\"text\\\":\\\"इससेxa0 बच्चों के सीखने की प्रक्रिया को रोचक बनाए रखने में मदद है।xa0xa0\\\\n\\\"}\"},{\"answer\":\"{\\\"correct\\\":[\\\"4\\\"]}\"}],\"duration\":8.000000000000000000}]",5.0,3.0,2021-05-19T22:40:25.506+05:30 diff --git a/data-products/src/test/resources/reports/archival-data/batch-001-2021-24-1626431616708.csv b/data-products/src/test/resources/reports/archival-data/batch-001-2021-24-1626431616708.csv deleted file mode 100644 index b5fb6d7b3..000000000 --- a/data-products/src/test/resources/reports/archival-data/batch-001-2021-24-1626431616708.csv +++ /dev/null @@ -1,6 +0,0 @@ -user_id,course_id,batch_id,content_id,attempt_id,created_on,grand_total,last_attempted_on,question,total_max_score,total_score,updated_on -user-008,do_11306040245271756813015,batch-001,do_112876961957437440179,attempat-001,"",10,"",[],10.0,10.0,2021-06-17T05:30:00.000+05:30 -user-010,do_11306040245271756813015,batch-001,do_11307593493010022418,attempat-001,"",15,"",[],15.0,15.0,2021-06-17T05:30:00.000+05:30 -user-002,do_1130928636168192001667,batch-001,do_1128870328040161281204,attempat-001,"",10,"",[],10.0,10.0,2021-06-17T05:30:00.000+05:30 
-user-001,do_1130928636168192001667,batch-001,do_1128870328040161281204,attempat-001,"",20,"","[{\"id\":\"do_213019475454476288155\",\"assess_ts\":\"2020-06-18T23:45:56.490+05:30\",\"max_score\":1.0,\"score\":1.0,\"type\":\"mcq\",\"title\":\"testQuestiontextandformula\",\"resvalues\":[{\"1\":\"{\\\"text\\\":\\\"A=\\\\\\\\\\\\\\\\pi r^2\\\\n\\\"}\"}],\"params\":[{\"1\":\"{\\\"text\\\":\\\"A=\\\\\\\\\\\\\\\\pi r^2\\\\n\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"no\\\\n\\\"}\"},{\"answer\":\"{\\\"correct\\\":[\\\"1\\\"]}\"}],\"description\":\"testQuestiontextandformula\",\"duration\":1.000000000000000000},{\"id\":\"do_213019970118279168165\",\"assess_ts\":\"2020-06-18T23:45:56.490+05:30\",\"max_score\":1.0,\"score\":1.0,\"type\":\"mcq\",\"title\":\"test with formula\",\"resvalues\":[{\"1\":\"{\\\"text\\\":\\\"1\\\\nA=\\\\\\\\\\\\\\\\pi r^2A=\\\\\\\\\\\\\\\\pi r^2\\\\n\\\"}\"}],\"params\":[{\"1\":\"{\\\"text\\\":\\\"1\\\\nA=\\\\\\\\\\\\\\\\pi r^2A=\\\\\\\\\\\\\\\\pi r^2\\\\n\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"2\\\\n\\\"}\"},{\"answer\":\"{\\\"correct\\\":[\\\"1\\\"]}\"}],\"description\":\"\",\"duration\":1.000000000000000000},{\"id\":\"do_213019972814823424168\",\"assess_ts\":\"2020-06-18T23:45:56.490+05:30\",\"max_score\":1.0,\"score\":0.33,\"type\":\"mtf\",\"title\":\"Copy of - Match the following:\\\\n\\\\nx=\\\\\\\\frac{-b\\\\\\\\pm\\\\\\\\sqrt{b^2-4ac}}{2a}\\\\nArrange the following equations in correct order.\\\\n\",\"resvalues\":[{\"lhs\":\"[{\\\"1\\\":\\\"{\\\\\\\"text\\\\\\\":\\\\\\\"A=\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\pi r^2\\\\\\\\n\\\\\\\"}\\\"},{\\\"2\\\":\\\"{\\\\\\\"text\\\\\\\":\\\\\\\"\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\frac{4}{3}\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\pi r^3\\\\\\\\n\\\\\\\"}\\\"},{\\\"3\\\":\\\"{\\\\\\\"text\\\\\\\":\\\\\\\"a^n\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\times a^m=a^{n+m}\\\\\\\\n\\\\\\\"}\\\"}]\"},{\"rhs\":\"[{\\\"1\\\":\\\"{\\\\\\\"text\\\\\\\":\\\\\\\"Volume of sphere\\\\\\\\n\\\\\\\"}\\\"},{\\\"2\\\":\\\"{\\\\\\\"text\\\\\\\":\\\\\\\"Area of Circle\\\\\\\\n\\\\\\\"}\\\"},{\\\"3\\\":\\\"{\\\\\\\"text\\\\\\\":\\\\\\\"Product Rule\\\\\\\\n\\\\\\\"}\\\"}]\"}],\"params\":[{\"lhs\":\"[{\\\"1\\\":\\\"{\\\\\\\"text\\\\\\\":\\\\\\\"A=\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\pi r^2\\\\\\\\n\\\\\\\"}\\\"},{\\\"2\\\":\\\"{\\\\\\\"text\\\\\\\":\\\\\\\"\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\frac{4}{3}\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\pi r^3\\\\\\\\n\\\\\\\"}\\\"},{\\\"3\\\":\\\"{\\\\\\\"text\\\\\\\":\\\\\\\"a^n\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\times a^m=a^{n+m}\\\\\\\\n\\\\\\\"}\\\"}]\"},{\"rhs\":\"[{\\\"1\\\":\\\"{\\\\\\\"text\\\\\\\":\\\\\\\"Volume of sphere\\\\\\\\n\\\\\\\"}\\\"},{\\\"2\\\":\\\"{\\\\\\\"text\\\\\\\":\\\\\\\"Product Rule\\\\\\\\n\\\\\\\"}\\\"},{\\\"3\\\":\\\"{\\\\\\\"text\\\\\\\":\\\\\\\"Area of Circle\\\\\\\\n\\\\\\\"}\\\"}]\"},{\"answer\":\"{\\\"lhs\\\":[\\\"1\\\",\\\"2\\\",\\\"3\\\"],\\\"rhs\\\":[\\\"3\\\",\\\"1\\\",\\\"2\\\"]}\"}],\"description\":\"\",\"duration\":2.000000000000000000},{\"id\":\"do_2130256513760624641171\",\"assess_ts\":\"2020-06-18T23:45:56.490+05:30\",\"max_score\":10.0,\"score\":10.0,\"type\":\"mcq\",\"title\":\"2 +2 is..? 
mark ia 10\\\\n\",\"resvalues\":[{\"1\":\"{\\\"text\\\":\\\"4\\\\n\\\"}\"}],\"params\":[{\"1\":\"{\\\"text\\\":\\\"4\\\\n\\\"}\"},{\"2\":\"{\\\"text\\\":\\\"3\\\\n\\\"}\"},{\"3\":\"{\\\"text\\\":\\\"8\\\\n\\\"}\"},{\"4\":\"{\\\"text\\\":\\\"10\\\\n\\\"}\"},{\"answer\":\"{\\\"correct\\\":[\\\"1\\\"]}\"}],\"description\":\"\",\"duration\":12.000000000000000000}]",20.0,20.0,2021-06-17T05:30:00.000+05:30 -user-003,do_1130928636168192001667,batch-001,do_112876961957437440179,attempat-001,"",10,"",[],10.0,10.0,2021-06-17T05:30:00.000+05:30 From ec710cddd771c34af7148ae9353f00604655843b Mon Sep 17 00:00:00 2001 From: Manjunath Davanam Date: Fri, 16 Jul 2021 17:01:23 +0530 Subject: [PATCH 14/55] Issue SB-24793 feat: removing the unwanted imports --- .../exhaust/collection/BaseCollectionExhaustJob.scala | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/data-products/src/main/scala/org/sunbird/analytics/exhaust/collection/BaseCollectionExhaustJob.scala b/data-products/src/main/scala/org/sunbird/analytics/exhaust/collection/BaseCollectionExhaustJob.scala index d5e07e170..9e2d1b786 100644 --- a/data-products/src/main/scala/org/sunbird/analytics/exhaust/collection/BaseCollectionExhaustJob.scala +++ b/data-products/src/main/scala/org/sunbird/analytics/exhaust/collection/BaseCollectionExhaustJob.scala @@ -1,7 +1,6 @@ package org.sunbird.analytics.exhaust.collection import com.datastax.spark.connector.cql.CassandraConnectorConf -import com.google.gson.Gson import org.apache.spark.SparkContext import org.apache.spark.sql.cassandra._ import org.apache.spark.sql.functions._ @@ -489,7 +488,6 @@ trait BaseCollectionExhaustJob extends BaseReportsJob with IJob with OnDemandExh } object UDFUtils extends Serializable { - val gson = new Gson() def toDecryptFun(str: String): String = { DecryptUtil.decryptData(str) } @@ -511,17 +509,8 @@ object UDFUtils extends Serializable { sanitizedStr; } - def toParseFun(array: AnyRef): String = { - import scala.collection.JavaConverters._ - val str = gson.toJson(array) - val sanitizedStr = str.replace("\\n", "").replace("\\", "").replace("\"", "'"); - sanitizedStr; - } - val toJSON = udf[String, AnyRef](toJSONFun) - val parseResult = udf[String, AnyRef](toParseFun) - def extractFromArrayStringFun(board: String): String = { try { val str = JSONUtils.deserialize[AnyRef](board); From e29b23e22f861b55953bf2fdf4993aa97a0c23e4 Mon Sep 17 00:00:00 2001 From: Manjunath Davanam Date: Mon, 19 Jul 2021 14:24:38 +0530 Subject: [PATCH 15/55] Issue SB-25481 feat: Assessment archival data product archive the data per batch wise --- .../job/report/AssessmentArchivalJob.scala | 22 +++++++------------ 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala index 991cefb5c..c33078084 100644 --- a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala +++ b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala @@ -16,7 +16,7 @@ import java.util.concurrent.atomic.AtomicInteger object AssessmentArchivalJob extends optional.Application with IJob with BaseReportsJob { val cassandraUrl = "org.apache.spark.sql.cassandra" - private val assessmentAggDBSettings: Map[String, String] = Map("table" -> "assessment_aggregator", "keyspace" -> AppConf.getConfig("sunbird.courses.keyspace"), "cluster" -> "LMSCluster") + private val 
assessmentAggDBSettings: Map[String, String] = Map("table" -> AppConf.getConfig("sunbird.courses.assessment.table"), "keyspace" -> AppConf.getConfig("sunbird.courses.keyspace"), "cluster" -> "LMSCluster") implicit val className: String = "org.sunbird.analytics.job.report.AssessmentArchivalJob" private val partitionCols = List("batch_id", "year", "week_of_year") @@ -55,7 +55,9 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep // $COVERAGE-ON$ def archiveData(sparkSession: SparkSession, fetchData: (SparkSession, Map[String, String], String, StructType) => DataFrame, jobConfig: JobConfig): Array[Map[String, Any]] = { - val assessmentData: DataFrame = getAssessmentData(sparkSession, fetchData) + val modelParams = jobConfig.modelParams.get + val batches: List[String] = modelParams.getOrElse("batches", List()).asInstanceOf[List[String]] + val assessmentData: DataFrame = getAssessmentData(sparkSession, fetchData, batches) .withColumn("updated_on", to_timestamp(col("updated_on"))) .withColumn("year", year(col("updated_on"))) .withColumn("week_of_year", weekofyear(col("updated_on"))) @@ -76,24 +78,16 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep } } - def getAssessmentData(spark: SparkSession, fetchData: (SparkSession, Map[String, String], String, StructType) => DataFrame): DataFrame = { - fetchData(spark, assessmentAggDBSettings, cassandraUrl, new StructType()) + def getAssessmentData(spark: SparkSession, fetchData: (SparkSession, Map[String, String], String, StructType) => DataFrame, batchIds: List[String]): DataFrame = { + val assessmentDF = fetchData(spark, assessmentAggDBSettings, cassandraUrl, new StructType()) + if (batchIds.nonEmpty) assessmentDF.filter(col("batch_id").isin(batchIds)) else assessmentDF } def deleteRecords(sparkSession: SparkSession, keyspace: String, table: String): Unit = { - //sparkSession.sql(s"TRUNCATE TABLE $keyspace.$table") + // sparkSession.sql(s"TRUNCATE TABLE $keyspace.$table") JobLogger.log(s"The Job Cleared The Table Data SuccessFully, Please Execute The Compaction", None, INFO) } - // def syncToCloud(archivedData: DataFrame, batch: BatchPartition, conf: JobConfig): CompletableFuture[Map[String, Any]] = { - // CompletableFuture.supplyAsync(new Supplier[Map[String, Any]]() { - // override def get(): Map[String, Any] = { - // val res = CommonUtil.time(upload(archivedData, s"${batch.batch_id}-${batch.year}-${batch.week_of_year}", conf)) - // Map("batch_id" -> batch.batch_id, "year" -> batch.year, "week_of_year" -> batch.week_of_year, "time_taken" -> res._1, "total_records" -> archivedData.count()) - // } - // }) - // } - def upload(archivedData: DataFrame, batch: BatchPartition, jobConfig: JobConfig): List[String] = { From 7e4ea761ba80684233ae10536145d2a22c695ade Mon Sep 17 00:00:00 2001 From: Manjunath Davanam Date: Mon, 19 Jul 2021 15:38:36 +0530 Subject: [PATCH 16/55] Issue SB-25481 feat: Assessment archival data product archive the data per batch wise --- .../sunbird/analytics/job/report/AssessmentArchivalJob.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala index c33078084..d72c02abf 100644 --- a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala +++ b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala @@ -56,7 
+56,7 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep // $COVERAGE-ON$ def archiveData(sparkSession: SparkSession, fetchData: (SparkSession, Map[String, String], String, StructType) => DataFrame, jobConfig: JobConfig): Array[Map[String, Any]] = { val modelParams = jobConfig.modelParams.get - val batches: List[String] = modelParams.getOrElse("batches", List()).asInstanceOf[List[String]] + val batches: List[String] = JSONUtils.deserialize[List[String]](modelParams.getOrElse("batches", "").asInstanceOf[String]) val assessmentData: DataFrame = getAssessmentData(sparkSession, fetchData, batches) .withColumn("updated_on", to_timestamp(col("updated_on"))) .withColumn("year", year(col("updated_on"))) From b32d1e6ef69d28718fca4c9780c1a3a2428fc816 Mon Sep 17 00:00:00 2001 From: Manjunath Davanam Date: Mon, 19 Jul 2021 16:15:06 +0530 Subject: [PATCH 17/55] Issue SB-25481 feat: Assessment archival data product archive the data per batch wise --- .../sunbird/analytics/job/report/AssessmentArchivalJob.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala index d72c02abf..ff9f7290c 100644 --- a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala +++ b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala @@ -55,8 +55,7 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep // $COVERAGE-ON$ def archiveData(sparkSession: SparkSession, fetchData: (SparkSession, Map[String, String], String, StructType) => DataFrame, jobConfig: JobConfig): Array[Map[String, Any]] = { - val modelParams = jobConfig.modelParams.get - val batches: List[String] = JSONUtils.deserialize[List[String]](modelParams.getOrElse("batches", "").asInstanceOf[String]) + val batches: List[String] = AppConf.getConfig("assessment.batches").split(",").toList val assessmentData: DataFrame = getAssessmentData(sparkSession, fetchData, batches) .withColumn("updated_on", to_timestamp(col("updated_on"))) .withColumn("year", year(col("updated_on"))) From 22f76e9d2ef6319fb74d13e05d31847de7a36cba Mon Sep 17 00:00:00 2001 From: Manjunath Davanam Date: Mon, 19 Jul 2021 16:25:29 +0530 Subject: [PATCH 18/55] Issue SB-25481 feat: Assessment archival data product archive the data per batch wise --- .../sunbird/analytics/job/report/AssessmentArchivalJob.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala index ff9f7290c..eaa5d7e8a 100644 --- a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala +++ b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala @@ -79,7 +79,7 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep def getAssessmentData(spark: SparkSession, fetchData: (SparkSession, Map[String, String], String, StructType) => DataFrame, batchIds: List[String]): DataFrame = { val assessmentDF = fetchData(spark, assessmentAggDBSettings, cassandraUrl, new StructType()) - if (batchIds.nonEmpty) assessmentDF.filter(col("batch_id").isin(batchIds)) else assessmentDF + if (batchIds.nonEmpty) 
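
The two patches above switch how the batch list reaches the job: PATCH 16 deserializes a JSON array carried as a string in modelParams, and PATCH 17 reads a comma-separated property from AppConf. A minimal, dependency-free sketch of both parsing styles; the naive JSON handling below is illustrative only, since the real job delegates to the framework's JSONUtils.deserialize.

object BatchListParsing {

  // Style of PATCH 16: the model parameter carries a JSON array as a string,
  // e.g. """["batch-001","batch-002"]""". Naive parse for illustration only;
  // the actual job uses JSONUtils.deserialize[List[String]].
  def fromJsonString(raw: String): List[String] =
    raw.trim.stripPrefix("[").stripSuffix("]")
      .split(",").map(_.trim.stripPrefix("\"").stripSuffix("\""))
      .filter(_.nonEmpty).toList

  // Style of PATCH 17: a comma-separated config property. Dropping empty
  // entries mirrors the `.filter(x => x.nonEmpty)` guard added later in the
  // series for the case where the property is blank.
  def fromCsvProperty(raw: String): List[String] =
    raw.split(",").map(_.trim).filter(_.nonEmpty).toList

  def main(args: Array[String]): Unit = {
    println(fromJsonString("""["batch-001","batch-002"]""")) // List(batch-001, batch-002)
    println(fromCsvProperty("batch-001,batch-002,"))         // List(batch-001, batch-002)
  }
}
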
assessmentDF.filter(col("batch_id").isInCollection(batchIds)) else assessmentDF } def deleteRecords(sparkSession: SparkSession, keyspace: String, table: String): Unit = { From ffa1c5fdf21121224e90a3c221344bf30f75f49b Mon Sep 17 00:00:00 2001 From: Manjunath Davanam Date: Mon, 19 Jul 2021 17:06:19 +0530 Subject: [PATCH 19/55] Issue SB-25481 feat: Assessment archival data product archive the data per batch wise --- .../analytics/job/report/AssessmentArchivalJob.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala index eaa5d7e8a..b292f82a4 100644 --- a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala +++ b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala @@ -56,11 +56,11 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep // $COVERAGE-ON$ def archiveData(sparkSession: SparkSession, fetchData: (SparkSession, Map[String, String], String, StructType) => DataFrame, jobConfig: JobConfig): Array[Map[String, Any]] = { val batches: List[String] = AppConf.getConfig("assessment.batches").split(",").toList - val assessmentData: DataFrame = getAssessmentData(sparkSession, fetchData, batches) - .withColumn("updated_on", to_timestamp(col("updated_on"))) + val assessmentDF: DataFrame = getAssessmentData(sparkSession, fetchData, batches).persist() + val assessmentData = assessmentDF.withColumn("updated_on", to_timestamp(col("updated_on"))) .withColumn("year", year(col("updated_on"))) .withColumn("week_of_year", weekofyear(col("updated_on"))) - .withColumn("question", to_json(col("question"))) + .withColumn("question", to_json(col("question"))).persist() val archivedBatchList = assessmentData.groupBy(partitionCols.head, partitionCols.tail: _*).count().collect() val archivedBatchCount = new AtomicInteger(archivedBatchList.length) JobLogger.log(s"Total Batches to Archive By Year & Week $archivedBatchCount", None, INFO) From 5db16a1a6a6aaa2dad3c35efe6146a25e51d8c0c Mon Sep 17 00:00:00 2001 From: Manjunath Davanam Date: Mon, 19 Jul 2021 17:17:19 +0530 Subject: [PATCH 20/55] Issue SB-25481 feat: Assessment archival data product cache issue fix --- .../sunbird/analytics/job/report/AssessmentArchivalJob.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala index b292f82a4..d6207a2d5 100644 --- a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala +++ b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala @@ -56,8 +56,8 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep // $COVERAGE-ON$ def archiveData(sparkSession: SparkSession, fetchData: (SparkSession, Map[String, String], String, StructType) => DataFrame, jobConfig: JobConfig): Array[Map[String, Any]] = { val batches: List[String] = AppConf.getConfig("assessment.batches").split(",").toList - val assessmentDF: DataFrame = getAssessmentData(sparkSession, fetchData, batches).persist() - val assessmentData = assessmentDF.withColumn("updated_on", to_timestamp(col("updated_on"))) + val assessmentData: DataFrame = getAssessmentData(sparkSession, 
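
PATCH 18 above is worth a pause, because this one line keeps changing through the series. Column.isin is a varargs method, so isin(batchIds) passes the whole List as a single literal (on the Spark versions in use this typically fails at runtime with an unsupported-literal error rather than filtering anything), while isin(batchIds: _*) expands the elements, and isInCollection (available from Spark 2.4) accepts the collection directly. A self-contained sketch, assuming a local session:

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.col

object IsinDemo {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[1]").appName("isin-demo").getOrCreate()
    import spark.implicits._

    val df = Seq("batch-001", "batch-002", "batch-003").toDF("batch_id")
    val batchIds = List("batch-001", "batch-002")

    // Varargs expansion: each id becomes its own literal; matches two rows.
    println(df.filter(col("batch_id").isin(batchIds: _*)).count()) // 2

    // Spark 2.4+ alternative that takes the collection as-is; same result.
    println(df.filter(col("batch_id").isInCollection(batchIds)).count()) // 2

    // df.filter(col("batch_id").isin(batchIds)) -- without `: _*` the List
    // is treated as one literal; do not use this form.

    spark.stop()
  }
}

PATCH 23 later in the series settles on the isin(batchIds: _*) form.
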
fetchData, batches) + .withColumn("updated_on", to_timestamp(col("updated_on"))) .withColumn("year", year(col("updated_on"))) .withColumn("week_of_year", weekofyear(col("updated_on"))) .withColumn("question", to_json(col("question"))).persist() From df8cf7e2d9d1375a06f1020d586a2bcf80fbdcf3 Mon Sep 17 00:00:00 2001 From: Manjunath Davanam Date: Tue, 20 Jul 2021 11:29:11 +0530 Subject: [PATCH 21/55] Issue SB-25481 feat: Assessment archival data product cache issue fix --- .../sunbird/analytics/job/report/AssessmentArchivalJob.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala index d6207a2d5..eaa5d7e8a 100644 --- a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala +++ b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala @@ -60,7 +60,7 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep .withColumn("updated_on", to_timestamp(col("updated_on"))) .withColumn("year", year(col("updated_on"))) .withColumn("week_of_year", weekofyear(col("updated_on"))) - .withColumn("question", to_json(col("question"))).persist() + .withColumn("question", to_json(col("question"))) val archivedBatchList = assessmentData.groupBy(partitionCols.head, partitionCols.tail: _*).count().collect() val archivedBatchCount = new AtomicInteger(archivedBatchList.length) JobLogger.log(s"Total Batches to Archive By Year & Week $archivedBatchCount", None, INFO) From 1f1906093c1d76c59588ca27e79a08c9c2593376 Mon Sep 17 00:00:00 2001 From: Manjunath Davanam Date: Tue, 20 Jul 2021 11:58:50 +0530 Subject: [PATCH 22/55] Issue SB-25481 feat: Assessment archival data product persist issue --- .../sunbird/analytics/job/report/AssessmentArchivalJob.scala | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala index eaa5d7e8a..1baa44a07 100644 --- a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala +++ b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala @@ -56,7 +56,7 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep // $COVERAGE-ON$ def archiveData(sparkSession: SparkSession, fetchData: (SparkSession, Map[String, String], String, StructType) => DataFrame, jobConfig: JobConfig): Array[Map[String, Any]] = { val batches: List[String] = AppConf.getConfig("assessment.batches").split(",").toList - val assessmentData: DataFrame = getAssessmentData(sparkSession, fetchData, batches) + val assessmentData: DataFrame = getAssessmentData(sparkSession, fetchData, batches).cache() .withColumn("updated_on", to_timestamp(col("updated_on"))) .withColumn("year", year(col("updated_on"))) .withColumn("week_of_year", weekofyear(col("updated_on"))) @@ -64,8 +64,7 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep val archivedBatchList = assessmentData.groupBy(partitionCols.head, partitionCols.tail: _*).count().collect() val archivedBatchCount = new AtomicInteger(archivedBatchList.length) JobLogger.log(s"Total Batches to Archive By Year & Week $archivedBatchCount", None, INFO) - val batchesToArchive: 
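
The .withColumn("question", to_json(col("question"))) line added in PATCH 19 is load-bearing: the question column comes out of Cassandra as an array of structs, and Spark's CSV sink rejects nested types, so the column must be serialized to a JSON string before upload. A small sketch, with an illustrative Question shape standing in for the real UDT:

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.{col, to_json}

object NestedColumnToCsv {
  // Illustrative stand-in for the assessment question UDT.
  case class Question(id: String, score: Double)

  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[1]").appName("to-json-demo").getOrCreate()
    import spark.implicits._

    val df = Seq(
      ("batch-001", Seq(Question("q1", 1.0), Question("q2", 0.5)))
    ).toDF("batch_id", "question")

    // Writing df as CSV directly would fail: the CSV data source does not
    // support array<struct<...>>. Flatten the column to a JSON string first.
    val flat = df.withColumn("question", to_json(col("question")))
    flat.show(truncate = false) // question -> [{"id":"q1","score":1.0},{"id":"q2","score":0.5}]

    spark.stop()
  }
}
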
Array[BatchPartition] = archivedBatchList.map(f => - BatchPartition(f.get(0).asInstanceOf[String], f.get(1).asInstanceOf[Int], f.get(2).asInstanceOf[Int])) + val batchesToArchive: Array[BatchPartition] = archivedBatchList.map(f => BatchPartition(f.get(0).asInstanceOf[String], f.get(1).asInstanceOf[Int], f.get(2).asInstanceOf[Int])) for (batch <- batchesToArchive) yield { val filteredDF = assessmentData .filter(col("batch_id") === batch.batch_id && col("year") === batch.year && col("week_of_year") === batch.week_of_year) From a3245f870ccb1518dc4a1ecd633b01e6ed0ca9b4 Mon Sep 17 00:00:00 2001 From: Manjunath Davanam Date: Tue, 20 Jul 2021 12:15:09 +0530 Subject: [PATCH 23/55] Issue SB-25481 feat: Assessment archival data product persist issue --- .../sunbird/analytics/job/report/AssessmentArchivalJob.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala index 1baa44a07..b87115bf1 100644 --- a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala +++ b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala @@ -78,7 +78,7 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep def getAssessmentData(spark: SparkSession, fetchData: (SparkSession, Map[String, String], String, StructType) => DataFrame, batchIds: List[String]): DataFrame = { val assessmentDF = fetchData(spark, assessmentAggDBSettings, cassandraUrl, new StructType()) - if (batchIds.nonEmpty) assessmentDF.filter(col("batch_id").isInCollection(batchIds)) else assessmentDF + if (batchIds.nonEmpty) assessmentDF.filter(col("batch_id").isin(batchIds: _*)) else assessmentDF } def deleteRecords(sparkSession: SparkSession, keyspace: String, table: String): Unit = { From 4380e6610ae6c0a13409e4f7c71e1fbc3a4e4774 Mon Sep 17 00:00:00 2001 From: Manjunath Davanam Date: Tue, 20 Jul 2021 12:22:45 +0530 Subject: [PATCH 24/55] Issue SB-25481 feat: Assessment archival data product persist issue --- .../sunbird/analytics/job/report/AssessmentArchivalJob.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala index b87115bf1..96a5f163a 100644 --- a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala +++ b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala @@ -56,7 +56,7 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep // $COVERAGE-ON$ def archiveData(sparkSession: SparkSession, fetchData: (SparkSession, Map[String, String], String, StructType) => DataFrame, jobConfig: JobConfig): Array[Map[String, Any]] = { val batches: List[String] = AppConf.getConfig("assessment.batches").split(",").toList - val assessmentData: DataFrame = getAssessmentData(sparkSession, fetchData, batches).cache() + val assessmentData: DataFrame = getAssessmentData(sparkSession, fetchData, batches) .withColumn("updated_on", to_timestamp(col("updated_on"))) .withColumn("year", year(col("updated_on"))) .withColumn("week_of_year", weekofyear(col("updated_on"))) @@ -77,7 +77,7 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep } def 
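
The back-and-forth in PATCHES 20 through 25 is all about where persist belongs. Two pitfalls are visible above: caching one frame and then chaining withColumn (the enriched frame is a new plan, so the loop recomputes the transformations anyway), and calling unpersist() inside the per-batch loop, which evicts the cache after the first iteration. A sketch of the placement that avoids both, assuming the enriched frame fits the configured storage level; names here are illustrative:

import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.functions.{col, to_timestamp, weekofyear, year}

object PersistPlacement {
  // Persist the *final* frame that every per-batch filter reuses, and
  // unpersist exactly once, after the whole loop has run.
  def archive(base: DataFrame, batches: Seq[(String, Int, Int)])(write: DataFrame => Unit): Unit = {
    val enriched = base
      .withColumn("updated_on", to_timestamp(col("updated_on")))
      .withColumn("year", year(col("updated_on")))
      .withColumn("week_of_year", weekofyear(col("updated_on")))
      .persist() // cache the frame the loop actually filters

    try {
      for ((batchId, yr, week) <- batches) {
        val slice = enriched.filter(
          col("batch_id") === batchId && col("year") === yr && col("week_of_year") === week)
        write(slice.drop("year", "week_of_year"))
      }
    } finally {
      enriched.unpersist() // once, after all batches are written
    }
  }
}
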
getAssessmentData(spark: SparkSession, fetchData: (SparkSession, Map[String, String], String, StructType) => DataFrame, batchIds: List[String]): DataFrame = { - val assessmentDF = fetchData(spark, assessmentAggDBSettings, cassandraUrl, new StructType()) + val assessmentDF = fetchData(spark, assessmentAggDBSettings, cassandraUrl, new StructType()).persist() if (batchIds.nonEmpty) assessmentDF.filter(col("batch_id").isin(batchIds: _*)) else assessmentDF } From acff4297b3d3fd31d8e5189fe96c5997c70578ed Mon Sep 17 00:00:00 2001 From: Manjunath Davanam Date: Tue, 20 Jul 2021 12:40:07 +0530 Subject: [PATCH 25/55] Issue SB-25481 feat: Assessment archival data product persist issue --- .../sunbird/analytics/job/report/AssessmentArchivalJob.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala index 96a5f163a..7b3c08cf6 100644 --- a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala +++ b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala @@ -77,7 +77,7 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep } def getAssessmentData(spark: SparkSession, fetchData: (SparkSession, Map[String, String], String, StructType) => DataFrame, batchIds: List[String]): DataFrame = { - val assessmentDF = fetchData(spark, assessmentAggDBSettings, cassandraUrl, new StructType()).persist() + val assessmentDF = fetchData(spark, assessmentAggDBSettings, cassandraUrl, new StructType()) if (batchIds.nonEmpty) assessmentDF.filter(col("batch_id").isin(batchIds: _*)) else assessmentDF } From e96faaf31b2b74e5ec74f12cd7213cfa08457dfb Mon Sep 17 00:00:00 2001 From: Manjunath Davanam Date: Tue, 20 Jul 2021 13:34:03 +0530 Subject: [PATCH 26/55] Issue SB-25481 feat: Assessment archival data product persist issue --- .../job/report/AssessmentArchivalJob.scala | 41 +++++++++++-------- 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala index 7b3c08cf6..5d2909fe7 100644 --- a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala +++ b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala @@ -56,24 +56,33 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep // $COVERAGE-ON$ def archiveData(sparkSession: SparkSession, fetchData: (SparkSession, Map[String, String], String, StructType) => DataFrame, jobConfig: JobConfig): Array[Map[String, Any]] = { val batches: List[String] = AppConf.getConfig("assessment.batches").split(",").toList - val assessmentData: DataFrame = getAssessmentData(sparkSession, fetchData, batches) - .withColumn("updated_on", to_timestamp(col("updated_on"))) + // val assessmentData: DataFrame = getAssessmentData(sparkSession, fetchData, batches) + getAssessmentData(sparkSession, fetchData, batches).withColumn("updated_on", to_timestamp(col("updated_on"))) .withColumn("year", year(col("updated_on"))) .withColumn("week_of_year", weekofyear(col("updated_on"))) .withColumn("question", to_json(col("question"))) - val archivedBatchList = assessmentData.groupBy(partitionCols.head, partitionCols.tail: _*).count().collect() - val 
archivedBatchCount = new AtomicInteger(archivedBatchList.length) - JobLogger.log(s"Total Batches to Archive By Year & Week $archivedBatchCount", None, INFO) - val batchesToArchive: Array[BatchPartition] = archivedBatchList.map(f => BatchPartition(f.get(0).asInstanceOf[String], f.get(1).asInstanceOf[Int], f.get(2).asInstanceOf[Int])) - for (batch <- batchesToArchive) yield { - val filteredDF = assessmentData - .filter(col("batch_id") === batch.batch_id && col("year") === batch.year && col("week_of_year") === batch.week_of_year) - upload(filteredDF.drop("year", "week_of_year"), batch, jobConfig) - val metrics = Map("batch_id" -> batch.batch_id, "year" -> batch.year, "week_of_year" -> batch.week_of_year, "pending_batches" -> archivedBatchCount.getAndDecrement(), "total_records" -> filteredDF.count()) - JobLogger.log(s"Data is archived and Remaining batches to archive is ", Some(metrics), INFO) - assessmentData.unpersist() - metrics - } + .coalesce(1) + .write + .partitionBy(partitionCols: _*) + .mode("overwrite") + .format("com.databricks.spark.csv") + .option("header", "true") + .save(AppConf.getConfig("assessment.archival.path")) + Array(Map("batch_id" -> "")) + + // val archivedBatchList = assessmentData.groupBy(partitionCols.head, partitionCols.tail: _*).count().collect() + // val archivedBatchCount = new AtomicInteger(archivedBatchList.length) + // JobLogger.log(s"Total Batches to Archive By Year & Week $archivedBatchCount", None, INFO) + // val batchesToArchive: Array[BatchPartition] = archivedBatchList.map(f => BatchPartition(f.get(0).asInstanceOf[String], f.get(1).asInstanceOf[Int], f.get(2).asInstanceOf[Int])) + // for (batch <- batchesToArchive) yield { + // val filteredDF = assessmentData + // .filter(col("batch_id") === batch.batch_id && col("year") === batch.year && col("week_of_year") === batch.week_of_year) + // upload(filteredDF.drop("year", "week_of_year"), batch, jobConfig) + // val metrics = Map("batch_id" -> batch.batch_id, "year" -> batch.year, "week_of_year" -> batch.week_of_year, "pending_batches" -> archivedBatchCount.getAndDecrement(), "total_records" -> filteredDF.count()) + // JobLogger.log(s"Data is archived and Remaining batches to archive is ", Some(metrics), INFO) + // assessmentData.unpersist() + // metrics + // } } def getAssessmentData(spark: SparkSession, fetchData: (SparkSession, Map[String, String], String, StructType) => DataFrame, batchIds: List[String]): DataFrame = { @@ -82,7 +91,7 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep } def deleteRecords(sparkSession: SparkSession, keyspace: String, table: String): Unit = { - // sparkSession.sql(s"TRUNCATE TABLE $keyspace.$table") + // sparkSession.sql(s"TRUNCATE TABLE $keyspace.$table") JobLogger.log(s"The Job Cleared The Table Data SuccessFully, Please Execute The Compaction", None, INFO) } From 94b626970d4fb3d46fe8578f685be26e126f9b5a Mon Sep 17 00:00:00 2001 From: Manjunath Davanam Date: Tue, 20 Jul 2021 13:55:38 +0530 Subject: [PATCH 27/55] Issue SB-25481 feat: Assessment archival data product persist issue --- .../org/sunbird/analytics/job/report/AssessmentArchivalJob.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala index 5d2909fe7..bd5fd8581 100644 --- a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala +++ 
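
PATCH 26 above briefly replaces the per-batch upload loop with a single partitioned write, letting Spark lay out batch_id=/year=/week_of_year= directories in one pass. A standalone sketch of that approach; the base path is a placeholder argument, and coalesce(1) trades write parallelism for a single CSV file per partition directory:

import org.apache.spark.sql.DataFrame

object PartitionedCsvWrite {
  // One pass instead of N filtered uploads: Spark creates
  // batch_id=<id>/year=<y>/week_of_year=<w>/ directories under basePath.
  def writePartitioned(enriched: DataFrame, basePath: String): Unit = {
    enriched
      .coalesce(1) // single file per partition directory
      .write
      .partitionBy("batch_id", "year", "week_of_year")
      .mode("overwrite")
      .option("header", "true")
      .csv(basePath) // the legacy com.databricks.spark.csv name resolves to this built-in writer on Spark 2+
  }
}
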
b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala @@ -87,6 +87,7 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep def getAssessmentData(spark: SparkSession, fetchData: (SparkSession, Map[String, String], String, StructType) => DataFrame, batchIds: List[String]): DataFrame = { val assessmentDF = fetchData(spark, assessmentAggDBSettings, cassandraUrl, new StructType()) + print("assessmentDF.count()" + assessmentDF.count()) if (batchIds.nonEmpty) assessmentDF.filter(col("batch_id").isin(batchIds: _*)) else assessmentDF } From 10c2069f7b817190ba0337ea845e777b032bdcca Mon Sep 17 00:00:00 2001 From: Manjunath Davanam Date: Tue, 20 Jul 2021 14:24:35 +0530 Subject: [PATCH 28/55] Issue SB-25481 feat: Assessment archival data product persist issue --- .../analytics/job/report/AssessmentArchivalJob.scala | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala index bd5fd8581..5a6e18be0 100644 --- a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala +++ b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala @@ -87,8 +87,14 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep def getAssessmentData(spark: SparkSession, fetchData: (SparkSession, Map[String, String], String, StructType) => DataFrame, batchIds: List[String]): DataFrame = { val assessmentDF = fetchData(spark, assessmentAggDBSettings, cassandraUrl, new StructType()) - print("assessmentDF.count()" + assessmentDF.count()) - if (batchIds.nonEmpty) assessmentDF.filter(col("batch_id").isin(batchIds: _*)) else assessmentDF + import spark.implicits._ + if (batchIds.nonEmpty) { + val batchListDF = batchIds.asInstanceOf[List[String]].toDF("batch_id") + assessmentDF.join(batchListDF, Seq("batch_id"), "inner") + } else { + assessmentDF + } + //if (batchIds.nonEmpty) assessmentDF.filter(col("batch_id").isin(batchIds: _*)) else assessmentDF } def deleteRecords(sparkSession: SparkSession, keyspace: String, table: String): Unit = { From 0d0ae86517c66966121a411687efec36a365aaea Mon Sep 17 00:00:00 2001 From: Manjunath Davanam Date: Tue, 20 Jul 2021 14:33:02 +0530 Subject: [PATCH 29/55] Issue SB-25481 feat: Assessment archival data product persist issue --- .../job/report/AssessmentArchivalJob.scala | 56 +++++++++---------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala index 5a6e18be0..d0e57e6ed 100644 --- a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala +++ b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala @@ -56,38 +56,38 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep // $COVERAGE-ON$ def archiveData(sparkSession: SparkSession, fetchData: (SparkSession, Map[String, String], String, StructType) => DataFrame, jobConfig: JobConfig): Array[Map[String, Any]] = { val batches: List[String] = AppConf.getConfig("assessment.batches").split(",").toList - // val assessmentData: DataFrame = getAssessmentData(sparkSession, fetchData, batches) - getAssessmentData(sparkSession, 
fetchData, batches).withColumn("updated_on", to_timestamp(col("updated_on"))) - .withColumn("year", year(col("updated_on"))) - .withColumn("week_of_year", weekofyear(col("updated_on"))) - .withColumn("question", to_json(col("question"))) - .coalesce(1) - .write - .partitionBy(partitionCols: _*) - .mode("overwrite") - .format("com.databricks.spark.csv") - .option("header", "true") - .save(AppConf.getConfig("assessment.archival.path")) - Array(Map("batch_id" -> "")) - - // val archivedBatchList = assessmentData.groupBy(partitionCols.head, partitionCols.tail: _*).count().collect() - // val archivedBatchCount = new AtomicInteger(archivedBatchList.length) - // JobLogger.log(s"Total Batches to Archive By Year & Week $archivedBatchCount", None, INFO) - // val batchesToArchive: Array[BatchPartition] = archivedBatchList.map(f => BatchPartition(f.get(0).asInstanceOf[String], f.get(1).asInstanceOf[Int], f.get(2).asInstanceOf[Int])) - // for (batch <- batchesToArchive) yield { - // val filteredDF = assessmentData - // .filter(col("batch_id") === batch.batch_id && col("year") === batch.year && col("week_of_year") === batch.week_of_year) - // upload(filteredDF.drop("year", "week_of_year"), batch, jobConfig) - // val metrics = Map("batch_id" -> batch.batch_id, "year" -> batch.year, "week_of_year" -> batch.week_of_year, "pending_batches" -> archivedBatchCount.getAndDecrement(), "total_records" -> filteredDF.count()) - // JobLogger.log(s"Data is archived and Remaining batches to archive is ", Some(metrics), INFO) - // assessmentData.unpersist() - // metrics - // } + val assessmentData: DataFrame = getAssessmentData(sparkSession, fetchData, batches).persist() +// getAssessmentData(sparkSession, fetchData, batches).withColumn("updated_on", to_timestamp(col("updated_on"))) +// .withColumn("year", year(col("updated_on"))) +// .withColumn("week_of_year", weekofyear(col("updated_on"))) +// .withColumn("question", to_json(col("question"))) +// .coalesce(1) +// .write +// .partitionBy(partitionCols: _*) +// .mode("overwrite") +// .format("com.databricks.spark.csv") +// .option("header", "true") +// .save(AppConf.getConfig("assessment.archival.path")) +// Array(Map("batch_id" -> "")) + + val archivedBatchList = assessmentData.groupBy(partitionCols.head, partitionCols.tail: _*).count().collect() + val archivedBatchCount = new AtomicInteger(archivedBatchList.length) + JobLogger.log(s"Total Batches to Archive By Year & Week $archivedBatchCount", None, INFO) + val batchesToArchive: Array[BatchPartition] = archivedBatchList.map(f => BatchPartition(f.get(0).asInstanceOf[String], f.get(1).asInstanceOf[Int], f.get(2).asInstanceOf[Int])) + for (batch <- batchesToArchive) yield { + val filteredDF = assessmentData + .filter(col("batch_id") === batch.batch_id && col("year") === batch.year && col("week_of_year") === batch.week_of_year) + upload(filteredDF.drop("year", "week_of_year"), batch, jobConfig) + val metrics = Map("batch_id" -> batch.batch_id, "year" -> batch.year, "week_of_year" -> batch.week_of_year, "pending_batches" -> archivedBatchCount.getAndDecrement(), "total_records" -> filteredDF.count()) + JobLogger.log(s"Data is archived and Remaining batches to archive is ", Some(metrics), INFO) + assessmentData.unpersist() + metrics + } } def getAssessmentData(spark: SparkSession, fetchData: (SparkSession, Map[String, String], String, StructType) => DataFrame, batchIds: List[String]): DataFrame = { - val assessmentDF = fetchData(spark, assessmentAggDBSettings, cassandraUrl, new StructType()) import spark.implicits._ + val 
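
PATCHES 28 through 32 experiment with joining against a one-column DataFrame of batch ids instead of an isin predicate. For a handful of ids the predicate is simpler, but the join form is the usual escape hatch when the id list is too large to inline into a filter expression. A sketch, with a broadcast hint added on the assumption that the id list stays small:

import org.apache.spark.sql.{DataFrame, SparkSession}
import org.apache.spark.sql.functions.broadcast

object BatchJoinFilter {
  // Inner join keeps only rows whose batch_id appears in batchIds.
  // broadcast() ships the tiny id list to every executor instead of
  // shuffling the large assessment frame.
  def filterByBatch(spark: SparkSession, assessmentDF: DataFrame, batchIds: List[String]): DataFrame = {
    import spark.implicits._
    if (batchIds.isEmpty) assessmentDF
    else assessmentDF.join(broadcast(batchIds.toDF("batch_id")), Seq("batch_id"), "inner")
  }
}

Note that a left join in this position keeps every row of the assessment frame and filters nothing; inner is the variant that restricts the data to the requested batches.
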
assessmentDF = fetchData(spark, assessmentAggDBSettings, cassandraUrl, new StructType()) if (batchIds.nonEmpty) { val batchListDF = batchIds.asInstanceOf[List[String]].toDF("batch_id") assessmentDF.join(batchListDF, Seq("batch_id"), "inner") From 753d76393bf8a713884d3c07a7768d018bfbbf84 Mon Sep 17 00:00:00 2001 From: Manjunath Davanam Date: Tue, 20 Jul 2021 14:42:38 +0530 Subject: [PATCH 30/55] Issue SB-25481 feat: Assessment archival data product persist issue --- .../analytics/job/report/AssessmentArchivalJob.scala | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala index d0e57e6ed..6c709a671 100644 --- a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala +++ b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala @@ -88,12 +88,8 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep def getAssessmentData(spark: SparkSession, fetchData: (SparkSession, Map[String, String], String, StructType) => DataFrame, batchIds: List[String]): DataFrame = { import spark.implicits._ val assessmentDF = fetchData(spark, assessmentAggDBSettings, cassandraUrl, new StructType()) - if (batchIds.nonEmpty) { - val batchListDF = batchIds.asInstanceOf[List[String]].toDF("batch_id") - assessmentDF.join(batchListDF, Seq("batch_id"), "inner") - } else { - assessmentDF - } + val batchListDF = batchIds.asInstanceOf[List[String]].toDF("batch_id") + assessmentDF.join(batchListDF, Seq("batch_id"), "left") //if (batchIds.nonEmpty) assessmentDF.filter(col("batch_id").isin(batchIds: _*)) else assessmentDF } From 2b467d4ccab64b29cc1dd67b02ec52ba5daf5029 Mon Sep 17 00:00:00 2001 From: Manjunath Davanam Date: Tue, 20 Jul 2021 14:50:19 +0530 Subject: [PATCH 31/55] Issue SB-25481 feat: Assessment archival data product persist issue --- .../analytics/job/report/AssessmentArchivalJob.scala | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala index 6c709a671..e49e96945 100644 --- a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala +++ b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala @@ -56,11 +56,11 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep // $COVERAGE-ON$ def archiveData(sparkSession: SparkSession, fetchData: (SparkSession, Map[String, String], String, StructType) => DataFrame, jobConfig: JobConfig): Array[Map[String, Any]] = { val batches: List[String] = AppConf.getConfig("assessment.batches").split(",").toList - val assessmentData: DataFrame = getAssessmentData(sparkSession, fetchData, batches).persist() -// getAssessmentData(sparkSession, fetchData, batches).withColumn("updated_on", to_timestamp(col("updated_on"))) -// .withColumn("year", year(col("updated_on"))) -// .withColumn("week_of_year", weekofyear(col("updated_on"))) -// .withColumn("question", to_json(col("question"))) + val assessmentDF : DataFrame = getAssessmentData(sparkSession, fetchData, batches).persist() + val assessmentData = assessmentDF.withColumn("updated_on", to_timestamp(col("updated_on"))) + .withColumn("year", 
year(col("updated_on"))) + .withColumn("week_of_year", weekofyear(col("updated_on"))) + .withColumn("question", to_json(col("question"))) // .coalesce(1) // .write // .partitionBy(partitionCols: _*) From ce5b0c8bb42bbf0d9406bf5c3b9f2495ed4de20b Mon Sep 17 00:00:00 2001 From: Manjunath Davanam Date: Tue, 20 Jul 2021 15:02:44 +0530 Subject: [PATCH 32/55] Issue SB-25481 feat: Assessment archival data product persist issue --- .../analytics/job/report/AssessmentArchivalJob.scala | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala index e49e96945..33b6490f4 100644 --- a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala +++ b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala @@ -88,8 +88,15 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep def getAssessmentData(spark: SparkSession, fetchData: (SparkSession, Map[String, String], String, StructType) => DataFrame, batchIds: List[String]): DataFrame = { import spark.implicits._ val assessmentDF = fetchData(spark, assessmentAggDBSettings, cassandraUrl, new StructType()) - val batchListDF = batchIds.asInstanceOf[List[String]].toDF("batch_id") - assessmentDF.join(batchListDF, Seq("batch_id"), "left") + if (batchIds.nonEmpty) { + val batchListDF = batchIds.asInstanceOf[List[String]].toDF("batch_id") + assessmentDF.join(batchListDF, Seq("batch_id"), "inner") + } else { + assessmentDF + } + + // val batchListDF = batchIds.asInstanceOf[List[String]].toDF("batch_id") + // assessmentDF.join(batchListDF, Seq("batch_id"), "left") //if (batchIds.nonEmpty) assessmentDF.filter(col("batch_id").isin(batchIds: _*)) else assessmentDF } From 00fdc7c3157ece337c3030a486301bea987c24b4 Mon Sep 17 00:00:00 2001 From: Manjunath Davanam Date: Tue, 20 Jul 2021 16:49:00 +0530 Subject: [PATCH 33/55] Issue SB-25481 feat: Assessment archival data product persist issue --- .../job/report/AssessmentArchivalJob.scala | 45 +++++++------------ 1 file changed, 16 insertions(+), 29 deletions(-) diff --git a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala index 33b6490f4..3aebb69d6 100644 --- a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala +++ b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala @@ -56,33 +56,24 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep // $COVERAGE-ON$ def archiveData(sparkSession: SparkSession, fetchData: (SparkSession, Map[String, String], String, StructType) => DataFrame, jobConfig: JobConfig): Array[Map[String, Any]] = { val batches: List[String] = AppConf.getConfig("assessment.batches").split(",").toList - val assessmentDF : DataFrame = getAssessmentData(sparkSession, fetchData, batches).persist() - val assessmentData = assessmentDF.withColumn("updated_on", to_timestamp(col("updated_on"))) + val assessmentDF: DataFrame = getAssessmentData(sparkSession, fetchData, batches) + val assessmentData = assessmentDF.withColumn("updated_on", to_timestamp(col("updated_on"))) .withColumn("year", year(col("updated_on"))) .withColumn("week_of_year", weekofyear(col("updated_on"))) .withColumn("question", 
to_json(col("question"))) -// .coalesce(1) -// .write -// .partitionBy(partitionCols: _*) -// .mode("overwrite") -// .format("com.databricks.spark.csv") -// .option("header", "true") -// .save(AppConf.getConfig("assessment.archival.path")) -// Array(Map("batch_id" -> "")) - - val archivedBatchList = assessmentData.groupBy(partitionCols.head, partitionCols.tail: _*).count().collect() - val archivedBatchCount = new AtomicInteger(archivedBatchList.length) - JobLogger.log(s"Total Batches to Archive By Year & Week $archivedBatchCount", None, INFO) - val batchesToArchive: Array[BatchPartition] = archivedBatchList.map(f => BatchPartition(f.get(0).asInstanceOf[String], f.get(1).asInstanceOf[Int], f.get(2).asInstanceOf[Int])) - for (batch <- batchesToArchive) yield { - val filteredDF = assessmentData - .filter(col("batch_id") === batch.batch_id && col("year") === batch.year && col("week_of_year") === batch.week_of_year) - upload(filteredDF.drop("year", "week_of_year"), batch, jobConfig) - val metrics = Map("batch_id" -> batch.batch_id, "year" -> batch.year, "week_of_year" -> batch.week_of_year, "pending_batches" -> archivedBatchCount.getAndDecrement(), "total_records" -> filteredDF.count()) - JobLogger.log(s"Data is archived and Remaining batches to archive is ", Some(metrics), INFO) - assessmentData.unpersist() - metrics - } + val archivedBatchList = assessmentData.groupBy(partitionCols.head, partitionCols.tail: _*).count().collect() + val archivedBatchCount = new AtomicInteger(archivedBatchList.length) + JobLogger.log(s"Total Batches to Archive By Year & Week $archivedBatchCount", None, INFO) + val batchesToArchive: Array[BatchPartition] = archivedBatchList.map(f => BatchPartition(f.get(0).asInstanceOf[String], f.get(1).asInstanceOf[Int], f.get(2).asInstanceOf[Int])) + for (batch <- batchesToArchive) yield { + val filteredDF = assessmentData + .filter(col("batch_id") === batch.batch_id && col("year") === batch.year && col("week_of_year") === batch.week_of_year) + upload(filteredDF.drop("year", "week_of_year"), batch, jobConfig) + val metrics = Map("batch_id" -> batch.batch_id, "year" -> batch.year, "week_of_year" -> batch.week_of_year, "pending_batches" -> archivedBatchCount.getAndDecrement(), "total_records" -> filteredDF.count()) + JobLogger.log(s"Data is archived and Remaining batches to archive is ", Some(metrics), INFO) + assessmentData.unpersist() + metrics + } } def getAssessmentData(spark: SparkSession, fetchData: (SparkSession, Map[String, String], String, StructType) => DataFrame, batchIds: List[String]): DataFrame = { @@ -90,14 +81,10 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep val assessmentDF = fetchData(spark, assessmentAggDBSettings, cassandraUrl, new StructType()) if (batchIds.nonEmpty) { val batchListDF = batchIds.asInstanceOf[List[String]].toDF("batch_id") - assessmentDF.join(batchListDF, Seq("batch_id"), "inner") + assessmentDF.join(batchListDF, Seq("batch_id"), "inner").persist() } else { assessmentDF } - - // val batchListDF = batchIds.asInstanceOf[List[String]].toDF("batch_id") - // assessmentDF.join(batchListDF, Seq("batch_id"), "left") - //if (batchIds.nonEmpty) assessmentDF.filter(col("batch_id").isin(batchIds: _*)) else assessmentDF } def deleteRecords(sparkSession: SparkSession, keyspace: String, table: String): Unit = { From 9d0e8b1850acafe2817cb0fec4d00d436fd06977 Mon Sep 17 00:00:00 2001 From: Manjunath Davanam Date: Mon, 9 Aug 2021 11:25:02 +0530 Subject: [PATCH 34/55] Issue SB-24793 feat: Assessment data archival 
From 9d0e8b1850acafe2817cb0fec4d00d436fd06977 Mon Sep 17 00:00:00 2001
From: Manjunath Davanam
Date: Mon, 9 Aug 2021 11:25:02 +0530
Subject: [PATCH 34/55] Issue SB-24793 feat: Assessment data archival
 configuration update

---
 .../analytics/job/report/AssessmentArchivalJob.scala | 7 +++++--
 data-products/src/test/resources/application.conf    | 1 +
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala
index 3aebb69d6..ba0734a08 100644
--- a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala
+++ b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala
@@ -41,12 +41,15 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep
       val total_archived_files = res._2.length
       if (truncateData) deleteRecords(spark, assessmentAggDBSettings.getOrElse("keyspace", "sunbird_courses"), assessmentAggDBSettings.getOrElse("table", "assessment_aggregator")) else JobLogger.log(s"Skipping the ${assessmentAggDBSettings.getOrElse("table", "assessment_aggregator")} truncate process", None, INFO)
       JobLogger.end(s"$jobName completed execution", "SUCCESS", Option(Map("timeTaken" -> res._1, "total_archived_files" -> total_archived_files)))
+    } catch {
+      case ex: Exception => {
+        ex.printStackTrace()
+        JobLogger.end(s"$jobName completed execution with the error ${ex.getMessage}", "FAILED", None)
+      }
     } finally {
       frameworkContext.closeContext()
       spark.close()
     }
-
-
   }

   def init()(implicit spark: SparkSession, fc: FrameworkContext, config: JobConfig): Unit = {
diff --git a/data-products/src/test/resources/application.conf b/data-products/src/test/resources/application.conf
index 9ff1086f6..719cf2953 100644
--- a/data-products/src/test/resources/application.conf
+++ b/data-products/src/test/resources/application.conf
@@ -164,6 +164,7 @@ sunbird.report.cluster.host=127.0.0.1
 sunbird.user.report.keyspace="sunbird_courses"
 collection.exhaust.store.prefix="reports"
 postgres.table.job_request="job_request"
+sunbird.courses.assessment.table = "assessment_aggregator"
 druid.report.default.storage="local"
 druid.report.date.format="yyyy-MM-dd"
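[Note] PATCH 34 adds both a failure path (a "FAILED" JobLogger.end on any exception) and a new config key, sunbird.courses.assessment.table, which later patches use to resolve the Cassandra table name at runtime. A small sketch of how such a key can feed the job's Cassandra settings map — assuming AppConf wraps a standard Typesafe Config, which is how the code reads; the object and helper names below are hypothetical:

import com.typesafe.config.ConfigFactory

object AssessmentTableConfigSketch extends App {
  private val conf = ConfigFactory.load() // picks up application.conf from the classpath

  // Fall back to the well-known defaults when the keys are absent.
  private def getOrDefault(key: String, default: String): String =
    if (conf.hasPath(key)) conf.getString(key) else default

  val settings: Map[String, String] = Map(
    "table"    -> getOrDefault("sunbird.courses.assessment.table", "assessment_aggregator"),
    "keyspace" -> getOrDefault("sunbird.courses.keyspace", "sunbird_courses"),
    "cluster"  -> "LMSCluster"
  )
  println(settings)
}

Keeping the table name in configuration rather than hard-coding it is what lets the tests point the same job at an embedded Cassandra fixture.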
"sunbird_courses"), assessmentAggDBSettings.getOrElse("table", "assessment_aggregator")) else JobLogger.log(s"Skipping the ${assessmentAggDBSettings.getOrElse("table", "assessment_aggregator")} truncate process", None, INFO) JobLogger.end(s"$jobName completed execution", "SUCCESS", Option(Map("timeTaken" -> res._1, "total_archived_files" -> total_archived_files))) @@ -57,29 +57,42 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep } // $COVERAGE-ON$ - def archiveData(sparkSession: SparkSession, fetchData: (SparkSession, Map[String, String], String, StructType) => DataFrame, jobConfig: JobConfig): Array[Map[String, Any]] = { - val batches: List[String] = AppConf.getConfig("assessment.batches").split(",").toList - val assessmentDF: DataFrame = getAssessmentData(sparkSession, fetchData, batches) + def archiveData(sparkSession: SparkSession, jobConfig: JobConfig): Array[Map[String, Any]] = { + val batches: List[String] = AppConf.getConfig("assessment.batches").split(",").toList.filter(x => x.nonEmpty) + val assessmentDF: DataFrame = getAssessmentData(sparkSession, batches) val assessmentData = assessmentDF.withColumn("updated_on", to_timestamp(col("updated_on"))) .withColumn("year", year(col("updated_on"))) .withColumn("week_of_year", weekofyear(col("updated_on"))) .withColumn("question", to_json(col("question"))) - val archivedBatchList = assessmentData.groupBy(partitionCols.head, partitionCols.tail: _*).count().collect() - val archivedBatchCount = new AtomicInteger(archivedBatchList.length) - JobLogger.log(s"Total Batches to Archive By Year & Week $archivedBatchCount", None, INFO) - val batchesToArchive: Array[BatchPartition] = archivedBatchList.map(f => BatchPartition(f.get(0).asInstanceOf[String], f.get(1).asInstanceOf[Int], f.get(2).asInstanceOf[Int])) - for (batch <- batchesToArchive) yield { - val filteredDF = assessmentData - .filter(col("batch_id") === batch.batch_id && col("year") === batch.year && col("week_of_year") === batch.week_of_year) - upload(filteredDF.drop("year", "week_of_year"), batch, jobConfig) - val metrics = Map("batch_id" -> batch.batch_id, "year" -> batch.year, "week_of_year" -> batch.week_of_year, "pending_batches" -> archivedBatchCount.getAndDecrement(), "total_records" -> filteredDF.count()) - JobLogger.log(s"Data is archived and Remaining batches to archive is ", Some(metrics), INFO) - assessmentData.unpersist() - metrics - } + + val archiveBatchList = assessmentData.groupBy(partitionCols.head, partitionCols.tail: _*).count().collect() + val batchesToArchiveCount = new AtomicInteger(archiveBatchList.length) + JobLogger.log(s"Total Batches to Archive By Year & Week $batchesToArchiveCount", None, INFO) + + val batchesToArchive: Map[String, Array[BatchPartition]] = archiveBatchList.map(f => BatchPartition(f.get(0).asInstanceOf[String], f.get(1).asInstanceOf[Int], f.get(2).asInstanceOf[Int])).groupBy(_.batch_id) + + batchesToArchive.flatMap(batches => { + val processingBatch = new AtomicInteger(batches._2.length) + JobLogger.log(s"Started Processing to archive the data", Some(Map("batch_id" -> batches._1, "archival_count" -> batches._2.length)), INFO) + val res = for (batch <- batches._2) yield { + val filteredDF = assessmentData.filter(col("batch_id") === batch.batch_id && col("year") === batch.year && col("week_of_year") === batch.week_of_year) + upload(filteredDF.drop("year", "week_of_year"), batch, jobConfig) + val metrics = Map("batch_id" -> batch.batch_id, "year" -> batch.year, "week_of_year" -> batch.week_of_year, 
"pending_part_files" -> processingBatch.getAndDecrement(), "total_records" -> filteredDF.count()) + JobLogger.log(s"Data is archived and Processing the remaining part files ", Some(metrics), INFO) + assessmentData.unpersist() + metrics + } + removeRecords(batches._1) + JobLogger.log(s"The data archival is successful", Some(Map("batch_id" -> batches._1, "pending_batches" -> batchesToArchiveCount.getAndDecrement())), INFO) + res + }).toArray + } + + def removeRecords(batchId: String): Unit = { + JobLogger.log(s"Deleting the records for the batch $batchId from the DB", None, INFO) } - def getAssessmentData(spark: SparkSession, fetchData: (SparkSession, Map[String, String], String, StructType) => DataFrame, batchIds: List[String]): DataFrame = { + def getAssessmentData(spark: SparkSession, batchIds: List[String]): DataFrame = { import spark.implicits._ val assessmentDF = fetchData(spark, assessmentAggDBSettings, cassandraUrl, new StructType()) if (batchIds.nonEmpty) { diff --git a/data-products/src/test/resources/assessment-archival/assessment_agg.cql b/data-products/src/test/resources/assessment-archival/assessment_agg.cql new file mode 100644 index 000000000..9e8b35306 --- /dev/null +++ b/data-products/src/test/resources/assessment-archival/assessment_agg.cql @@ -0,0 +1,8 @@ +INSERT INTO sunbird_courses.assessment_aggregator (course_id, batch_id, user_id, content_id, attempt_id, grand_total, total_max_score, total_score, question, updated_on) VALUES ('do_1130928636168192001667', 'batch-001', 'user-001', 'do_1128870328040161281204', 'attempat-001', '20', 20, 20, [{id: 'do_213019475454476288155', assess_ts: '2020-06-18T18:15:56.490+0000', max_score: 1, score: 1, type: 'mcq', title: 'testQuestiontextandformula', resvalues: [{'1': '{"text":"A=\\\\pi r^2\n"}'}], params: [{'1': '{"text":"A=\\\\pi r^2\n"}'}, {'2': '{"text":"no\n"}'}, {'answer': '{"correct":["1"]}'}], description: 'testQuestiontextandformula', duration: 1.0}, {id: 'do_213019970118279168165', assess_ts: '2020-06-18T18:15:56.490+0000', max_score: 1, score: 1, type: 'mcq', title: 'test with formula', resvalues: [{'1': '{"text":"1\nA=\\\\pi r^2A=\\\\pi r^2\n"}'}], params: [{'1': '{"text":"1\nA=\\\\pi r^2A=\\\\pi r^2\n"}'}, {'2': '{"text":"2\n"}'}, {'answer': '{"correct":["1"]}'}], description: '', duration: 1.0}, {id: 'do_213019972814823424168', assess_ts: '2020-06-18T18:15:56.490+0000', max_score: 1, score: 0.33, type: 'mtf', title: 'Copy of - Match the following:\n\nx=\\frac{-b\\pm\\sqrt{b^2-4ac}}{2a}\nArrange the following equations in correct order.\n', resvalues: [{'lhs': '[{"1":"{\"text\":\"A=\\\\\\\\pi r^2\\n\"}"},{"2":"{\"text\":\"\\\\\\\\frac{4}{3}\\\\\\\\pi r^3\\n\"}"},{"3":"{\"text\":\"a^n\\\\\\\\times a^m=a^{n+m}\\n\"}"}]'}, {'rhs': '[{"1":"{\"text\":\"Volume of sphere\\n\"}"},{"2":"{\"text\":\"Area of Circle\\n\"}"},{"3":"{\"text\":\"Product Rule\\n\"}"}]'}], params: [{'lhs': '[{"1":"{\"text\":\"A=\\\\\\\\pi r^2\\n\"}"},{"2":"{\"text\":\"\\\\\\\\frac{4}{3}\\\\\\\\pi r^3\\n\"}"},{"3":"{\"text\":\"a^n\\\\\\\\times a^m=a^{n+m}\\n\"}"}]'}, {'rhs': '[{"1":"{\"text\":\"Volume of sphere\\n\"}"},{"2":"{\"text\":\"Product Rule\\n\"}"},{"3":"{\"text\":\"Area of Circle\\n\"}"}]'}, {'answer': '{"lhs":["1","2","3"],"rhs":["3","1","2"]}'}], description: '', duration: 2.0}, {id: 'do_2130256513760624641171', assess_ts: '2020-06-18T18:15:56.490+0000', max_score: 10, score: 10, type: 'mcq', title: '2 +2 is..? 
mark ia 10\n', resvalues: [{'1': '{"text":"4\n"}'}], params: [{'1': '{"text":"4\n"}'}, {'2': '{"text":"3\n"}'}, {'3': '{"text":"8\n"}'}, {'4': '{"text":"10\n"}'}, {'answer': '{"correct":["1"]}'}], description: '', duration: 12.0}], toTimeStamp(toDate(now()))); +INSERT INTO sunbird_courses.assessment_aggregator (course_id, batch_id, user_id, content_id, attempt_id, grand_total, total_max_score, total_score, question, updated_on) VALUES ('do_1130928636168192001667', 'batch-001', 'user-001', 'do_1128870328040161281204', 'attempat-001', '20', 20, 20, [{id: 'do_213019475454476288155', assess_ts: '2020-06-18T18:15:56.490+0000', max_score: 1, score: 1, type: 'mcq', title: 'testQuestiontextandformula', resvalues: [{'1': '{"text":"A=\\\\pi r^2\n"}'}], params: [{'1': '{"text":"A=\\\\pi r^2\n"}'}, {'2': '{"text":"no\n"}'}, {'answer': '{"correct":["1"]}'}], description: 'testQuestiontextandformula', duration: 1.0}, {id: 'do_213019970118279168165', assess_ts: '2020-06-18T18:15:56.490+0000', max_score: 1, score: 1, type: 'mcq', title: 'test with formula', resvalues: [{'1': '{"text":"1\nA=\\\\pi r^2A=\\\\pi r^2\n"}'}], params: [{'1': '{"text":"1\nA=\\\\pi r^2A=\\\\pi r^2\n"}'}, {'2': '{"text":"2\n"}'}, {'answer': '{"correct":["1"]}'}], description: '', duration: 1.0}, {id: 'do_213019972814823424168', assess_ts: '2020-06-18T18:15:56.490+0000', max_score: 1, score: 0.33, type: 'mtf', title: 'Copy of - Match the following:\n\nx=\\frac{-b\\pm\\sqrt{b^2-4ac}}{2a}\nArrange the following equations in correct order.\n', resvalues: [{'lhs': '[{"1":"{\"text\":\"A=\\\\\\\\pi r^2\\n\"}"},{"2":"{\"text\":\"\\\\\\\\frac{4}{3}\\\\\\\\pi r^3\\n\"}"},{"3":"{\"text\":\"a^n\\\\\\\\times a^m=a^{n+m}\\n\"}"}]'}, {'rhs': '[{"1":"{\"text\":\"Volume of sphere\\n\"}"},{"2":"{\"text\":\"Area of Circle\\n\"}"},{"3":"{\"text\":\"Product Rule\\n\"}"}]'}], params: [{'lhs': '[{"1":"{\"text\":\"A=\\\\\\\\pi r^2\\n\"}"},{"2":"{\"text\":\"\\\\\\\\frac{4}{3}\\\\\\\\pi r^3\\n\"}"},{"3":"{\"text\":\"a^n\\\\\\\\times a^m=a^{n+m}\\n\"}"}]'}, {'rhs': '[{"1":"{\"text\":\"Volume of sphere\\n\"}"},{"2":"{\"text\":\"Product Rule\\n\"}"},{"3":"{\"text\":\"Area of Circle\\n\"}"}]'}, {'answer': '{"lhs":["1","2","3"],"rhs":["3","1","2"]}'}], description: '', duration: 2.0}, {id: 'do_2130256513760624641171', assess_ts: '2020-06-18T18:15:56.490+0000', max_score: 10, score: 10, type: 'mcq', title: '2 +2 is..? 
mark ia 10\n', resvalues: [{'1': '{"text":"4\n"}'}], params: [{'1': '{"text":"4\n"}'}, {'2': '{"text":"3\n"}'}, {'3': '{"text":"8\n"}'}, {'4': '{"text":"10\n"}'}, {'answer': '{"correct":["1"]}'}], description: '', duration: 12.0}], toTimeStamp(toDate(now()))); +INSERT INTO sunbird_courses.assessment_aggregator (course_id, batch_id, user_id, content_id, attempt_id, grand_total, total_max_score, total_score, updated_on) VALUES ('do_1130928636168192001667', 'batch-001', 'user-002', 'do_1128870328040161281204', 'attempat-001', '10', 10, 10, toTimeStamp(toDate(now()))); +INSERT INTO sunbird_courses.assessment_aggregator (course_id, batch_id, user_id, content_id, attempt_id, grand_total, total_max_score, total_score, updated_on) VALUES ('do_1130928636168192001667', 'batch-001', 'user-003', 'do_112876961957437440179', 'attempat-001', '10', 10, 10, toTimeStamp(toDate(now()))); +INSERT INTO sunbird_courses.assessment_aggregator (course_id, batch_id, user_id, content_id, attempt_id, grand_total, total_max_score, total_score, updated_on) VALUES ('do_1130928636168192001667', 'batch-001', 'user-003', 'do_112876961957437440179', 'attempat-001', '10', 10, 10, toTimeStamp(toDate(now()))); +INSERT INTO sunbird_courses.assessment_aggregator (course_id, batch_id, user_id, content_id, attempt_id, grand_total, total_max_score, total_score, updated_on) VALUES ('do_11306040245271756813015', 'batch-001', 'user-008', 'do_112876961957437440179', 'attempat-001', '10', 10, 10, toTimeStamp(toDate(now()))); +INSERT INTO sunbird_courses.assessment_aggregator (course_id, batch_id, user_id, content_id, attempt_id, grand_total, total_max_score, total_score, updated_on) VALUES ('do_11306040245271756813015', 'batch-001', 'user-010', 'do_11307593493010022418', 'attempat-001', '15', 15, 15, toTimeStamp(toDate(now()))); +INSERT INTO sunbird_courses.assessment_aggregator (course_id, batch_id, user_id, content_id, attempt_id, grand_total, total_max_score, total_score, updated_on) VALUES ('do_112835334818643968148', 'batch-004', 'user-014', 'do_11307593493010022418', 'attempat-001', '15', 15, 15, toTimeStamp(toDate(now()))); \ No newline at end of file diff --git a/data-products/src/test/scala/org/sunbird/analytics/job/report/TestAssessmentArchivalJob.scala b/data-products/src/test/scala/org/sunbird/analytics/job/report/TestAssessmentArchivalJob.scala index fd33988d9..ff20a01ad 100644 --- a/data-products/src/test/scala/org/sunbird/analytics/job/report/TestAssessmentArchivalJob.scala +++ b/data-products/src/test/scala/org/sunbird/analytics/job/report/TestAssessmentArchivalJob.scala @@ -8,6 +8,7 @@ import org.ekstep.analytics.framework.conf.AppConf import org.ekstep.analytics.framework.util.{HadoopFileUtil, JSONUtils} import org.ekstep.analytics.framework.{FrameworkContext, JobConfig} import org.scalamock.scalatest.MockFactory +import org.sunbird.analytics.util.EmbeddedCassandra import scala.collection.mutable @@ -23,12 +24,7 @@ class TestAssessmentArchivalJob extends BaseReportSpec with MockFactory { override def beforeAll(): Unit = { super.beforeAll() spark = getSparkSession(); - assessmentAggDF = spark - .read - .format("com.databricks.spark.csv") - .option("header", "true") - .load("src/test/resources/assessment-archival/assessment_aggregator.csv") - .cache() + EmbeddedCassandra.loadData("src/test/resources/assessment-archival/assessment_agg.cql") // Load test data in embedded cassandra server } override def afterAll(): Unit = { @@ -44,34 +40,34 @@ class TestAssessmentArchivalJob extends BaseReportSpec with MockFactory { }, new 
ArrayType(MapType(StringType, StringType), true)) it should "Should able to archive the batch data" in { - initializeDefaultMockData() + //initializeDefaultMockData() implicit val mockFc: FrameworkContext = mock[FrameworkContext] val strConfig = """{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.AssessmentArchivalJob","modelParams":{"truncateData":false,"store":"local","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Assessment Archival Job"}""".stripMargin implicit val jobConfig: JobConfig = JSONUtils.deserialize[JobConfig](strConfig) - val reportData = AssessmentArchivalJob.archiveData(spark, reporterMock.fetchData, jobConfig) - - val batch_1 = reportData.filter(x => x.getOrElse("batch_id", "").asInstanceOf[String] === "1010") - batch_1.foreach(res => res("year") === "2019") - batch_1.foreach(res => res("total_records") === "2") - batch_1.foreach(res => res("week_of_year") === "36") - - - val batch_2 = reportData.filter(x => x.getOrElse("batch_id", "").asInstanceOf[String] === "1001") - batch_2.foreach(res => res("year") === "2019") - batch_2.foreach(res => res("total_records") === "3") - batch_2.foreach(res => res("week_of_year") === "36") - - - val batch_3 = reportData.filter(x => x.getOrElse("batch_id", "").asInstanceOf[String] === "1005") - batch_3.foreach(res => res("year") === "2019") - batch_3.foreach(res => res("total_records") === "1") - batch_3.foreach(res => res("week_of_year") === "36") - - - val batch_4 = reportData.filter(x => x.getOrElse("batch_id", "").asInstanceOf[String] === "1006") - batch_4.foreach(res => res("year") === "2019") - batch_4.foreach(res => res("total_records") === "2") - batch_4.foreach(res => res("week_of_year") === "36") + val reportData = AssessmentArchivalJob.archiveData(spark, jobConfig) + println("JSON" + JSONUtils.serialize(reportData)) +// val batch_1 = reportData.filter(x => x.getOrElse("batch_id", "").asInstanceOf[String] === "1010") +// batch_1.foreach(res => res("year") === "2019") +// batch_1.foreach(res => res("total_records") === "2") +// batch_1.foreach(res => res("week_of_year") === "36") +// +// +// val batch_2 = reportData.filter(x => x.getOrElse("batch_id", "").asInstanceOf[String] === "1001") +// batch_2.foreach(res => res("year") === "2019") +// batch_2.foreach(res => res("total_records") === "3") +// batch_2.foreach(res => res("week_of_year") === "36") +// +// +// val batch_3 = reportData.filter(x => x.getOrElse("batch_id", "").asInstanceOf[String] === "1005") +// batch_3.foreach(res => res("year") === "2019") +// batch_3.foreach(res => res("total_records") === "1") +// batch_3.foreach(res => res("week_of_year") === "36") +// +// +// val batch_4 = reportData.filter(x => x.getOrElse("batch_id", "").asInstanceOf[String] === "1006") +// batch_4.foreach(res => res("year") === "2019") +// batch_4.foreach(res => res("total_records") === "2") +// batch_4.foreach(res => res("week_of_year") === "36") } From 0c4b0f76b1896c147ab9ce1c89774e085f6cc862 Mon Sep 17 00:00:00 2001 From: Manjunath Davanam Date: Wed, 11 Aug 2021 11:12:49 +0530 Subject: [PATCH 36/55] Issue SB-24793 feat: Assessment data archival configuration update --- .../job/report/AssessmentArchivalJob.scala | 17 +++---- .../report/TestAssessmentArchivalJob.scala | 48 ++++--------------- 2 files changed, 17 insertions(+), 48 deletions(-) diff --git 
a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala index 7cf4fd0df..5239de503 100644 --- a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala +++ b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala @@ -24,7 +24,6 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep // $COVERAGE-OFF$ Disabling scoverage for main and execute method override def main(config: String)(implicit sc: Option[SparkContext], fc: Option[FrameworkContext]): Unit = { - implicit val className: String = "org.sunbird.analytics.job.report.AssessmentArchivalJob" val jobName = "AssessmentArchivalJob" JobLogger.init(jobName) @@ -39,7 +38,6 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep try { val res = CommonUtil.time(archiveData(spark, jobConfig)) val total_archived_files = res._2.length - if (truncateData) deleteRecords(spark, assessmentAggDBSettings.getOrElse("keyspace", "sunbird_courses"), assessmentAggDBSettings.getOrElse("table", "assessment_aggregator")) else JobLogger.log(s"Skipping the ${assessmentAggDBSettings.getOrElse("table", "assessment_aggregator")} truncate process", None, INFO) JobLogger.end(s"$jobName completed execution", "SUCCESS", Option(Map("timeTaken" -> res._1, "total_archived_files" -> total_archived_files))) } catch { case ex: Exception => { @@ -58,7 +56,10 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep // $COVERAGE-ON$ def archiveData(sparkSession: SparkSession, jobConfig: JobConfig): Array[Map[String, Any]] = { + val modelParams = jobConfig.modelParams.get + val deleteArchivedBatch: Boolean = modelParams.getOrElse("deleteArchivedBatch", false).asInstanceOf[Boolean] val batches: List[String] = AppConf.getConfig("assessment.batches").split(",").toList.filter(x => x.nonEmpty) + val assessmentDF: DataFrame = getAssessmentData(sparkSession, batches) val assessmentData = assessmentDF.withColumn("updated_on", to_timestamp(col("updated_on"))) .withColumn("year", year(col("updated_on"))) @@ -73,7 +74,7 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep batchesToArchive.flatMap(batches => { val processingBatch = new AtomicInteger(batches._2.length) - JobLogger.log(s"Started Processing to archive the data", Some(Map("batch_id" -> batches._1, "archival_count" -> batches._2.length)), INFO) + JobLogger.log(s"Started Processing to archive the data", Some(Map("batch_id" -> batches._1, "total_part_files_to_archive" -> batches._2.length)), INFO) val res = for (batch <- batches._2) yield { val filteredDF = assessmentData.filter(col("batch_id") === batch.batch_id && col("year") === batch.year && col("week_of_year") === batch.week_of_year) upload(filteredDF.drop("year", "week_of_year"), batch, jobConfig) @@ -82,13 +83,14 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep assessmentData.unpersist() metrics } - removeRecords(batches._1) + if(deleteArchivedBatch) removeRecords(sparkSession, batches._1) else JobLogger.log(s"Skipping the batch deletions ${batches._1}", None, INFO) JobLogger.log(s"The data archival is successful", Some(Map("batch_id" -> batches._1, "pending_batches" -> batchesToArchiveCount.getAndDecrement())), INFO) res }).toArray } - def removeRecords(batchId: String): Unit = { + def removeRecords(sparkSession: SparkSession, 
batchId: String): Unit = { + sparkSession.sql(s"DELETE FROM ${AppConf.getConfig("sunbird.courses.keyspace")}.${AppConf.getConfig("sunbird.courses.assessment.table")} WHERE batch_id = $batchId") JobLogger.log(s"Deleting the records for the batch $batchId from the DB", None, INFO) } @@ -103,11 +105,6 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep } } - def deleteRecords(sparkSession: SparkSession, keyspace: String, table: String): Unit = { - // sparkSession.sql(s"TRUNCATE TABLE $keyspace.$table") - JobLogger.log(s"The Job Cleared The Table Data SuccessFully, Please Execute The Compaction", None, INFO) - } - def upload(archivedData: DataFrame, batch: BatchPartition, jobConfig: JobConfig): List[String] = { diff --git a/data-products/src/test/scala/org/sunbird/analytics/job/report/TestAssessmentArchivalJob.scala b/data-products/src/test/scala/org/sunbird/analytics/job/report/TestAssessmentArchivalJob.scala index ff20a01ad..fe4c7e21e 100644 --- a/data-products/src/test/scala/org/sunbird/analytics/job/report/TestAssessmentArchivalJob.scala +++ b/data-products/src/test/scala/org/sunbird/analytics/job/report/TestAssessmentArchivalJob.scala @@ -1,8 +1,6 @@ package org.sunbird.analytics.job.report -import org.apache.spark.sql.functions.udf -import org.apache.spark.sql.types.{ArrayType, MapType, StringType, StructType} import org.apache.spark.sql.{DataFrame, SparkSession} import org.ekstep.analytics.framework.conf.AppConf import org.ekstep.analytics.framework.util.{HadoopFileUtil, JSONUtils} @@ -10,8 +8,6 @@ import org.ekstep.analytics.framework.{FrameworkContext, JobConfig} import org.scalamock.scalatest.MockFactory import org.sunbird.analytics.util.EmbeddedCassandra -import scala.collection.mutable - class TestAssessmentArchivalJob extends BaseReportSpec with MockFactory { @@ -33,47 +29,23 @@ class TestAssessmentArchivalJob extends BaseReportSpec with MockFactory { new HadoopFileUtil().delete(spark.sparkContext.hadoopConfiguration, objectKey + "assessment-archival") } - val convertMethod = udf((value: mutable.WrappedArray[String]) => { - if (null != value && value.nonEmpty) - value.toList.map(str => JSONUtils.deserialize(str)(manifest[Map[String, String]])).toArray - else null - }, new ArrayType(MapType(StringType, StringType), true)) it should "Should able to archive the batch data" in { - //initializeDefaultMockData() implicit val mockFc: FrameworkContext = mock[FrameworkContext] val strConfig = """{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.AssessmentArchivalJob","modelParams":{"truncateData":false,"store":"local","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Assessment Archival Job"}""".stripMargin implicit val jobConfig: JobConfig = JSONUtils.deserialize[JobConfig](strConfig) val reportData = AssessmentArchivalJob.archiveData(spark, jobConfig) - println("JSON" + JSONUtils.serialize(reportData)) -// val batch_1 = reportData.filter(x => x.getOrElse("batch_id", "").asInstanceOf[String] === "1010") -// batch_1.foreach(res => res("year") === "2019") -// batch_1.foreach(res => res("total_records") === "2") -// batch_1.foreach(res => res("week_of_year") === "36") -// -// -// val batch_2 = reportData.filter(x => x.getOrElse("batch_id", "").asInstanceOf[String] === "1001") -// batch_2.foreach(res => res("year") === "2019") -// batch_2.foreach(res => res("total_records") === "3") -// batch_2.foreach(res 
=> res("week_of_year") === "36") -// -// -// val batch_3 = reportData.filter(x => x.getOrElse("batch_id", "").asInstanceOf[String] === "1005") -// batch_3.foreach(res => res("year") === "2019") -// batch_3.foreach(res => res("total_records") === "1") -// batch_3.foreach(res => res("week_of_year") === "36") -// -// -// val batch_4 = reportData.filter(x => x.getOrElse("batch_id", "").asInstanceOf[String] === "1006") -// batch_4.foreach(res => res("year") === "2019") -// batch_4.foreach(res => res("total_records") === "2") -// batch_4.foreach(res => res("week_of_year") === "36") - } + val batch_1 = reportData.filter(x => x.getOrElse("batch_id", "").asInstanceOf[String] === "batch-001") + batch_1.foreach(res => res("year") === "2021") + batch_1.foreach(res => res("total_records") === "5") + batch_1.foreach(res => res("week_of_year") === "32") + + val batch_2 = reportData.filter(x => x.getOrElse("batch_id", "").asInstanceOf[String] === "batch-004") + batch_2.foreach(res => res("year") === "2021") + batch_2.foreach(res => res("total_records") === "1") + batch_2.foreach(res => res("week_of_year") === "32") - def initializeDefaultMockData() { - (reporterMock.fetchData _) - .expects(spark, Map("table" -> "assessment_aggregator", "keyspace" -> sunbirdCoursesKeyspace, "cluster" -> "LMSCluster"), "org.apache.spark.sql.cassandra", new StructType()) - .returning(assessmentAggDF) } + } \ No newline at end of file From d156bea7633b9856f8cc40da74e5f851b74e6b22 Mon Sep 17 00:00:00 2001 From: Manjunath Davanam Date: Wed, 11 Aug 2021 11:15:23 +0530 Subject: [PATCH 37/55] Issue SB-24793 feat: Assessment data archival configuration update --- .../sunbird/analytics/job/report/AssessmentArchivalJob.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala index 5239de503..46b3d4444 100644 --- a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala +++ b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala @@ -33,7 +33,6 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep implicit val frameworkContext: FrameworkContext = getReportingFrameworkContext() val modelParams = jobConfig.modelParams.get - val truncateData: Boolean = modelParams.getOrElse("truncateData", "false").asInstanceOf[Boolean] init() try { val res = CommonUtil.time(archiveData(spark, jobConfig)) @@ -83,7 +82,7 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep assessmentData.unpersist() metrics } - if(deleteArchivedBatch) removeRecords(sparkSession, batches._1) else JobLogger.log(s"Skipping the batch deletions ${batches._1}", None, INFO) + if (deleteArchivedBatch) removeRecords(sparkSession, batches._1) else JobLogger.log(s"Skipping the batch deletions ${batches._1}", None, INFO) JobLogger.log(s"The data archival is successful", Some(Map("batch_id" -> batches._1, "pending_batches" -> batchesToArchiveCount.getAndDecrement())), INFO) res }).toArray From 76254c6f67c889364482089714fe7d077bdef4b5 Mon Sep 17 00:00:00 2001 From: Manjunath Davanam Date: Wed, 11 Aug 2021 16:09:13 +0530 Subject: [PATCH 38/55] Issue SB-24793 feat: deleting the records after archival --- .../job/report/AssessmentArchivalJob.scala | 11 ++++++----- .../assessment-archival/assessment_agg.cql | 16 ++++++++++++++++ 
.../analytics/job/report/BaseReportSpec.scala | 12 ++++++------ .../org/sunbird/analytics/util/BaseSpec.scala | 1 + 4 files changed, 29 insertions(+), 11 deletions(-) diff --git a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala index 46b3d4444..2b2e24052 100644 --- a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala +++ b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala @@ -1,6 +1,7 @@ package org.sunbird.analytics.job.report import com.datastax.spark.connector.cql.CassandraConnectorConf +import com.datastax.spark.connector.{SomeColumns, toRDDFunctions} import org.apache.spark.SparkContext import org.apache.spark.sql.cassandra.CassandraSparkSessionFunctions import org.apache.spark.sql.functions._ @@ -30,9 +31,7 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep JobLogger.start(s"$jobName started executing", Option(Map("config" -> config, "model" -> jobName))) implicit val jobConfig: JobConfig = JSONUtils.deserialize[JobConfig](config) implicit val spark: SparkSession = openSparkSession(jobConfig) - implicit val frameworkContext: FrameworkContext = getReportingFrameworkContext() - val modelParams = jobConfig.modelParams.get init() try { val res = CommonUtil.time(archiveData(spark, jobConfig)) @@ -82,14 +81,16 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep assessmentData.unpersist() metrics } - if (deleteArchivedBatch) removeRecords(sparkSession, batches._1) else JobLogger.log(s"Skipping the batch deletions ${batches._1}", None, INFO) + if (deleteArchivedBatch) removeRecords(batches._1, assessmentDF) else JobLogger.log(s"Skipping the batch deletions ${batches._1}", None, INFO) JobLogger.log(s"The data archival is successful", Some(Map("batch_id" -> batches._1, "pending_batches" -> batchesToArchiveCount.getAndDecrement())), INFO) res }).toArray } - def removeRecords(sparkSession: SparkSession, batchId: String): Unit = { - sparkSession.sql(s"DELETE FROM ${AppConf.getConfig("sunbird.courses.keyspace")}.${AppConf.getConfig("sunbird.courses.assessment.table")} WHERE batch_id = $batchId") + def removeRecords(batchId: String, assessmentDF: DataFrame): Unit = { + val batchData = assessmentDF.select("course_id", "batch_id", "user_id", "content_id", "attempt_id") + .where(col("batch_id") === batchId).rdd + batchData.deleteFromCassandra(AppConf.getConfig("sunbird.courses.keyspace"), AppConf.getConfig("sunbird.courses.assessment.table")) JobLogger.log(s"Deleting the records for the batch $batchId from the DB", None, INFO) } diff --git a/data-products/src/test/resources/assessment-archival/assessment_agg.cql b/data-products/src/test/resources/assessment-archival/assessment_agg.cql index 9e8b35306..88d520260 100644 --- a/data-products/src/test/resources/assessment-archival/assessment_agg.cql +++ b/data-products/src/test/resources/assessment-archival/assessment_agg.cql @@ -1,3 +1,19 @@ +CREATE TABLE IF NOT EXISTS sunbird_courses.assessment_aggregator ( + user_id text, + course_id text, + batch_id text, + content_id text, + attempt_id text, + created_on timestamp, + grand_total text, + last_attempted_on timestamp, + question list>, + total_max_score double, + total_score double, + updated_on timestamp, + PRIMARY KEY ((user_id, course_id), batch_id, content_id, attempt_id) +); + INSERT INTO sunbird_courses.assessment_aggregator 
(course_id, batch_id, user_id, content_id, attempt_id, grand_total, total_max_score, total_score, question, updated_on) VALUES ('do_1130928636168192001667', 'batch-001', 'user-001', 'do_1128870328040161281204', 'attempat-001', '20', 20, 20, [{id: 'do_213019475454476288155', assess_ts: '2020-06-18T18:15:56.490+0000', max_score: 1, score: 1, type: 'mcq', title: 'testQuestiontextandformula', resvalues: [{'1': '{"text":"A=\\\\pi r^2\n"}'}], params: [{'1': '{"text":"A=\\\\pi r^2\n"}'}, {'2': '{"text":"no\n"}'}, {'answer': '{"correct":["1"]}'}], description: 'testQuestiontextandformula', duration: 1.0}, {id: 'do_213019970118279168165', assess_ts: '2020-06-18T18:15:56.490+0000', max_score: 1, score: 1, type: 'mcq', title: 'test with formula', resvalues: [{'1': '{"text":"1\nA=\\\\pi r^2A=\\\\pi r^2\n"}'}], params: [{'1': '{"text":"1\nA=\\\\pi r^2A=\\\\pi r^2\n"}'}, {'2': '{"text":"2\n"}'}, {'answer': '{"correct":["1"]}'}], description: '', duration: 1.0}, {id: 'do_213019972814823424168', assess_ts: '2020-06-18T18:15:56.490+0000', max_score: 1, score: 0.33, type: 'mtf', title: 'Copy of - Match the following:\n\nx=\\frac{-b\\pm\\sqrt{b^2-4ac}}{2a}\nArrange the following equations in correct order.\n', resvalues: [{'lhs': '[{"1":"{\"text\":\"A=\\\\\\\\pi r^2\\n\"}"},{"2":"{\"text\":\"\\\\\\\\frac{4}{3}\\\\\\\\pi r^3\\n\"}"},{"3":"{\"text\":\"a^n\\\\\\\\times a^m=a^{n+m}\\n\"}"}]'}, {'rhs': '[{"1":"{\"text\":\"Volume of sphere\\n\"}"},{"2":"{\"text\":\"Area of Circle\\n\"}"},{"3":"{\"text\":\"Product Rule\\n\"}"}]'}], params: [{'lhs': '[{"1":"{\"text\":\"A=\\\\\\\\pi r^2\\n\"}"},{"2":"{\"text\":\"\\\\\\\\frac{4}{3}\\\\\\\\pi r^3\\n\"}"},{"3":"{\"text\":\"a^n\\\\\\\\times a^m=a^{n+m}\\n\"}"}]'}, {'rhs': '[{"1":"{\"text\":\"Volume of sphere\\n\"}"},{"2":"{\"text\":\"Product Rule\\n\"}"},{"3":"{\"text\":\"Area of Circle\\n\"}"}]'}, {'answer': '{"lhs":["1","2","3"],"rhs":["3","1","2"]}'}], description: '', duration: 2.0}, {id: 'do_2130256513760624641171', assess_ts: '2020-06-18T18:15:56.490+0000', max_score: 10, score: 10, type: 'mcq', title: '2 +2 is..? 
mark ia 10\n', resvalues: [{'1': '{"text":"4\n"}'}], params: [{'1': '{"text":"4\n"}'}, {'2': '{"text":"3\n"}'}, {'3': '{"text":"8\n"}'}, {'4': '{"text":"10\n"}'}, {'answer': '{"correct":["1"]}'}], description: '', duration: 12.0}], toTimeStamp(toDate(now()))); INSERT INTO sunbird_courses.assessment_aggregator (course_id, batch_id, user_id, content_id, attempt_id, grand_total, total_max_score, total_score, question, updated_on) VALUES ('do_1130928636168192001667', 'batch-001', 'user-001', 'do_1128870328040161281204', 'attempat-001', '20', 20, 20, [{id: 'do_213019475454476288155', assess_ts: '2020-06-18T18:15:56.490+0000', max_score: 1, score: 1, type: 'mcq', title: 'testQuestiontextandformula', resvalues: [{'1': '{"text":"A=\\\\pi r^2\n"}'}], params: [{'1': '{"text":"A=\\\\pi r^2\n"}'}, {'2': '{"text":"no\n"}'}, {'answer': '{"correct":["1"]}'}], description: 'testQuestiontextandformula', duration: 1.0}, {id: 'do_213019970118279168165', assess_ts: '2020-06-18T18:15:56.490+0000', max_score: 1, score: 1, type: 'mcq', title: 'test with formula', resvalues: [{'1': '{"text":"1\nA=\\\\pi r^2A=\\\\pi r^2\n"}'}], params: [{'1': '{"text":"1\nA=\\\\pi r^2A=\\\\pi r^2\n"}'}, {'2': '{"text":"2\n"}'}, {'answer': '{"correct":["1"]}'}], description: '', duration: 1.0}, {id: 'do_213019972814823424168', assess_ts: '2020-06-18T18:15:56.490+0000', max_score: 1, score: 0.33, type: 'mtf', title: 'Copy of - Match the following:\n\nx=\\frac{-b\\pm\\sqrt{b^2-4ac}}{2a}\nArrange the following equations in correct order.\n', resvalues: [{'lhs': '[{"1":"{\"text\":\"A=\\\\\\\\pi r^2\\n\"}"},{"2":"{\"text\":\"\\\\\\\\frac{4}{3}\\\\\\\\pi r^3\\n\"}"},{"3":"{\"text\":\"a^n\\\\\\\\times a^m=a^{n+m}\\n\"}"}]'}, {'rhs': '[{"1":"{\"text\":\"Volume of sphere\\n\"}"},{"2":"{\"text\":\"Area of Circle\\n\"}"},{"3":"{\"text\":\"Product Rule\\n\"}"}]'}], params: [{'lhs': '[{"1":"{\"text\":\"A=\\\\\\\\pi r^2\\n\"}"},{"2":"{\"text\":\"\\\\\\\\frac{4}{3}\\\\\\\\pi r^3\\n\"}"},{"3":"{\"text\":\"a^n\\\\\\\\times a^m=a^{n+m}\\n\"}"}]'}, {'rhs': '[{"1":"{\"text\":\"Volume of sphere\\n\"}"},{"2":"{\"text\":\"Product Rule\\n\"}"},{"3":"{\"text\":\"Area of Circle\\n\"}"}]'}, {'answer': '{"lhs":["1","2","3"],"rhs":["3","1","2"]}'}], description: '', duration: 2.0}, {id: 'do_2130256513760624641171', assess_ts: '2020-06-18T18:15:56.490+0000', max_score: 10, score: 10, type: 'mcq', title: '2 +2 is..? 
mark ia 10\n', resvalues: [{'1': '{"text":"4\n"}'}], params: [{'1': '{"text":"4\n"}'}, {'2': '{"text":"3\n"}'}, {'3': '{"text":"8\n"}'}, {'4': '{"text":"10\n"}'}, {'answer': '{"correct":["1"]}'}], description: '', duration: 12.0}], toTimeStamp(toDate(now()))); INSERT INTO sunbird_courses.assessment_aggregator (course_id, batch_id, user_id, content_id, attempt_id, grand_total, total_max_score, total_score, updated_on) VALUES ('do_1130928636168192001667', 'batch-001', 'user-002', 'do_1128870328040161281204', 'attempat-001', '10', 10, 10, toTimeStamp(toDate(now()))); diff --git a/data-products/src/test/scala/org/sunbird/analytics/job/report/BaseReportSpec.scala b/data-products/src/test/scala/org/sunbird/analytics/job/report/BaseReportSpec.scala index 8e53554bc..430ece00f 100644 --- a/data-products/src/test/scala/org/sunbird/analytics/job/report/BaseReportSpec.scala +++ b/data-products/src/test/scala/org/sunbird/analytics/job/report/BaseReportSpec.scala @@ -17,16 +17,16 @@ class BaseReportSpec extends BaseSpec with BeforeAndAfterAll { val cbatchAliasMapping = spark.sparkContext.textFile("src/test/resources/reports/cbatch_alias_mapping.json", 1).collect().head val cbatchAssessmentAliasMapping = spark.sparkContext.textFile("src/test/resources/reports/cbatch_assessment_alias_mapping.json", 1).collect().head - EmbeddedES.start( - Array( - EsIndex("compositesearch", Option("cs"), Option(csMapping), None), - EsIndex("cbatch", None, None, Option(cbatchAliasMapping)), - EsIndex("cbatch-assessment-08-07-2018-11-00", None, None, Option(cbatchAssessmentAliasMapping)))) +// EmbeddedES.start( +// Array( +// EsIndex("compositesearch", Option("cs"), Option(csMapping), None), +// EsIndex("cbatch", None, None, Option(cbatchAliasMapping)), +// EsIndex("cbatch-assessment-08-07-2018-11-00", None, None, Option(cbatchAssessmentAliasMapping)))) } override def afterAll() { //super.afterAll(); - EmbeddedES.stop() + //EmbeddedES.stop() } } \ No newline at end of file diff --git a/data-products/src/test/scala/org/sunbird/analytics/util/BaseSpec.scala b/data-products/src/test/scala/org/sunbird/analytics/util/BaseSpec.scala index 65ea98b09..a6bcc2c91 100644 --- a/data-products/src/test/scala/org/sunbird/analytics/util/BaseSpec.scala +++ b/data-products/src/test/scala/org/sunbird/analytics/util/BaseSpec.scala @@ -26,6 +26,7 @@ class BaseSpec extends FlatSpec with Matchers with BeforeAndAfterAll { conf.set("spark.redis.host", "localhost") conf.set("spark.redis.port", "6341") conf.set("spark.redis.db", "0") + conf.set("spark.cassandra.output.concurrent.writes", "12") conf; } From f7b74cd5ee7129986edcda20d0ecb1e7214834bd Mon Sep 17 00:00:00 2001 From: Manjunath Davanam Date: Thu, 12 Aug 2021 16:50:44 +0530 Subject: [PATCH 39/55] Issue SB-24793 feat: deleting the records after archival --- .../job/report/AssessmentArchivalJob.scala | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala index 2b2e24052..46c799ff3 100644 --- a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala +++ b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala @@ -20,6 +20,7 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep private val assessmentAggDBSettings: Map[String, String] = Map("table" -> 
AppConf.getConfig("sunbird.courses.assessment.table"), "keyspace" -> AppConf.getConfig("sunbird.courses.keyspace"), "cluster" -> "LMSCluster") implicit val className: String = "org.sunbird.analytics.job.report.AssessmentArchivalJob" private val partitionCols = List("batch_id", "year", "week_of_year") + private val columnWithOrder = List("course_id", "batch_id","user_id", "content_id", "attempt_id", "created_on", "grand_total", "last_attempted_on", "total_max_score", "total_score", "updated_on", "question") case class BatchPartition(batch_id: String, year: Int, week_of_year: Int) @@ -59,6 +60,7 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep val batches: List[String] = AppConf.getConfig("assessment.batches").split(",").toList.filter(x => x.nonEmpty) val assessmentDF: DataFrame = getAssessmentData(sparkSession, batches) + val assessmentData = assessmentDF.withColumn("updated_on", to_timestamp(col("updated_on"))) .withColumn("year", year(col("updated_on"))) .withColumn("week_of_year", weekofyear(col("updated_on"))) @@ -69,29 +71,27 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep JobLogger.log(s"Total Batches to Archive By Year & Week $batchesToArchiveCount", None, INFO) val batchesToArchive: Map[String, Array[BatchPartition]] = archiveBatchList.map(f => BatchPartition(f.get(0).asInstanceOf[String], f.get(1).asInstanceOf[Int], f.get(2).asInstanceOf[Int])).groupBy(_.batch_id) - batchesToArchive.flatMap(batches => { val processingBatch = new AtomicInteger(batches._2.length) JobLogger.log(s"Started Processing to archive the data", Some(Map("batch_id" -> batches._1, "total_part_files_to_archive" -> batches._2.length)), INFO) val res = for (batch <- batches._2) yield { - val filteredDF = assessmentData.filter(col("batch_id") === batch.batch_id && col("year") === batch.year && col("week_of_year") === batch.week_of_year) - upload(filteredDF.drop("year", "week_of_year"), batch, jobConfig) + val filteredDF = assessmentData.filter(col("batch_id") === batch.batch_id && col("year") === batch.year && col("week_of_year") === batch.week_of_year).select(columnWithOrder.head, columnWithOrder.tail: _*) + upload(filteredDF, batch, jobConfig) val metrics = Map("batch_id" -> batch.batch_id, "year" -> batch.year, "week_of_year" -> batch.week_of_year, "pending_part_files" -> processingBatch.getAndDecrement(), "total_records" -> filteredDF.count()) JobLogger.log(s"Data is archived and Processing the remaining part files ", Some(metrics), INFO) assessmentData.unpersist() metrics } if (deleteArchivedBatch) removeRecords(batches._1, assessmentDF) else JobLogger.log(s"Skipping the batch deletions ${batches._1}", None, INFO) - JobLogger.log(s"The data archival is successful", Some(Map("batch_id" -> batches._1, "pending_batches" -> batchesToArchiveCount.getAndDecrement())), INFO) + JobLogger.log(s"${batches._1} is successfully archived", Some(Map("batch_id" -> batches._1, "pending_batches" -> batchesToArchiveCount.getAndDecrement())), INFO) res }).toArray } def removeRecords(batchId: String, assessmentDF: DataFrame): Unit = { - val batchData = assessmentDF.select("course_id", "batch_id", "user_id", "content_id", "attempt_id") - .where(col("batch_id") === batchId).rdd + val batchData = assessmentDF.select("course_id", "batch_id", "user_id", "content_id", "attempt_id").filter(col("batch_id") === batchId).rdd batchData.deleteFromCassandra(AppConf.getConfig("sunbird.courses.keyspace"), AppConf.getConfig("sunbird.courses.assessment.table")) - 
JobLogger.log(s"Deleting the records for the batch $batchId from the DB", None, INFO) + JobLogger.log(s"Deleted ${batchData.count} records for the batch $batchId from the DB", None, INFO) } def getAssessmentData(spark: SparkSession, batchIds: List[String]): DataFrame = { From d2a8573dd52c6783e8698f1851aa6f82463b8659 Mon Sep 17 00:00:00 2001 From: Manjunath Davanam Date: Fri, 13 Aug 2021 15:42:34 +0530 Subject: [PATCH 40/55] Issue SB-24793 feat: Assessment archival compress the report as csv.gz file format. --- .../sunbird/analytics/job/report/AssessmentArchivalJob.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala index 46c799ff3..bda5ec0a6 100644 --- a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala +++ b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala @@ -112,13 +112,13 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep val reportPath: String = modelParams.getOrElse("reportPath", "archival-data/").asInstanceOf[String] val container = AppConf.getConfig("cloud.container.reports") val objectKey = AppConf.getConfig("course.metrics.cloud.objectKey") - val fileName = s"${batch.batch_id}-${batch.year}-${batch.week_of_year}" + val fileName = s"${batch.batch_id}/${batch.year}-${batch.week_of_year}" val storageConfig = getStorageConfig( container, objectKey, jobConfig) JobLogger.log(s"Uploading reports to blob storage", None, INFO) - archivedData.saveToBlobStore(storageConfig, "csv", s"$reportPath$fileName-${System.currentTimeMillis()}", Option(Map("header" -> "true")), None) + archivedData.saveToBlobStore(storageConfig = storageConfig, format = "csv", reportId = s"$reportPath$fileName-${System.currentTimeMillis()}", options = Option(Map("header" -> "true", "codec" -> "org.apache.hadoop.io.compress.GzipCodec")), partitioningColumns = None, fileExt = Some("csv.gz")) } } From 020874f7f92fffc26f92eec7fd9059d306883070 Mon Sep 17 00:00:00 2001 From: Manjunath Davanam Date: Mon, 16 Aug 2021 10:58:45 +0530 Subject: [PATCH 41/55] Issue SB-24793 feat: Assessment archival to remove the archived records --- .../analytics/exhaust/util/ExhaustUtil.scala | 37 +++++++++++++++++++ .../job/report/AssessmentArchivalJob.scala | 35 +++++++++++++----- .../report/TestAssessmentArchivalJob.scala | 13 ++++++- 3 files changed, 74 insertions(+), 11 deletions(-) create mode 100644 data-products/src/main/scala/org/sunbird/analytics/exhaust/util/ExhaustUtil.scala diff --git a/data-products/src/main/scala/org/sunbird/analytics/exhaust/util/ExhaustUtil.scala b/data-products/src/main/scala/org/sunbird/analytics/exhaust/util/ExhaustUtil.scala new file mode 100644 index 000000000..06048d45c --- /dev/null +++ b/data-products/src/main/scala/org/sunbird/analytics/exhaust/util/ExhaustUtil.scala @@ -0,0 +1,37 @@ +package org.sunbird.analytics.exhaust.util + + +import org.apache.spark.sql.{DataFrame, SparkSession} +import org.ekstep.analytics.framework.FrameworkContext +import org.ekstep.analytics.framework.conf.AppConf +import org.ekstep.analytics.framework.util.JobLogger + +object ExhaustUtil { + + def getArchivedData(store: String, filePath: String, bucket: String, blobFields: Map[String, String], fileFormat: Option[String])(implicit spark: SparkSession, fc: FrameworkContext): DataFrame = { + val format = 
fileFormat.getOrElse("csv.gz") + val batchId = blobFields.getOrElse("batchId", "*") + val year = blobFields.getOrElse("year", "*") + val weekNumb = blobFields.getOrElse("weekNum", "*") + + + val url = store match { + case "local" => + filePath + s"${batchId}/${year}-${weekNumb}-*.${format}" + // $COVERAGE-OFF$ for azure testing + case "s3" | "azure" => + val key = AppConf.getConfig("azure_storage_key") + val file = s"${filePath}${batchId}/${year}-${weekNumb}-*.${format}" + s"wasb://$bucket@$key.blob.core.windows.net/$file." + // $COVERAGE-ON$ + } + + JobLogger.log(s"Fetching data from ${store} ")(new String()) + fetch(url, "csv") + } + + def fetch(url: String, format: String)(implicit spark: SparkSession, fc: FrameworkContext): DataFrame = { + spark.read.format(format).option("header", "true").load(url) + } + +} \ No newline at end of file diff --git a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala index bda5ec0a6..a1e79c4b6 100644 --- a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala +++ b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala @@ -8,10 +8,12 @@ import org.apache.spark.sql.functions._ import org.apache.spark.sql.types.StructType import org.apache.spark.sql.{DataFrame, SparkSession} import org.ekstep.analytics.framework.Level.INFO +import org.ekstep.analytics.framework.Period.WEEK import org.ekstep.analytics.framework.conf.AppConf import org.ekstep.analytics.framework.util.DatasetUtil.extensions import org.ekstep.analytics.framework.util.{CommonUtil, JSONUtils, JobLogger} import org.ekstep.analytics.framework.{FrameworkContext, IJob, JobConfig} +import org.sunbird.analytics.exhaust.util.ExhaustUtil import java.util.concurrent.atomic.AtomicInteger @@ -20,7 +22,7 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep private val assessmentAggDBSettings: Map[String, String] = Map("table" -> AppConf.getConfig("sunbird.courses.assessment.table"), "keyspace" -> AppConf.getConfig("sunbird.courses.keyspace"), "cluster" -> "LMSCluster") implicit val className: String = "org.sunbird.analytics.job.report.AssessmentArchivalJob" private val partitionCols = List("batch_id", "year", "week_of_year") - private val columnWithOrder = List("course_id", "batch_id","user_id", "content_id", "attempt_id", "created_on", "grand_total", "last_attempted_on", "total_max_score", "total_score", "updated_on", "question") + private val columnWithOrder = List("course_id", "batch_id", "user_id", "content_id", "attempt_id", "created_on", "grand_total", "last_attempted_on", "total_max_score", "total_score", "updated_on", "question") case class BatchPartition(batch_id: String, year: Int, week_of_year: Int) @@ -33,9 +35,11 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep implicit val jobConfig: JobConfig = JSONUtils.deserialize[JobConfig](config) implicit val spark: SparkSession = openSparkSession(jobConfig) implicit val frameworkContext: FrameworkContext = getReportingFrameworkContext() + val modelParams = jobConfig.modelParams.get + val deleteArchivedBatch: Boolean = modelParams.getOrElse("deleteArchivedBatch", false).asInstanceOf[Boolean] init() try { - val res = CommonUtil.time(archiveData(spark, jobConfig)) + val res = if (deleteArchivedBatch) CommonUtil.time(removeRecords) else CommonUtil.time(archiveData(spark, jobConfig)) val 
total_archived_files = res._2.length JobLogger.end(s"$jobName completed execution", "SUCCESS", Option(Map("timeTaken" -> res._1, "total_archived_files" -> total_archived_files))) } catch { @@ -55,8 +59,6 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep // $COVERAGE-ON$ def archiveData(sparkSession: SparkSession, jobConfig: JobConfig): Array[Map[String, Any]] = { - val modelParams = jobConfig.modelParams.get - val deleteArchivedBatch: Boolean = modelParams.getOrElse("deleteArchivedBatch", false).asInstanceOf[Boolean] val batches: List[String] = AppConf.getConfig("assessment.batches").split(",").toList.filter(x => x.nonEmpty) val assessmentDF: DataFrame = getAssessmentData(sparkSession, batches) @@ -82,16 +84,31 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep assessmentData.unpersist() metrics } - if (deleteArchivedBatch) removeRecords(batches._1, assessmentDF) else JobLogger.log(s"Skipping the batch deletions ${batches._1}", None, INFO) JobLogger.log(s"${batches._1} is successfully archived", Some(Map("batch_id" -> batches._1, "pending_batches" -> batchesToArchiveCount.getAndDecrement())), INFO) res }).toArray } - def removeRecords(batchId: String, assessmentDF: DataFrame): Unit = { - val batchData = assessmentDF.select("course_id", "batch_id", "user_id", "content_id", "attempt_id").filter(col("batch_id") === batchId).rdd - batchData.deleteFromCassandra(AppConf.getConfig("sunbird.courses.keyspace"), AppConf.getConfig("sunbird.courses.assessment.table")) - JobLogger.log(s"Deleted ${batchData.count} records for the batch $batchId from the DB", None, INFO) + def removeRecords()(implicit spark: SparkSession, fc: FrameworkContext, config: JobConfig): Array[Map[String, Any]] = { + val assessmentDF = fetchRecords() + println("assessmentDF" + assessmentDF.show(false)) + val batchData = assessmentDF.select("course_id", "batch_id", "user_id", "content_id", "attempt_id").rdd + //batchData.deleteFromCassandra(AppConf.getConfig("sunbird.courses.keyspace"), AppConf.getConfig("sunbird.courses.assessment.table")) + JobLogger.log(s"Deleted ${batchData.count} records for the batch from the DB", None, INFO) + Array() + } + + def fetchRecords()(implicit spark: SparkSession, fc: FrameworkContext, config: JobConfig): DataFrame = { + val azureFetcherConfig = config.modelParams.get("archivalFetcherConfig").asInstanceOf[Map[String, AnyRef]] + val store = azureFetcherConfig("store").asInstanceOf[String] + val format: String = azureFetcherConfig.getOrElse("format", "csv.gz").asInstanceOf[String] + val filePath = azureFetcherConfig.getOrElse("filePath", "archival-data/").asInstanceOf[String] + val container = azureFetcherConfig.getOrElse("container", "reports").asInstanceOf[String] + import org.joda.time.{DateTime, DateTimeZone, Days, LocalDate, Weeks, Years} + val s = CommonUtil.getPeriod(System.currentTimeMillis, WEEK) + println("week num" + s) + val blobFields = Map("year" -> "2022", "weekNum" -> "32") + ExhaustUtil.getArchivedData(store, filePath, container, blobFields, Some(format)) } def getAssessmentData(spark: SparkSession, batchIds: List[String]): DataFrame = { diff --git a/data-products/src/test/scala/org/sunbird/analytics/job/report/TestAssessmentArchivalJob.scala b/data-products/src/test/scala/org/sunbird/analytics/job/report/TestAssessmentArchivalJob.scala index fe4c7e21e..be96a536e 100644 --- a/data-products/src/test/scala/org/sunbird/analytics/job/report/TestAssessmentArchivalJob.scala +++ 
b/data-products/src/test/scala/org/sunbird/analytics/job/report/TestAssessmentArchivalJob.scala @@ -1,6 +1,7 @@ package org.sunbird.analytics.job.report +import org.apache.spark.SparkContext import org.apache.spark.sql.{DataFrame, SparkSession} import org.ekstep.analytics.framework.conf.AppConf import org.ekstep.analytics.framework.util.{HadoopFileUtil, JSONUtils} @@ -11,7 +12,7 @@ import org.sunbird.analytics.util.EmbeddedCassandra class TestAssessmentArchivalJob extends BaseReportSpec with MockFactory { - var spark: SparkSession = _ + implicit var spark: SparkSession = _ var assessmentAggDF: DataFrame = _ var reporterMock: BaseReportsJob = mock[BaseReportsJob] @@ -32,7 +33,7 @@ class TestAssessmentArchivalJob extends BaseReportSpec with MockFactory { it should "Should able to archive the batch data" in { implicit val mockFc: FrameworkContext = mock[FrameworkContext] - val strConfig = """{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.AssessmentArchivalJob","modelParams":{"truncateData":false,"store":"local","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Assessment Archival Job"}""".stripMargin + val strConfig = """{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.AssessmentArchivalJob","modelParams":{"deleteArchivedBatch":false,"store":"local","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Assessment Archival Job"}""".stripMargin implicit val jobConfig: JobConfig = JSONUtils.deserialize[JobConfig](strConfig) val reportData = AssessmentArchivalJob.archiveData(spark, jobConfig) @@ -45,7 +46,15 @@ class TestAssessmentArchivalJob extends BaseReportSpec with MockFactory { batch_2.foreach(res => res("year") === "2021") batch_2.foreach(res => res("total_records") === "1") batch_2.foreach(res => res("week_of_year") === "32") + } + it should "Should able to fetch the archived records from the azure and delete the records" in { + implicit val mockFc: FrameworkContext = mock[FrameworkContext] + val strConfig = """{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.AssessmentArchivalJob","modelParams":{"archivalFetcherConfig":{"store":"local","format":"csv.gz","filePath":"src/test/resources/assessment-archival/archival-data/","container":""},"deleteArchivedBatch":true,"sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Assessment Archival Job"}""".stripMargin + implicit val jobConfig: JobConfig = JSONUtils.deserialize[JobConfig](strConfig) + val reportData = AssessmentArchivalJob.removeRecords() } + + } \ No newline at end of file From 0425f1c7a0be43bc32edf291e305de2b0ec26acb Mon Sep 17 00:00:00 2001 From: Manjunath Davanam Date: Thu, 19 Aug 2021 16:02:55 +0530 Subject: [PATCH 42/55] Issue SB-24793 feat: Assessment archived data removal logic --- .../analytics/exhaust/util/ExhaustUtil.scala | 7 ++- .../job/report/AssessmentArchivalJob.scala | 56 ++++++++++++++----- .../report/TestAssessmentArchivalJob.scala | 3 +- 3 files changed, 47 insertions(+), 19 deletions(-) diff --git a/data-products/src/main/scala/org/sunbird/analytics/exhaust/util/ExhaustUtil.scala 
b/data-products/src/main/scala/org/sunbird/analytics/exhaust/util/ExhaustUtil.scala
index 06048d45c..467c48808 100644
--- a/data-products/src/main/scala/org/sunbird/analytics/exhaust/util/ExhaustUtil.scala
+++ b/data-products/src/main/scala/org/sunbird/analytics/exhaust/util/ExhaustUtil.scala
@@ -9,10 +9,11 @@ import org.ekstep.analytics.framework.util.JobLogger
 object ExhaustUtil {

   def getArchivedData(store: String, filePath: String, bucket: String, blobFields: Map[String, String], fileFormat: Option[String])(implicit spark: SparkSession, fc: FrameworkContext): DataFrame = {
+    val filteredBlobFields = blobFields.filter(_._2 != null)
     val format = fileFormat.getOrElse("csv.gz")
-    val batchId = blobFields.getOrElse("batchId", "*")
-    val year = blobFields.getOrElse("year", "*")
-    val weekNumb = blobFields.getOrElse("weekNum", "*")
+    val batchId = filteredBlobFields.getOrElse("batchId", "*")
+    val year = filteredBlobFields.getOrElse("year", "*")
+    val weekNumb = filteredBlobFields.getOrElse("weekNum", "*")

     val url = store match {
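A quick aside on what the null filter above buys (an illustrative sketch, not part of the patch; the values are made up): any blob field left unset now falls back to the `*` wildcard instead of matching the literal string "null", so the archived-file glob widens gracefully.

    // Hypothetical REPL session demonstrating the fallback behaviour
    val blobFields: Map[String, String] = Map("batchId" -> null, "year" -> "2021", "weekNum" -> "32")
    val filteredBlobFields = blobFields.filter(_._2 != null) // drops the null batchId entry
    filteredBlobFields.getOrElse("batchId", "*") // "*" -> glob matches every batch folder
    filteredBlobFields.getOrElse("year", "*")    // "2021"
    filteredBlobFields.getOrElse("weekNum", "*") // "32"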
diff --git a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala
index a1e79c4b6..7bfd35264 100644
--- a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala
+++ b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala
@@ -1,18 +1,18 @@ package org.sunbird.analytics.job.report

 import com.datastax.spark.connector.cql.CassandraConnectorConf
-import com.datastax.spark.connector.{SomeColumns, toRDDFunctions}
+import com.datastax.spark.connector.toRDDFunctions
 import org.apache.spark.SparkContext
 import org.apache.spark.sql.cassandra.CassandraSparkSessionFunctions
 import org.apache.spark.sql.functions._
 import org.apache.spark.sql.types.StructType
 import org.apache.spark.sql.{DataFrame, SparkSession}
 import org.ekstep.analytics.framework.Level.INFO
-import org.ekstep.analytics.framework.Period.WEEK
 import org.ekstep.analytics.framework.conf.AppConf
 import org.ekstep.analytics.framework.util.DatasetUtil.extensions
 import org.ekstep.analytics.framework.util.{CommonUtil, JSONUtils, JobLogger}
 import org.ekstep.analytics.framework.{FrameworkContext, IJob, JobConfig}
+import org.joda.time.{DateTime, LocalDate}
 import org.sunbird.analytics.exhaust.util.ExhaustUtil

 import java.util.concurrent.atomic.AtomicInteger
@@ -26,6 +26,8 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep

   case class BatchPartition(batch_id: String, year: Int, week_of_year: Int)

+  case class Period(year: Int, week_of_year: Int)
+
   // $COVERAGE-OFF$ Disabling scoverage for main and execute method
   override def main(config: String)(implicit sc: Option[SparkContext], fc: Option[FrameworkContext]): Unit = {
     implicit val className: String = "org.sunbird.analytics.job.report.AssessmentArchivalJob"
@@ -39,7 +41,13 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep
     val deleteArchivedBatch: Boolean = modelParams.getOrElse("deleteArchivedBatch", false).asInstanceOf[Boolean]
     init()
     try {
-      val res = if (deleteArchivedBatch) CommonUtil.time(removeRecords) else CommonUtil.time(archiveData(spark, jobConfig))
+
+      // The below config is optional, for when the user wants to remove the archived data for a specific date or batch id
+      // NOTE - By default, records are removed only for the previous week's archived data.
+      val archivedDate = modelParams.getOrElse("archivedDate", null).asInstanceOf[String]
+      val archivedBatch = modelParams.getOrElse("archivedBatch", null).asInstanceOf[String]
+
+      val res = if (deleteArchivedBatch) CommonUtil.time(removeRecords(archivedDate, Some(archivedBatch))) else CommonUtil.time(archiveData(spark, jobConfig))
       val total_archived_files = res._2.length
       JobLogger.end(s"$jobName completed execution", "SUCCESS", Option(Map("timeTaken" -> res._1, "total_archived_files" -> total_archived_files)))
     } catch {
@@ -59,10 +67,13 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep

   // $COVERAGE-ON$
   def archiveData(sparkSession: SparkSession, jobConfig: JobConfig): Array[Map[String, Any]] = {
+    // Read the information for the batches to archive
     val batches: List[String] = AppConf.getConfig("assessment.batches").split(",").toList.filter(x => x.nonEmpty)

+    // Get the assessment data
     val assessmentDF: DataFrame = getAssessmentData(sparkSession, batches)

+    // Get the week num & year value based on the updated_on column
     val assessmentData = assessmentDF.withColumn("updated_on", to_timestamp(col("updated_on")))
       .withColumn("year", year(col("updated_on")))
       .withColumn("week_of_year", weekofyear(col("updated_on")))
@@ -72,10 +83,12 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep
     val batchesToArchiveCount = new AtomicInteger(archiveBatchList.length)
     JobLogger.log(s"Total Batches to Archive By Year & Week $batchesToArchiveCount", None, INFO)

+    // Loop through the list of batches to archive
     val batchesToArchive: Map[String, Array[BatchPartition]] = archiveBatchList.map(f => BatchPartition(f.get(0).asInstanceOf[String], f.get(1).asInstanceOf[Int], f.get(2).asInstanceOf[Int])).groupBy(_.batch_id)
     batchesToArchive.flatMap(batches => {
       val processingBatch = new AtomicInteger(batches._2.length)
       JobLogger.log(s"Started Processing to archive the data", Some(Map("batch_id" -> batches._1, "total_part_files_to_archive" -> batches._2.length)), INFO)
+      // Loop through the week_num & year batch partitions
       val res = for (batch <- batches._2) yield {
         val filteredDF = assessmentData.filter(col("batch_id") === batch.batch_id && col("year") === batch.year && col("week_of_year") === batch.week_of_year).select(columnWithOrder.head, columnWithOrder.tail: _*)
         upload(filteredDF, batch, jobConfig)
@@ -89,25 +102,38 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep
     }).toArray
   }

-  def removeRecords()(implicit spark: SparkSession, fc: FrameworkContext, config: JobConfig): Array[Map[String, Any]] = {
-    val assessmentDF = fetchRecords()
-    println("assessmentDF" + assessmentDF.show(false))
-    val batchData = assessmentDF.select("course_id", "batch_id", "user_id", "content_id", "attempt_id").rdd
-    //batchData.deleteFromCassandra(AppConf.getConfig("sunbird.courses.keyspace"), AppConf.getConfig("sunbird.courses.assessment.table"))
-    JobLogger.log(s"Deleted ${batchData.count} records for the batch from the DB", None, INFO)
-    Array()
+  // Delete the records for the archived batch data.
+  // Date - YYYY-MM-DD format
+  def removeRecords(date: String, batchId: Option[String])(implicit spark: SparkSession, fc: FrameworkContext, config: JobConfig): Array[Map[String, Any]] = {
+    val period: Period = getWeekAndYearVal(date)
+    val archivedDataDF = fetchArchivedBatches(period, batchId)
+    val archivedDataRDD = archivedDataDF.select("course_id", "batch_id", "user_id", "content_id", "attempt_id").rdd
+    val totalArchivedRecords = archivedDataRDD.count
+    archivedDataRDD.deleteFromCassandra(AppConf.getConfig("sunbird.courses.keyspace"), AppConf.getConfig("sunbird.courses.assessment.table"))
+    JobLogger.log(s"Deleted $totalArchivedRecords records for the batch from the DB", None, INFO)
+    Array(Map("deleted_records" -> totalArchivedRecords))
+  }
+
+  // Date - YYYY-MM-DD format
+  def getWeekAndYearVal(date: String): Period = {
+    if (null != date && date.nonEmpty) {
+      val dt = new DateTime(date)
+      Period(year = dt.getYear, week_of_year = dt.getWeekOfWeekyear)
+    } else {
+      val today = new DateTime()
+      val lastWeek = today.minusWeeks(1) // Always use the previous week of the current date
+      Period(year = lastWeek.getYear, week_of_year = lastWeek.getWeekOfWeekyear)
+    }
   }

-  def fetchRecords()(implicit spark: SparkSession, fc: FrameworkContext, config: JobConfig): DataFrame = {
+  def fetchArchivedBatches(period: Period, batchId: Option[String])(implicit spark: SparkSession, fc: FrameworkContext, config: JobConfig): DataFrame = {
     val azureFetcherConfig = config.modelParams.get("archivalFetcherConfig").asInstanceOf[Map[String, AnyRef]]
     val store = azureFetcherConfig("store").asInstanceOf[String]
     val format: String = azureFetcherConfig.getOrElse("format", "csv.gz").asInstanceOf[String]
     val filePath = azureFetcherConfig.getOrElse("filePath", "archival-data/").asInstanceOf[String]
     val container = azureFetcherConfig.getOrElse("container", "reports").asInstanceOf[String]
-    import org.joda.time.{DateTime, DateTimeZone, Days, LocalDate, Weeks, Years}
-    val s = CommonUtil.getPeriod(System.currentTimeMillis, WEEK)
-    println("week num" + s)
-    val blobFields = Map("year" -> "2022", "weekNum" -> "32")
+    val blobFields = Map("year" -> period.year.toString, "weekNum" -> period.week_of_year.toString, "batchId" -> batchId.orNull)
+    JobLogger.log(s"Fetching archived records", Some(blobFields), INFO)
     ExhaustUtil.getArchivedData(store, filePath, container, blobFields, Some(format))
   }

   def getAssessmentData(spark: SparkSession, batchIds: List[String]): DataFrame = {
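Taken together, the new delete path works in three steps: resolve a Period from the (optional) date, read back the rows already archived to blob storage for that period, and delete exactly those rows from Cassandra. A condensed sketch, assuming the job's implicits are in scope (the batch id literal is hypothetical):

    import org.joda.time.DateTime

    // Step 1: a date resolves to a week-of-week-year partition (ISO week rules)
    val dt = new DateTime("2021-08-18")
    val period = Period(year = dt.getYear, week_of_year = dt.getWeekOfWeekyear)

    // Step 2: read back only the already-archived rows for that partition
    val archived = fetchArchivedBatches(period, Some("batch-001"))

    // Step 3: delete those rows from Cassandra by primary key
    archived.select("course_id", "batch_id", "user_id", "content_id", "attempt_id")
      .rdd.deleteFromCassandra(AppConf.getConfig("sunbird.courses.keyspace"), AppConf.getConfig("sunbird.courses.assessment.table"))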
diff --git a/data-products/src/test/scala/org/sunbird/analytics/job/report/TestAssessmentArchivalJob.scala b/data-products/src/test/scala/org/sunbird/analytics/job/report/TestAssessmentArchivalJob.scala
index be96a536e..a6544a968 100644
--- a/data-products/src/test/scala/org/sunbird/analytics/job/report/TestAssessmentArchivalJob.scala
+++ b/data-products/src/test/scala/org/sunbird/analytics/job/report/TestAssessmentArchivalJob.scala
@@ -52,7 +52,8 @@ class TestAssessmentArchivalJob extends BaseReportSpec with MockFactory {
     implicit val mockFc: FrameworkContext = mock[FrameworkContext]
     val strConfig = """{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.AssessmentArchivalJob","modelParams":{"archivalFetcherConfig":{"store":"local","format":"csv.gz","filePath":"src/test/resources/assessment-archival/archival-data/","container":""},"deleteArchivedBatch":true,"sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Assessment Archival Job"}""".stripMargin
     implicit val jobConfig: JobConfig = JSONUtils.deserialize[JobConfig](strConfig)
-    val reportData = AssessmentArchivalJob.removeRecords()
+    val reportData = AssessmentArchivalJob.removeRecords("2021-08-18", None)
+    reportData.head("deleted_records") should be(6)
   }

From ca2d590c753e2aa33a4831f2c1391af2fd77610e Mon Sep 17 00:00:00 2001
From: Manjunath Davanam
Date: Sun, 22 Aug 2021 10:37:39 +0530
Subject: [PATCH 43/55] Issue SB-24793 feat: Assessment archived data removal
 config update

---
 .../analytics/job/report/AssessmentArchivalJob.scala | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala
index 7bfd35264..5b9855874 100644
--- a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala
+++ b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala
@@ -105,7 +105,7 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep
   // Delete the records for the archived batch data.
   // Date - YYYY-MM-DD format
   def removeRecords(date: String, batchId: Option[String])(implicit spark: SparkSession, fc: FrameworkContext, config: JobConfig): Array[Map[String, Any]] = {
-    val period: Period = getWeekAndYearVal(date)
+    val period: Period = getWeekAndYearVal(date) // Date is optional; by default it resolves to the previous week of the current year
     val archivedDataDF = fetchArchivedBatches(period, batchId)
     val archivedDataRDD = archivedDataDF.select("course_id", "batch_id", "user_id", "content_id", "attempt_id").rdd
     val totalArchivedRecords = archivedDataRDD.count
@@ -129,8 +129,8 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep
   def fetchArchivedBatches(period: Period, batchId: Option[String])(implicit spark: SparkSession, fc: FrameworkContext, config: JobConfig): DataFrame = {
     val azureFetcherConfig = config.modelParams.get("archivalFetcherConfig").asInstanceOf[Map[String, AnyRef]]
     val store = azureFetcherConfig("store").asInstanceOf[String]
-    val format: String = azureFetcherConfig.getOrElse("format", "csv.gz").asInstanceOf[String]
-    val filePath = azureFetcherConfig.getOrElse("filePath", "archival-data/").asInstanceOf[String]
+    val format: String = azureFetcherConfig.getOrElse("blobExt", "csv.gz").asInstanceOf[String]
+    val filePath = azureFetcherConfig.getOrElse("reportPath", "archived-data/").asInstanceOf[String]
     val container = azureFetcherConfig.getOrElse("container", "reports").asInstanceOf[String]
     val blobFields = Map("year" -> period.year.toString, "weekNum" -> period.week_of_year.toString, "batchId" -> batchId.orNull)
     JobLogger.log(s"Fetching archived records", Some(blobFields), INFO)
@@ -152,7 +152,7 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep
              batch: BatchPartition,
              jobConfig: JobConfig): List[String] = {
     val modelParams = jobConfig.modelParams.get
-    val reportPath: String = modelParams.getOrElse("reportPath", "archival-data/").asInstanceOf[String]
+    val reportPath: String = modelParams.getOrElse("reportPath", "archived-data/").asInstanceOf[String]
     val container = AppConf.getConfig("cloud.container.reports")
     val objectKey = AppConf.getConfig("course.metrics.cloud.objectKey")
     val fileName = s"${batch.batch_id}/${batch.year}-${batch.week_of_year}"

From acbbd193a0edd547634ee45b03a497aa20a4fc52 Mon Sep 17 00:00:00 2001
From: Manjunath Davanam
Date: Wed, 22 Sep 2021
12:17:40 +0530 Subject: [PATCH 44/55] Issue SB-24793 feat: --- .../job/report/AssessmentArchivalJob.scala | 106 ++++++++++++------ .../report/TestAssessmentArchivalJob.scala | 23 ++-- 2 files changed, 80 insertions(+), 49 deletions(-) diff --git a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala index 5b9855874..158a1da07 100644 --- a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala +++ b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala @@ -1,7 +1,7 @@ package org.sunbird.analytics.job.report import com.datastax.spark.connector.cql.CassandraConnectorConf -import com.datastax.spark.connector.toRDDFunctions +import com.datastax.spark.connector.{SomeColumns, toRDDFunctions} import org.apache.spark.SparkContext import org.apache.spark.sql.cassandra.CassandraSparkSessionFunctions import org.apache.spark.sql.functions._ @@ -12,7 +12,7 @@ import org.ekstep.analytics.framework.conf.AppConf import org.ekstep.analytics.framework.util.DatasetUtil.extensions import org.ekstep.analytics.framework.util.{CommonUtil, JSONUtils, JobLogger} import org.ekstep.analytics.framework.{FrameworkContext, IJob, JobConfig} -import org.joda.time.{DateTime, LocalDate} +import org.joda.time.DateTime import org.sunbird.analytics.exhaust.util.ExhaustUtil import java.util.concurrent.atomic.AtomicInteger @@ -24,9 +24,20 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep private val partitionCols = List("batch_id", "year", "week_of_year") private val columnWithOrder = List("course_id", "batch_id", "user_id", "content_id", "attempt_id", "created_on", "grand_total", "last_attempted_on", "total_max_score", "total_score", "updated_on", "question") - case class BatchPartition(batch_id: String, year: Int, week_of_year: Int) + case class Period(year: Int, + weekOfYear: Int) + + case class BatchPartition(batchId: String, period: Period) + + + case class ArchivalMetrics(batchId: Option[String], + period: Period, + totalArchivedRecords: Option[Long], + pendingWeeksOfYears: Option[Long], + totalDeletedRecords: Option[Long], + totalDistinctBatches: Long + ) - case class Period(year: Int, week_of_year: Int) // $COVERAGE-OFF$ Disabling scoverage for main and execute method override def main(config: String)(implicit sc: Option[SparkContext], fc: Option[FrameworkContext]): Unit = { @@ -42,19 +53,19 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep init() try { - // Below config is optional, If the user wants to removed the archived data for a specific date or Batch Id - // NOTE - By default the records will be removed only for the previous week num of archived data only. - val archivedDate = modelParams.getOrElse("archivedDate", null).asInstanceOf[String] - val archivedBatch = modelParams.getOrElse("archivedBatch", null).asInstanceOf[String] - - val res = if (deleteArchivedBatch) CommonUtil.time(removeRecords(archivedDate, Some(archivedBatch))) else CommonUtil.time(archiveData(spark, jobConfig)) + /** + * Below date and batchId configs are optional. By default, + * The Job will remove the records for the last week. 
+ */ + val date = modelParams.getOrElse("date", null).asInstanceOf[String] + val batchIds = modelParams.getOrElse("batchIds", null).asInstanceOf[List[String]] + val res = if (deleteArchivedBatch) CommonUtil.time(removeRecords(date, Some(batchIds))) else CommonUtil.time(archiveData(Some(batchIds))) val total_archived_files = res._2.length - JobLogger.end(s"$jobName completed execution", "SUCCESS", Option(Map("timeTaken" -> res._1, "total_archived_files" -> total_archived_files))) + JobLogger.end(s"$jobName completed execution", "SUCCESS", Option(Map("timeTaken" -> res._1, "archived_details" -> res._2, "total_archived_files" -> total_archived_files))) } catch { - case ex: Exception => { + case ex: Exception => ex.printStackTrace() JobLogger.end(s"$jobName completed execution with the error ${ex.getMessage}", "FAILED", None) - } } finally { frameworkContext.closeContext() spark.close() @@ -66,12 +77,9 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep } // $COVERAGE-ON$ - def archiveData(sparkSession: SparkSession, jobConfig: JobConfig): Array[Map[String, Any]] = { - // Read the batches information to archive - val batches: List[String] = AppConf.getConfig("assessment.batches").split(",").toList.filter(x => x.nonEmpty) - + def archiveData(batchIds: Option[List[String]])(implicit spark: SparkSession, config: JobConfig): Array[ArchivalMetrics] = { // Get the assessment Data - val assessmentDF: DataFrame = getAssessmentData(sparkSession, batches) + val assessmentDF: DataFrame = getAssessmentData(spark, batchIds.getOrElse(List())) //Get the Week Num & Year Value for Based on the updated_on value column val assessmentData = assessmentDF.withColumn("updated_on", to_timestamp(col("updated_on"))) @@ -84,45 +92,63 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep JobLogger.log(s"Total Batches to Archive By Year & Week $batchesToArchiveCount", None, INFO) // Loop through the batches to archive list - val batchesToArchive: Map[String, Array[BatchPartition]] = archiveBatchList.map(f => BatchPartition(f.get(0).asInstanceOf[String], f.get(1).asInstanceOf[Int], f.get(2).asInstanceOf[Int])).groupBy(_.batch_id) - batchesToArchive.flatMap(batches => { + val batchesToArchive: Map[String, Array[BatchPartition]] = archiveBatchList.map(f => BatchPartition(f.get(0).asInstanceOf[String], Period(f.get(1).asInstanceOf[Int], f.get(2).asInstanceOf[Int]))).groupBy(_.batchId) + val archivalStatus: Array[ArchivalMetrics] = batchesToArchive.flatMap(batches => { val processingBatch = new AtomicInteger(batches._2.length) JobLogger.log(s"Started Processing to archive the data", Some(Map("batch_id" -> batches._1, "total_part_files_to_archive" -> batches._2.length)), INFO) // Loop through the week_num & year batch partition - val res = for (batch <- batches._2) yield { - val filteredDF = assessmentData.filter(col("batch_id") === batch.batch_id && col("year") === batch.year && col("week_of_year") === batch.week_of_year).select(columnWithOrder.head, columnWithOrder.tail: _*) - upload(filteredDF, batch, jobConfig) - val metrics = Map("batch_id" -> batch.batch_id, "year" -> batch.year, "week_of_year" -> batch.week_of_year, "pending_part_files" -> processingBatch.getAndDecrement(), "total_records" -> filteredDF.count()) + val res = for (batch <- batches._2.asInstanceOf[Array[BatchPartition]]) yield { + val filteredDF = assessmentData.filter(col("batch_id") === batch.batchId && col("year") === batch.period.year && col("week_of_year") === 
batch.period.weekOfYear).select(columnWithOrder.head, columnWithOrder.tail: _*) + upload(filteredDF, batch, config) + + val metrics = ArchivalMetrics(batchId = Some(batch.batchId), Period(year = batch.period.year, weekOfYear = batch.period.weekOfYear), + pendingWeeksOfYears = Some(processingBatch.getAndDecrement()), totalArchivedRecords = Some(filteredDF.count()), totalDeletedRecords = None, totalDistinctBatches = filteredDF.select("batch_id").distinct().count()) + JobLogger.log(s"Data is archived and Processing the remaining part files ", Some(metrics), INFO) - assessmentData.unpersist() metrics } JobLogger.log(s"${batches._1} is successfully archived", Some(Map("batch_id" -> batches._1, "pending_batches" -> batchesToArchiveCount.getAndDecrement())), INFO) res }).toArray + assessmentData.unpersist() + archivalStatus } // Delete the records for the archived batch data. // Date - YYYY-MM-DD Format - def removeRecords(date: String, batchId: Option[String])(implicit spark: SparkSession, fc: FrameworkContext, config: JobConfig): Array[Map[String, Any]] = { + // Batch IDs are optional + def removeRecords(date: String, batchIds: Option[List[String]])(implicit spark: SparkSession, fc: FrameworkContext, config: JobConfig): Array[ArchivalMetrics] = { val period: Period = getWeekAndYearVal(date) // Date is optional, By default it will provide the previous week num of current year + val res = if (batchIds.nonEmpty) { + for (batchId <- batchIds.getOrElse(List())) yield { + remove(period, Some(batchId)) + } + } else { + List(remove(period, None)) + } + res.toArray + } + + def remove(period: Period, batchId: Option[String])(implicit spark: SparkSession, fc: FrameworkContext, config: JobConfig): ArchivalMetrics = { val archivedDataDF = fetchArchivedBatches(period, batchId) - val archivedDataRDD = archivedDataDF.select("course_id", "batch_id", "user_id", "content_id", "attempt_id").rdd - val totalArchivedRecords = archivedDataRDD.count - archivedDataRDD.deleteFromCassandra(AppConf.getConfig("sunbird.courses.keyspace"), AppConf.getConfig("sunbird.courses.assessment.table")) - JobLogger.log(s"Deleted $totalArchivedRecords records for the batch from the DB", None, INFO) - Array(Map("deleted_records" -> totalArchivedRecords)) + val archivedData = archivedDataDF.select("course_id", "batch_id", "user_id", "content_id", "attempt_id") + val totalArchivedRecords: Long = archivedData.count + val totalDistinctBatches: Long = archivedData.select("batch_id").distinct().count() + JobLogger.log(s"Deleting $totalArchivedRecords records for the year ${period.year} and week of year ${period.weekOfYear} from the DB ", None, INFO) + archivedData.rdd.deleteFromCassandra(AppConf.getConfig("sunbird.courses.keyspace"), AppConf.getConfig("sunbird.courses.assessment.table"), keyColumns = SomeColumns("course_id", "batch_id", "user_id", "content_id", "attempt_id")) + ArchivalMetrics(batchId = batchId, Period(year = period.year, weekOfYear = period.weekOfYear), + pendingWeeksOfYears = None, totalArchivedRecords = Some(totalArchivedRecords), totalDeletedRecords = Some(totalArchivedRecords), totalDistinctBatches = totalDistinctBatches) } // Date - YYYY-MM-DD Format def getWeekAndYearVal(date: String): Period = { if (null != date && date.nonEmpty) { val dt = new DateTime(date) - Period(year = dt.getYear, week_of_year = dt.getWeekOfWeekyear) + Period(year = dt.getYear, weekOfYear = dt.getWeekOfWeekyear) } else { val today = new DateTime() val lastWeek = today.minusWeeks(1) // Get always for the previous week of the current - Period(year 
= lastWeek.getYear, week_of_year = lastWeek.getWeekOfWeekyear) + Period(year = lastWeek.getYear, weekOfYear = lastWeek.getWeekOfWeekyear) } } @@ -132,7 +158,7 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep val format: String = azureFetcherConfig.getOrElse("blobExt", "csv.gz").asInstanceOf[String] val filePath = azureFetcherConfig.getOrElse("reportPath", "archived-data/").asInstanceOf[String] val container = azureFetcherConfig.getOrElse("container", "reports").asInstanceOf[String] - val blobFields = Map("year" -> period.year.toString, "weekNum" -> period.week_of_year.toString, "batchId" -> batchId.orNull) + val blobFields = Map("year" -> period.year.toString, "weekNum" -> period.weekOfYear.toString, "batchId" -> batchId.orNull) JobLogger.log(s"Fetching a archived records", Some(blobFields), INFO) ExhaustUtil.getArchivedData(store, filePath, container, blobFields, Some(format)) } @@ -141,8 +167,14 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep import spark.implicits._ val assessmentDF = fetchData(spark, assessmentAggDBSettings, cassandraUrl, new StructType()) if (batchIds.nonEmpty) { - val batchListDF = batchIds.asInstanceOf[List[String]].toDF("batch_id") - assessmentDF.join(batchListDF, Seq("batch_id"), "inner").persist() + if (batchIds.size > 1) { + val batchListDF = batchIds.toDF("batch_id") + assessmentDF.join(batchListDF, Seq("batch_id"), "inner").persist() + } + else { + assessmentDF.filter(col("batch_id") === batchIds).persist() + } + } else { assessmentDF } @@ -155,7 +187,7 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep val reportPath: String = modelParams.getOrElse("reportPath", "archived-data/").asInstanceOf[String] val container = AppConf.getConfig("cloud.container.reports") val objectKey = AppConf.getConfig("course.metrics.cloud.objectKey") - val fileName = s"${batch.batch_id}/${batch.year}-${batch.week_of_year}" + val fileName = s"${batch.batchId}/${batch.period.year}-${batch.period.weekOfYear}" val storageConfig = getStorageConfig( container, objectKey, diff --git a/data-products/src/test/scala/org/sunbird/analytics/job/report/TestAssessmentArchivalJob.scala b/data-products/src/test/scala/org/sunbird/analytics/job/report/TestAssessmentArchivalJob.scala index a6544a968..566409ef9 100644 --- a/data-products/src/test/scala/org/sunbird/analytics/job/report/TestAssessmentArchivalJob.scala +++ b/data-products/src/test/scala/org/sunbird/analytics/job/report/TestAssessmentArchivalJob.scala @@ -1,7 +1,6 @@ package org.sunbird.analytics.job.report -import org.apache.spark.SparkContext import org.apache.spark.sql.{DataFrame, SparkSession} import org.ekstep.analytics.framework.conf.AppConf import org.ekstep.analytics.framework.util.{HadoopFileUtil, JSONUtils} @@ -35,25 +34,25 @@ class TestAssessmentArchivalJob extends BaseReportSpec with MockFactory { implicit val mockFc: FrameworkContext = mock[FrameworkContext] val strConfig = """{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.AssessmentArchivalJob","modelParams":{"deleteArchivedBatch":false,"store":"local","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Assessment Archival Job"}""".stripMargin implicit val jobConfig: JobConfig = JSONUtils.deserialize[JobConfig](strConfig) - val reportData = AssessmentArchivalJob.archiveData(spark, jobConfig) + val reportData = 
AssessmentArchivalJob.archiveData(None) - val batch_1 = reportData.filter(x => x.getOrElse("batch_id", "").asInstanceOf[String] === "batch-001") - batch_1.foreach(res => res("year") === "2021") - batch_1.foreach(res => res("total_records") === "5") - batch_1.foreach(res => res("week_of_year") === "32") + val batch_1 = reportData.filter(x => x.batchId.getOrElse("") === "batch-001") + batch_1.foreach(res => res.period.year === "2021") + batch_1.foreach(res => res.totalArchivedRecords === "5") + batch_1.foreach(res => res.period.weekOfYear === "32") - val batch_2 = reportData.filter(x => x.getOrElse("batch_id", "").asInstanceOf[String] === "batch-004") - batch_2.foreach(res => res("year") === "2021") - batch_2.foreach(res => res("total_records") === "1") - batch_2.foreach(res => res("week_of_year") === "32") + val batch_2 = reportData.filter(x => x.batchId.getOrElse("") === "batch-004") + batch_2.foreach(res => res.period.year === "2021") + batch_2.foreach(res => res.totalArchivedRecords === "1") + batch_2.foreach(res => res.period.weekOfYear === "32") } it should "Should able to fetch the archived records from the azure and delete the records" in { implicit val mockFc: FrameworkContext = mock[FrameworkContext] - val strConfig = """{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.AssessmentArchivalJob","modelParams":{"archivalFetcherConfig":{"store":"local","format":"csv.gz","filePath":"src/test/resources/assessment-archival/archival-data/","container":""},"deleteArchivedBatch":true,"sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Assessment Archival Job"}""".stripMargin + val strConfig = """{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.AssessmentArchivalJob","modelParams":{"archivalFetcherConfig":{"store":"local","format":"csv.gz","reportPath":"src/test/resources/assessment-archival/archival-data/","container":""},"deleteArchivedBatch":true,"sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Assessment Archival Job"}""".stripMargin implicit val jobConfig: JobConfig = JSONUtils.deserialize[JobConfig](strConfig) val reportData = AssessmentArchivalJob.removeRecords("2021-08-18", None) - reportData.head("deleted_records") should be(6) + reportData.head.totalDeletedRecords.get should be(6) } From 114f7fed7fa361de9997c1c6aab167dd1b4298b2 Mon Sep 17 00:00:00 2001 From: Manjunath Davanam Date: Wed, 22 Sep 2021 12:26:02 +0530 Subject: [PATCH 45/55] Issue SB-24793 feat: Updated the testcase and renamed the variables --- .../job/report/AssessmentArchivalJob.scala | 29 +++++++++---------- .../analytics/job/report/BaseReportSpec.scala | 12 ++++---- 2 files changed, 20 insertions(+), 21 deletions(-) diff --git a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala index 158a1da07..e6d79acaf 100644 --- a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala +++ b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala @@ -88,8 +88,8 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep .withColumn("question", to_json(col("question"))) val archiveBatchList = 
assessmentData.groupBy(partitionCols.head, partitionCols.tail: _*).count().collect() - val batchesToArchiveCount = new AtomicInteger(archiveBatchList.length) - JobLogger.log(s"Total Batches to Archive By Year & Week $batchesToArchiveCount", None, INFO) + val totalBatchesToArchive = new AtomicInteger(archiveBatchList.length) + JobLogger.log(s"Total Batches to Archive By Year & Week $totalBatchesToArchive", None, INFO) // Loop through the batches to archive list val batchesToArchive: Map[String, Array[BatchPartition]] = archiveBatchList.map(f => BatchPartition(f.get(0).asInstanceOf[String], Period(f.get(1).asInstanceOf[Int], f.get(2).asInstanceOf[Int]))).groupBy(_.batchId) @@ -107,7 +107,7 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep JobLogger.log(s"Data is archived and Processing the remaining part files ", Some(metrics), INFO) metrics } - JobLogger.log(s"${batches._1} is successfully archived", Some(Map("batch_id" -> batches._1, "pending_batches" -> batchesToArchiveCount.getAndDecrement())), INFO) + JobLogger.log(s"${batches._1} is successfully archived", Some(Map("batch_id" -> batches._1, "pending_batches" -> totalBatchesToArchive.getAndDecrement())), INFO) res }).toArray assessmentData.unpersist() @@ -140,18 +140,6 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep pendingWeeksOfYears = None, totalArchivedRecords = Some(totalArchivedRecords), totalDeletedRecords = Some(totalArchivedRecords), totalDistinctBatches = totalDistinctBatches) } - // Date - YYYY-MM-DD Format - def getWeekAndYearVal(date: String): Period = { - if (null != date && date.nonEmpty) { - val dt = new DateTime(date) - Period(year = dt.getYear, weekOfYear = dt.getWeekOfWeekyear) - } else { - val today = new DateTime() - val lastWeek = today.minusWeeks(1) // Get always for the previous week of the current - Period(year = lastWeek.getYear, weekOfYear = lastWeek.getWeekOfWeekyear) - } - } - def fetchArchivedBatches(period: Period, batchId: Option[String])(implicit spark: SparkSession, fc: FrameworkContext, config: JobConfig): DataFrame = { val azureFetcherConfig = config.modelParams.get("archivalFetcherConfig").asInstanceOf[Map[String, AnyRef]] val store = azureFetcherConfig("store").asInstanceOf[String] @@ -195,5 +183,16 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep JobLogger.log(s"Uploading reports to blob storage", None, INFO) archivedData.saveToBlobStore(storageConfig = storageConfig, format = "csv", reportId = s"$reportPath$fileName-${System.currentTimeMillis()}", options = Option(Map("header" -> "true", "codec" -> "org.apache.hadoop.io.compress.GzipCodec")), partitioningColumns = None, fileExt = Some("csv.gz")) } + // Date - YYYY-MM-DD Format + def getWeekAndYearVal(date: String): Period = { + if (null != date && date.nonEmpty) { + val dt = new DateTime(date) + Period(year = dt.getYear, weekOfYear = dt.getWeekOfWeekyear) + } else { + val today = new DateTime() + val lastWeek = today.minusWeeks(1) // Get always for the previous week of the current + Period(year = lastWeek.getYear, weekOfYear = lastWeek.getWeekOfWeekyear) + } + } } diff --git a/data-products/src/test/scala/org/sunbird/analytics/job/report/BaseReportSpec.scala b/data-products/src/test/scala/org/sunbird/analytics/job/report/BaseReportSpec.scala index 430ece00f..8e53554bc 100644 --- a/data-products/src/test/scala/org/sunbird/analytics/job/report/BaseReportSpec.scala +++ 
b/data-products/src/test/scala/org/sunbird/analytics/job/report/BaseReportSpec.scala @@ -17,16 +17,16 @@ class BaseReportSpec extends BaseSpec with BeforeAndAfterAll { val cbatchAliasMapping = spark.sparkContext.textFile("src/test/resources/reports/cbatch_alias_mapping.json", 1).collect().head val cbatchAssessmentAliasMapping = spark.sparkContext.textFile("src/test/resources/reports/cbatch_assessment_alias_mapping.json", 1).collect().head -// EmbeddedES.start( -// Array( -// EsIndex("compositesearch", Option("cs"), Option(csMapping), None), -// EsIndex("cbatch", None, None, Option(cbatchAliasMapping)), -// EsIndex("cbatch-assessment-08-07-2018-11-00", None, None, Option(cbatchAssessmentAliasMapping)))) + EmbeddedES.start( + Array( + EsIndex("compositesearch", Option("cs"), Option(csMapping), None), + EsIndex("cbatch", None, None, Option(cbatchAliasMapping)), + EsIndex("cbatch-assessment-08-07-2018-11-00", None, None, Option(cbatchAssessmentAliasMapping)))) } override def afterAll() { //super.afterAll(); - //EmbeddedES.stop() + EmbeddedES.stop() } } \ No newline at end of file From 74aa110b4b64d9b9ebf9e4301f015d05b46a277a Mon Sep 17 00:00:00 2001 From: Manjunath Davanam Date: Thu, 23 Sep 2021 12:47:38 +0530 Subject: [PATCH 46/55] Issue SB-24793 feat: Enabled the archival job to run for a specific batchId or date to archive the data. if the date has not provided and archiveForLastWeek = true then it will by default it will archive the data for the last week records --- .../analytics/exhaust/util/ExhaustUtil.scala | 2 +- .../job/report/AssessmentArchivalJob.scala | 68 +++++++++++-------- .../report/TestAssessmentArchivalJob.scala | 4 +- 3 files changed, 43 insertions(+), 31 deletions(-) diff --git a/data-products/src/main/scala/org/sunbird/analytics/exhaust/util/ExhaustUtil.scala b/data-products/src/main/scala/org/sunbird/analytics/exhaust/util/ExhaustUtil.scala index 467c48808..01402911c 100644 --- a/data-products/src/main/scala/org/sunbird/analytics/exhaust/util/ExhaustUtil.scala +++ b/data-products/src/main/scala/org/sunbird/analytics/exhaust/util/ExhaustUtil.scala @@ -8,7 +8,7 @@ import org.ekstep.analytics.framework.util.JobLogger object ExhaustUtil { - def getArchivedData(store: String, filePath: String, bucket: String, blobFields: Map[String, String], fileFormat: Option[String])(implicit spark: SparkSession, fc: FrameworkContext): DataFrame = { + def getArchivedData(store: String, filePath: String, bucket: String, blobFields: Map[String, Any], fileFormat: Option[String])(implicit spark: SparkSession, fc: FrameworkContext): DataFrame = { val filteredBlobFields = blobFields.filter(_._2 != null) val format = fileFormat.getOrElse("csv.gz") val batchId = filteredBlobFields.getOrElse("batchId", "*") diff --git a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala index e6d79acaf..4a65fbc68 100644 --- a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala +++ b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala @@ -24,8 +24,7 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep private val partitionCols = List("batch_id", "year", "week_of_year") private val columnWithOrder = List("course_id", "batch_id", "user_id", "content_id", "attempt_id", "created_on", "grand_total", "last_attempted_on", "total_max_score", "total_score", "updated_on", "question") - case 
class Period(year: Int, - weekOfYear: Int) + case class Period(year: Int, weekOfYear: Int) case class BatchPartition(batchId: String, period: Period) @@ -59,9 +58,10 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep */ val date = modelParams.getOrElse("date", null).asInstanceOf[String] val batchIds = modelParams.getOrElse("batchIds", null).asInstanceOf[List[String]] - val res = if (deleteArchivedBatch) CommonUtil.time(removeRecords(date, Some(batchIds))) else CommonUtil.time(archiveData(Some(batchIds))) - val total_archived_files = res._2.length - JobLogger.end(s"$jobName completed execution", "SUCCESS", Option(Map("timeTaken" -> res._1, "archived_details" -> res._2, "total_archived_files" -> total_archived_files))) + val archiveForLastWeek: Boolean = modelParams.getOrElse("archiveForLastWeek", true).asInstanceOf[Boolean] + + val res = if (deleteArchivedBatch) CommonUtil.time(removeRecords(date, Some(batchIds), archiveForLastWeek)) else CommonUtil.time(archiveData(date, Some(batchIds), archiveForLastWeek)) + JobLogger.end(s"$jobName completed execution", "SUCCESS", Option(Map("timeTaken" -> res._1, "archived_details" -> res._2, "total_archived_files" -> res._2.length))) } catch { case ex: Exception => ex.printStackTrace() @@ -77,19 +77,24 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep } // $COVERAGE-ON$ - def archiveData(batchIds: Option[List[String]])(implicit spark: SparkSession, config: JobConfig): Array[ArchivalMetrics] = { + def archiveData(date: String, batchIds: Option[List[String]], archiveForLastWeek: Boolean)(implicit spark: SparkSession, config: JobConfig): Array[ArchivalMetrics] = { // Get the assessment Data val assessmentDF: DataFrame = getAssessmentData(spark, batchIds.getOrElse(List())) - + val period = getWeekAndYearVal(date, archiveForLastWeek) //Get the Week Num & Year Value for Based on the updated_on value column val assessmentData = assessmentDF.withColumn("updated_on", to_timestamp(col("updated_on"))) .withColumn("year", year(col("updated_on"))) .withColumn("week_of_year", weekofyear(col("updated_on"))) .withColumn("question", to_json(col("question"))) - val archiveBatchList = assessmentData.groupBy(partitionCols.head, partitionCols.tail: _*).count().collect() + /** + * The below filter is required, If we want to archive the data for a specific week of year and year + */ + val filteredAssessmentData = if (!isEmptyPeriod(period)) assessmentData.filter(col("year") === period.year).filter(col("week_of_year") === period.weekOfYear) else assessmentData + + val archiveBatchList = filteredAssessmentData.groupBy(partitionCols.head, partitionCols.tail: _*).count().collect() val totalBatchesToArchive = new AtomicInteger(archiveBatchList.length) - JobLogger.log(s"Total Batches to Archive By Year & Week $totalBatchesToArchive", None, INFO) + JobLogger.log(s"Total Batches to Archive is $totalBatchesToArchive for a period $period", None, INFO) // Loop through the batches to archive list val batchesToArchive: Map[String, Array[BatchPartition]] = archiveBatchList.map(f => BatchPartition(f.get(0).asInstanceOf[String], Period(f.get(1).asInstanceOf[Int], f.get(2).asInstanceOf[Int]))).groupBy(_.batchId) @@ -99,11 +104,9 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep // Loop through the week_num & year batch partition val res = for (batch <- batches._2.asInstanceOf[Array[BatchPartition]]) yield { val filteredDF = assessmentData.filter(col("batch_id") === batch.batchId && 
col("year") === batch.period.year && col("week_of_year") === batch.period.weekOfYear).select(columnWithOrder.head, columnWithOrder.tail: _*) - upload(filteredDF, batch, config) - + upload(filteredDF, batch) // Upload the archived files into blob store val metrics = ArchivalMetrics(batchId = Some(batch.batchId), Period(year = batch.period.year, weekOfYear = batch.period.weekOfYear), pendingWeeksOfYears = Some(processingBatch.getAndDecrement()), totalArchivedRecords = Some(filteredDF.count()), totalDeletedRecords = None, totalDistinctBatches = filteredDF.select("batch_id").distinct().count()) - JobLogger.log(s"Data is archived and Processing the remaining part files ", Some(metrics), INFO) metrics } @@ -111,14 +114,14 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep res }).toArray assessmentData.unpersist() - archivalStatus + archivalStatus // List of metrics } // Delete the records for the archived batch data. // Date - YYYY-MM-DD Format // Batch IDs are optional - def removeRecords(date: String, batchIds: Option[List[String]])(implicit spark: SparkSession, fc: FrameworkContext, config: JobConfig): Array[ArchivalMetrics] = { - val period: Period = getWeekAndYearVal(date) // Date is optional, By default it will provide the previous week num of current year + def removeRecords(date: String, batchIds: Option[List[String]], archiveForLastWeek: Boolean)(implicit spark: SparkSession, fc: FrameworkContext, config: JobConfig): Array[ArchivalMetrics] = { + val period: Period = getWeekAndYearVal(date, archiveForLastWeek) // Date is optional, By default it will provide the previous week num of current year val res = if (batchIds.nonEmpty) { for (batchId <- batchIds.getOrElse(List())) yield { remove(period, Some(batchId)) @@ -141,12 +144,14 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep } def fetchArchivedBatches(period: Period, batchId: Option[String])(implicit spark: SparkSession, fc: FrameworkContext, config: JobConfig): DataFrame = { - val azureFetcherConfig = config.modelParams.get("archivalFetcherConfig").asInstanceOf[Map[String, AnyRef]] - val store = azureFetcherConfig("store").asInstanceOf[String] + val modelParams = config.modelParams.get + val azureFetcherConfig = modelParams.getOrElse("archivalFetcherConfig", Map()).asInstanceOf[Map[String, AnyRef]] + println("azureFetcherConfig" + azureFetcherConfig) + val store = azureFetcherConfig.getOrElse("store", "local").asInstanceOf[String] val format: String = azureFetcherConfig.getOrElse("blobExt", "csv.gz").asInstanceOf[String] val filePath = azureFetcherConfig.getOrElse("reportPath", "archived-data/").asInstanceOf[String] val container = azureFetcherConfig.getOrElse("container", "reports").asInstanceOf[String] - val blobFields = Map("year" -> period.year.toString, "weekNum" -> period.weekOfYear.toString, "batchId" -> batchId.orNull) + val blobFields = Map("year" -> period.year, "weekNum" -> period.weekOfYear, "batchId" -> batchId.orNull) JobLogger.log(s"Fetching a archived records", Some(blobFields), INFO) ExhaustUtil.getArchivedData(store, filePath, container, blobFields, Some(format)) } @@ -157,20 +162,18 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep if (batchIds.nonEmpty) { if (batchIds.size > 1) { val batchListDF = batchIds.toDF("batch_id") - assessmentDF.join(batchListDF, Seq("batch_id"), "inner").persist() + assessmentDF.join(batchListDF, Seq("batch_id"), "inner").persist() } else { - assessmentDF.filter(col("batch_id") === 
batchIds).persist() + assessmentDF.filter(col("batch_id") === batchIds.head).persist() } - } else { assessmentDF } } def upload(archivedData: DataFrame, - batch: BatchPartition, - jobConfig: JobConfig): List[String] = { + batch: BatchPartition)(implicit jobConfig: JobConfig): List[String] = { val modelParams = jobConfig.modelParams.get val reportPath: String = modelParams.getOrElse("reportPath", "archived-data/").asInstanceOf[String] val container = AppConf.getConfig("cloud.container.reports") @@ -183,16 +186,25 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep JobLogger.log(s"Uploading reports to blob storage", None, INFO) archivedData.saveToBlobStore(storageConfig = storageConfig, format = "csv", reportId = s"$reportPath$fileName-${System.currentTimeMillis()}", options = Option(Map("header" -> "true", "codec" -> "org.apache.hadoop.io.compress.GzipCodec")), partitioningColumns = None, fileExt = Some("csv.gz")) } + // Date - YYYY-MM-DD Format - def getWeekAndYearVal(date: String): Period = { - if (null != date && date.nonEmpty) { - val dt = new DateTime(date) - Period(year = dt.getYear, weekOfYear = dt.getWeekOfWeekyear) - } else { + def getWeekAndYearVal(date: String, archiveForLastWeek: Boolean): Period = { + if (archiveForLastWeek) { val today = new DateTime() val lastWeek = today.minusWeeks(1) // Get always for the previous week of the current Period(year = lastWeek.getYear, weekOfYear = lastWeek.getWeekOfWeekyear) + } else { + if (null != date && date.nonEmpty) { + val dt = new DateTime(date) + Period(year = dt.getYear, weekOfYear = dt.getWeekOfWeekyear) + } else { + Period(0, 0) + } } } + def isEmptyPeriod(period: Period): Boolean = { + if (period.year == 0 && period.weekOfYear == 0) true else false + } + } diff --git a/data-products/src/test/scala/org/sunbird/analytics/job/report/TestAssessmentArchivalJob.scala b/data-products/src/test/scala/org/sunbird/analytics/job/report/TestAssessmentArchivalJob.scala index 566409ef9..9eb5d245e 100644 --- a/data-products/src/test/scala/org/sunbird/analytics/job/report/TestAssessmentArchivalJob.scala +++ b/data-products/src/test/scala/org/sunbird/analytics/job/report/TestAssessmentArchivalJob.scala @@ -34,7 +34,7 @@ class TestAssessmentArchivalJob extends BaseReportSpec with MockFactory { implicit val mockFc: FrameworkContext = mock[FrameworkContext] val strConfig = """{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.AssessmentArchivalJob","modelParams":{"deleteArchivedBatch":false,"store":"local","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Assessment Archival Job"}""".stripMargin implicit val jobConfig: JobConfig = JSONUtils.deserialize[JobConfig](strConfig) - val reportData = AssessmentArchivalJob.archiveData(None) + val reportData = AssessmentArchivalJob.archiveData(date = null, None, archiveForLastWeek = true) val batch_1 = reportData.filter(x => x.batchId.getOrElse("") === "batch-001") batch_1.foreach(res => res.period.year === "2021") @@ -51,7 +51,7 @@ class TestAssessmentArchivalJob extends BaseReportSpec with MockFactory { implicit val mockFc: FrameworkContext = mock[FrameworkContext] val strConfig = 
"""{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.AssessmentArchivalJob","modelParams":{"archivalFetcherConfig":{"store":"local","format":"csv.gz","reportPath":"src/test/resources/assessment-archival/archival-data/","container":""},"deleteArchivedBatch":true,"sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Assessment Archival Job"}""".stripMargin implicit val jobConfig: JobConfig = JSONUtils.deserialize[JobConfig](strConfig) - val reportData = AssessmentArchivalJob.removeRecords("2021-08-18", None) + val reportData = AssessmentArchivalJob.removeRecords(date = "2021-08-18", None, archiveForLastWeek = false) reportData.head.totalDeletedRecords.get should be(6) } From 5a0d0d65108a7b4708e10c70ddd76d2bd287ce78 Mon Sep 17 00:00:00 2001 From: Manjunath Davanam Date: Thu, 23 Sep 2021 18:25:44 +0530 Subject: [PATCH 47/55] Issue SB-24793 feat: Enabled the archival job to run for a specific batchId or date to archive the data. if the date has not provided and archiveForLastWeek = true then it will by default it will archive the data for the last week records --- .../analytics/exhaust/util/ExhaustUtil.scala | 2 +- .../job/report/AssessmentArchivalJob.scala | 31 ++++++++++--------- 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/data-products/src/main/scala/org/sunbird/analytics/exhaust/util/ExhaustUtil.scala b/data-products/src/main/scala/org/sunbird/analytics/exhaust/util/ExhaustUtil.scala index 01402911c..0a7ca06ac 100644 --- a/data-products/src/main/scala/org/sunbird/analytics/exhaust/util/ExhaustUtil.scala +++ b/data-products/src/main/scala/org/sunbird/analytics/exhaust/util/ExhaustUtil.scala @@ -23,7 +23,7 @@ object ExhaustUtil { case "s3" | "azure" => val key = AppConf.getConfig("azure_storage_key") val file = s"${filePath}${batchId}/${year}-${weekNumb}-*.${format}" - s"wasb://$bucket@$key.blob.core.windows.net/$file." 
+ s"wasb://$bucket@$key.blob.core.windows.net/$file" // $COVERAGE-ON$ } diff --git a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala index 4a65fbc68..6b41a9a83 100644 --- a/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala +++ b/data-products/src/main/scala/org/sunbird/analytics/job/report/AssessmentArchivalJob.scala @@ -60,7 +60,7 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep val batchIds = modelParams.getOrElse("batchIds", null).asInstanceOf[List[String]] val archiveForLastWeek: Boolean = modelParams.getOrElse("archiveForLastWeek", true).asInstanceOf[Boolean] - val res = if (deleteArchivedBatch) CommonUtil.time(removeRecords(date, Some(batchIds), archiveForLastWeek)) else CommonUtil.time(archiveData(date, Some(batchIds), archiveForLastWeek)) + val res = if (deleteArchivedBatch) CommonUtil.time(removeRecords(date, Some(batchIds), archiveForLastWeek)) else CommonUtil.time(archiveData(date, Option(batchIds), archiveForLastWeek)) JobLogger.end(s"$jobName completed execution", "SUCCESS", Option(Map("timeTaken" -> res._1, "archived_details" -> res._2, "total_archived_files" -> res._2.length))) } catch { case ex: Exception => @@ -79,7 +79,7 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep // $COVERAGE-ON$ def archiveData(date: String, batchIds: Option[List[String]], archiveForLastWeek: Boolean)(implicit spark: SparkSession, config: JobConfig): Array[ArchivalMetrics] = { // Get the assessment Data - val assessmentDF: DataFrame = getAssessmentData(spark, batchIds.getOrElse(List())) + val assessmentDF: DataFrame = getAssessmentData(spark, batchIds) val period = getWeekAndYearVal(date, archiveForLastWeek) //Get the Week Num & Year Value for Based on the updated_on value column val assessmentData = assessmentDF.withColumn("updated_on", to_timestamp(col("updated_on"))) @@ -88,7 +88,7 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep .withColumn("question", to_json(col("question"))) /** - * The below filter is required, If we want to archive the data for a specific week of year and year + * The below filter is required, If we want to archive the data for a specific week of year and year */ val filteredAssessmentData = if (!isEmptyPeriod(period)) assessmentData.filter(col("year") === period.year).filter(col("week_of_year") === period.weekOfYear) else assessmentData @@ -124,7 +124,7 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep val period: Period = getWeekAndYearVal(date, archiveForLastWeek) // Date is optional, By default it will provide the previous week num of current year val res = if (batchIds.nonEmpty) { for (batchId <- batchIds.getOrElse(List())) yield { - remove(period, Some(batchId)) + remove(period, Option(batchId)) } } else { List(remove(period, None)) @@ -137,7 +137,7 @@ object AssessmentArchivalJob extends optional.Application with IJob with BaseRep val archivedData = archivedDataDF.select("course_id", "batch_id", "user_id", "content_id", "attempt_id") val totalArchivedRecords: Long = archivedData.count val totalDistinctBatches: Long = archivedData.select("batch_id").distinct().count() - JobLogger.log(s"Deleting $totalArchivedRecords records for the year ${period.year} and week of year ${period.weekOfYear} from the DB ", None, INFO) + JobLogger.log(s"Deleting 
$totalArchivedRecords archived records for the year ${period.year} and week of year ${period.weekOfYear} from the DB", None, INFO)
+    archivedData.rdd.deleteFromCassandra(AppConf.getConfig("sunbird.courses.keyspace"), AppConf.getConfig("sunbird.courses.assessment.table"), keyColumns = SomeColumns("course_id", "batch_id", "user_id", "content_id", "attempt_id"))
+    ArchivalMetrics(batchId = batchId, Period(year = period.year, weekOfYear = period.weekOfYear),
+      pendingWeeksOfYears = None, totalArchivedRecords = Some(totalArchivedRecords), totalDeletedRecords = Some(totalArchivedRecords), totalDistinctBatches = totalDistinctBatches)
   }

   def fetchArchivedBatches(period: Period, batchId: Option[String])(implicit spark: SparkSession, fc: FrameworkContext, config: JobConfig): DataFrame = {
     val modelParams = config.modelParams.get
     val azureFetcherConfig = modelParams.getOrElse("archivalFetcherConfig", Map()).asInstanceOf[Map[String, AnyRef]]
-    println("azureFetcherConfig" + azureFetcherConfig)
-    val store = azureFetcherConfig.getOrElse("store", "local").asInstanceOf[String]
+    val store = azureFetcherConfig.getOrElse("store", "azure").asInstanceOf[String]
     val format: String = azureFetcherConfig.getOrElse("blobExt", "csv.gz").asInstanceOf[String]
     val filePath = azureFetcherConfig.getOrElse("reportPath", "archived-data/").asInstanceOf[String]
     val container = azureFetcherConfig.getOrElse("container", "reports").asInstanceOf[String]
-    val blobFields = Map("year" -> period.year, "weekNum" -> period.weekOfYear, "batchId" -> batchId.orNull)
-    JobLogger.log(s"Fetching archived records", Some(blobFields), INFO)
+    val blobFields = if (!isEmptyPeriod(period)) Map("year" -> period.year, "weekNum" -> period.weekOfYear, "batchId" -> batchId.orNull)
+    else Map("batchId" -> batchId.orNull)
+    JobLogger.log(s"Fetching archived records from the blob store", Some(Map("reportPath" -> filePath, "container" -> container) ++ blobFields), INFO)
     ExhaustUtil.getArchivedData(store, filePath, container, blobFields, Some(format))
   }

-  def getAssessmentData(spark: SparkSession, batchIds: List[String]): DataFrame = {
+  def getAssessmentData(spark: SparkSession, batchIds: Option[List[String]]): DataFrame = {
     import spark.implicits._
     val assessmentDF = fetchData(spark, assessmentAggDBSettings, cassandraUrl, new StructType())
-    if (batchIds.nonEmpty) {
-      if (batchIds.size > 1) {
-        val batchListDF = batchIds.toDF("batch_id")
-        assessmentDF.join(batchListDF, Seq("batch_id"), "inner").persist()
+    val batchIdentifiers = batchIds.getOrElse(List())
+    if (batchIdentifiers.nonEmpty) {
+      if (batchIdentifiers.size > 1) {
+        val batchListDF = batchIdentifiers.toDF("batch_id")
+        assessmentDF.join(batchListDF, Seq("batch_id"), "inner").persist()
       }
       else {
-        assessmentDF.filter(col("batch_id") === batchIds.head).persist()
+        assessmentDF.filter(col("batch_id") === batchIdentifiers.head).persist()
       }
     } else {
       assessmentDF

From ba954e51d590446ad8b4c026a06f90350a00a979 Mon Sep 17 00:00:00 2001
From: Manjunath Davanam
Date: Fri, 22 Oct 2021 10:56:45 +0530
Subject: [PATCH 48/55] Issue SB-24793 feat: fixed review comments - added a
 separate case to handle s3
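In effect, the store-to-URL dispatch now looks roughly like the sketch below (condensed from the hunk that follows; archiveUrl is an illustrative name, not a method in the patch):

    def archiveUrl(store: String, filePath: String, bucket: String,
                   batchId: String, year: String, weekNumb: String, format: String): String = {
      val file = s"${filePath}${batchId}/${year}-${weekNumb}-*.${format}" // '*' globs all part files of the week
      store match {
        case "local" => file
        case "azure" =>
          val key = AppConf.getConfig("azure_storage_key") // storage account name
          s"wasb://$bucket@$key.blob.core.windows.net/$file"
        case "s3" => throw new Exception("s3 is currently not supported.")
      }
    }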
From ba954e51d590446ad8b4c026a06f90350a00a979 Mon Sep 17 00:00:00 2001
From: Manjunath Davanam
Date: Fri, 22 Oct 2021 10:56:45 +0530
Subject: [PATCH 48/55] Issue SB-24793 feat: fixed the review comments - added
 a separate case to handle s3.

---
 .../org/sunbird/analytics/exhaust/util/ExhaustUtil.scala | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/data-products/src/main/scala/org/sunbird/analytics/exhaust/util/ExhaustUtil.scala b/data-products/src/main/scala/org/sunbird/analytics/exhaust/util/ExhaustUtil.scala
index 0a7ca06ac..1cd7135b0 100644
--- a/data-products/src/main/scala/org/sunbird/analytics/exhaust/util/ExhaustUtil.scala
+++ b/data-products/src/main/scala/org/sunbird/analytics/exhaust/util/ExhaustUtil.scala
@@ -20,10 +20,13 @@ object ExhaustUtil {
       case "local" =>
         filePath + s"${batchId}/${year}-${weekNumb}-*.${format}"
       // $COVERAGE-OFF$ for azure testing
-      case "s3" | "azure" =>
+      case "azure" =>
         val key = AppConf.getConfig("azure_storage_key")
         val file = s"${filePath}${batchId}/${year}-${weekNumb}-*.${format}"
         s"wasb://$bucket@$key.blob.core.windows.net/$file"
+      case "s3" =>
+        // TODO - Need to support S3 as well.
+        throw new Exception("s3 is currently not supported.")
       // $COVERAGE-ON$
     }
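To make the store-specific URL construction above concrete: for the test fixtures added later in this series (batch `batch-001`, year 2021, week 33), the glob resolves as sketched below. The `filePath`, `bucket` and `key` values are illustrative assumptions, not deployment config:

    object ArchivedPathSketch extends App {
      val filePath = "src/test/resources/assessment-archival/archival-data/" // assumed reportPath
      val batchId = "batch-001"
      val year = 2021
      val weekNumb = 33
      val format = "csv.gz"

      // "local" store: a plain filesystem glob
      val localUrl = filePath + s"${batchId}/${year}-${weekNumb}-*.${format}"
      println(localUrl)
      // .../archival-data/batch-001/2021-33-*.csv.gz, which matches
      // the checked-in fixture 2021-33-1629367970070.csv.gz

      // "azure" store: the same relative file wrapped into a wasb:// URL;
      // in the job, bucket and key come from AppConf ("azure_storage_key")
      val bucket = "reports"          // assumed container
      val key = "storageaccount"      // assumed account value
      val file = s"${filePath}${batchId}/${year}-${weekNumb}-*.${format}"
      println(s"wasb://$bucket@$key.blob.core.windows.net/$file")
    }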
From bcd9c6eeb29d5bb8550b4d80036f877406e24698 Mon Sep 17 00:00:00 2001
From: Manjunath Davanam
Date: Fri, 22 Oct 2021 12:46:33 +0530
Subject: [PATCH 49/55] Issue SB-24793 feat: Assessment archival, fixed the
 review comments - updated the test cases.

---
 .../report/TestAssessmentArchivalJob.scala | 21 +++++++++++--------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/data-products/src/test/scala/org/sunbird/analytics/job/report/TestAssessmentArchivalJob.scala b/data-products/src/test/scala/org/sunbird/analytics/job/report/TestAssessmentArchivalJob.scala
index 9eb5d245e..beffc28e1 100644
--- a/data-products/src/test/scala/org/sunbird/analytics/job/report/TestAssessmentArchivalJob.scala
+++ b/data-products/src/test/scala/org/sunbird/analytics/job/report/TestAssessmentArchivalJob.scala
@@ -6,6 +6,7 @@ import org.ekstep.analytics.framework.conf.AppConf
 import org.ekstep.analytics.framework.util.{HadoopFileUtil, JSONUtils}
 import org.ekstep.analytics.framework.{FrameworkContext, JobConfig}
 import org.scalamock.scalatest.MockFactory
+import org.sunbird.analytics.job.report.AssessmentArchivalJob.{Period, getWeekAndYearVal}
 import org.sunbird.analytics.util.EmbeddedCassandra
 
@@ -30,21 +31,23 @@ class TestAssessmentArchivalJob extends BaseReportSpec with MockFactory {
   }
 
-  it should "Should able to archive the batch data" in {
+  it should "be able to archive the batch data for a specific date" in {
     implicit val mockFc: FrameworkContext = mock[FrameworkContext]
     val strConfig = """{"search":{"type":"none"},"model":"org.sunbird.analytics.job.report.AssessmentArchivalJob","modelParams":{"deleteArchivedBatch":false,"store":"local","sparkCassandraConnectionHost":"{{ core_cassandra_host }}","fromDate":"$(date --date yesterday '+%Y-%m-%d')","toDate":"$(date --date yesterday '+%Y-%m-%d')"},"parallelization":8,"appName":"Assessment Archival Job"}""".stripMargin
     implicit val jobConfig: JobConfig = JSONUtils.deserialize[JobConfig](strConfig)
-    val reportData = AssessmentArchivalJob.archiveData(date = null, None, archiveForLastWeek = true)
-
+    val todayDate: String = java.time.LocalDate.now.toString
+    val reportData = AssessmentArchivalJob.archiveData(date = todayDate, batchIds = None, archiveForLastWeek = false)
     val batch_1 = reportData.filter(x => x.batchId.getOrElse("") === "batch-001")
-    batch_1.foreach(res => res.period.year === "2021")
-    batch_1.foreach(res => res.totalArchivedRecords === "5")
-    batch_1.foreach(res => res.period.weekOfYear === "32")
+    val period: Period = getWeekAndYearVal(todayDate, archiveForLastWeek = false)
+
+    batch_1.foreach(res => res.period.year should be(period.year))
+    batch_1.foreach(res => res.totalArchivedRecords.get should be(5))
+    batch_1.foreach(res => res.period.weekOfYear should be(period.weekOfYear))
 
     val batch_2 = reportData.filter(x => x.batchId.getOrElse("") === "batch-004")
-    batch_2.foreach(res => res.period.year === "2021")
-    batch_2.foreach(res => res.totalArchivedRecords === "1")
-    batch_2.foreach(res => res.period.weekOfYear === "32")
+    batch_2.foreach(res => res.period.year should be(period.year))
+    batch_2.foreach(res => res.totalArchivedRecords.get should be(1))
+    batch_2.foreach(res => res.period.weekOfYear should be(period.weekOfYear))
   }
 
   it should "be able to fetch the archived records from azure and delete the records" in {
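`getWeekAndYearVal` itself is outside this excerpt, but the test above relies on it agreeing with Spark's `weekofyear`, which follows ISO-8601 week numbering. A hedged sketch of a helper consistent with that contract (this is an assumption about the semantics, not the job's actual implementation):

    import java.time.LocalDate
    import java.time.temporal.WeekFields

    // Returns (year, weekOfYear) for a yyyy-MM-dd date string using ISO-8601 weeks,
    // matching org.apache.spark.sql.functions.weekofyear
    def isoWeekAndYear(date: String): (Int, Int) = {
      val d = LocalDate.parse(date)
      (d.get(WeekFields.ISO.weekBasedYear()), d.get(WeekFields.ISO.weekOfWeekBasedYear()))
    }

    // e.g. isoWeekAndYear("2021-10-22") == (2021, 42)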
batch-001",2021,42 -> 5 val archiveBatchList = filteredAssessmentData.groupBy(partitionCols.head, partitionCols.tail: _*).count().collect() val totalBatchesToArchive = new AtomicInteger(archiveBatchList.length) JobLogger.log(s"Total Batches to Archive is $totalBatchesToArchive for a period $period", None, INFO) - // Loop through the batches to archive list + // Example Map - {"batch-001":[{"batchId":"batch-001","period":{"year":2021,"weekOfYear":42}}],"batch-004":[{"batchId":"batch-004","period":{"year":2021,"weekOfYear":42}}]} val batchesToArchive: Map[String, Array[BatchPartition]] = archiveBatchList.map(f => BatchPartition(f.get(0).asInstanceOf[String], Period(f.get(1).asInstanceOf[Int], f.get(2).asInstanceOf[Int]))).groupBy(_.batchId) val archivalStatus: Array[ArchivalMetrics] = batchesToArchive.flatMap(batches => { val processingBatch = new AtomicInteger(batches._2.length) From 700d3fcd3a5ee3f2c9acede1a08635d0b10162de Mon Sep 17 00:00:00 2001 From: Manjunath Davanam Date: Fri, 22 Oct 2021 12:56:58 +0530 Subject: [PATCH 51/55] Issue SB-24793 feat: Assessment archival fixed the review comments --- .../analytics/job/report/TestAssessmentArchivalJob.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/data-products/src/test/scala/org/sunbird/analytics/job/report/TestAssessmentArchivalJob.scala b/data-products/src/test/scala/org/sunbird/analytics/job/report/TestAssessmentArchivalJob.scala index beffc28e1..00fbe2fdb 100644 --- a/data-products/src/test/scala/org/sunbird/analytics/job/report/TestAssessmentArchivalJob.scala +++ b/data-products/src/test/scala/org/sunbird/analytics/job/report/TestAssessmentArchivalJob.scala @@ -37,9 +37,9 @@ class TestAssessmentArchivalJob extends BaseReportSpec with MockFactory { implicit val jobConfig: JobConfig = JSONUtils.deserialize[JobConfig](strConfig) val todayDate: String = java.time.LocalDate.now.toString val reportData = AssessmentArchivalJob.archiveData(date = todayDate, batchIds = None, archiveForLastWeek = false) - val batch_1 = reportData.filter(x => x.batchId.getOrElse("") === "batch-001") val period: Period = getWeekAndYearVal(todayDate, archiveForLastWeek = false) - + + val batch_1 = reportData.filter(x => x.batchId.getOrElse("") === "batch-001") batch_1.foreach(res => res.period.year should be(period.year)) batch_1.foreach(res => res.totalArchivedRecords.get should be(5)) batch_1.foreach(res => res.period.weekOfYear should be(period.weekOfYear)) From d04fd0d7fc136fea123b6b29057fa4a0e3c210e7 Mon Sep 17 00:00:00 2001 From: Manjunath Davanam Date: Mon, 25 Oct 2021 12:42:07 +0530 Subject: [PATCH 52/55] Issue SB-24793 feat: Adding the test csv files --- .../batch-001/2021-33-1629367970070.csv.gz | Bin 0 -> 884 bytes .../batch-004/2021-33-1629367971867.csv.gz | Bin 0 -> 196 bytes 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 data-products/src/test/resources/assessment-archival/archival-data/batch-001/2021-33-1629367970070.csv.gz create mode 100644 data-products/src/test/resources/assessment-archival/archival-data/batch-004/2021-33-1629367971867.csv.gz diff --git a/data-products/src/test/resources/assessment-archival/archival-data/batch-001/2021-33-1629367970070.csv.gz b/data-products/src/test/resources/assessment-archival/archival-data/batch-001/2021-33-1629367970070.csv.gz new file mode 100644 index 0000000000000000000000000000000000000000..28898c62972360eaab68827f2f2b9a29b35d1657 GIT binary patch literal 884 zcmV-)1B?70iwFP!000000PR-GZreB%-S;a9Z6h0eiINNyMbUMEnL#m|$Tk>>iG}Ln 
From d04fd0d7fc136fea123b6b29057fa4a0e3c210e7 Mon Sep 17 00:00:00 2001
From: Manjunath Davanam
Date: Mon, 25 Oct 2021 12:42:07 +0530
Subject: [PATCH 52/55] Issue SB-24793 feat: Adding the test csv files

---
 .../batch-001/2021-33-1629367970070.csv.gz | Bin 0 -> 884 bytes
 .../batch-004/2021-33-1629367971867.csv.gz | Bin 0 -> 196 bytes
 2 files changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 data-products/src/test/resources/assessment-archival/archival-data/batch-001/2021-33-1629367970070.csv.gz
 create mode 100644 data-products/src/test/resources/assessment-archival/archival-data/batch-004/2021-33-1629367971867.csv.gz

diff --git a/data-products/src/test/resources/assessment-archival/archival-data/batch-001/2021-33-1629367970070.csv.gz b/data-products/src/test/resources/assessment-archival/archival-data/batch-001/2021-33-1629367970070.csv.gz
new file mode 100644
index 0000000000000000000000000000000000000000..28898c62972360eaab68827f2f2b9a29b35d1657
GIT binary patch
literal 884
[base85-encoded binary data omitted]

diff --git a/data-products/src/test/resources/assessment-archival/archival-data/batch-004/2021-33-1629367971867.csv.gz b/data-products/src/test/resources/assessment-archival/archival-data/batch-004/2021-33-1629367971867.csv.gz
new file mode 100644
index 0000000000000000000000000000000000000000..b5cc9276dda9f62651d28d85178e900529ff12be
GIT binary patch
literal 196
[base85-encoded binary data omitted]

From d143578d82ebb318b823b328708f01747ea56900 Mon Sep 17 00:00:00 2001
From: utk14
Date: Mon, 25 Oct 2021 14:30:30 +0530
Subject: [PATCH 53/55] Issue SB-24793 feat: Assessment archived data:: Review comments resolved

---
 .../analytics/exhaust/util/ExhaustUtil.scala | 21 ++++---------------
 1 file changed, 4 insertions(+), 17 deletions(-)

diff --git a/data-products/src/main/scala/org/sunbird/analytics/exhaust/util/ExhaustUtil.scala b/data-products/src/main/scala/org/sunbird/analytics/exhaust/util/ExhaustUtil.scala
index 1cd7135b0..1426d045c 100644
--- a/data-products/src/main/scala/org/sunbird/analytics/exhaust/util/ExhaustUtil.scala
+++ b/data-products/src/main/scala/org/sunbird/analytics/exhaust/util/ExhaustUtil.scala
@@ -4,7 +4,7 @@ package org.sunbird.analytics.exhaust.util
 import org.apache.spark.sql.{DataFrame, SparkSession}
 import org.ekstep.analytics.framework.FrameworkContext
 import org.ekstep.analytics.framework.conf.AppConf
-import org.ekstep.analytics.framework.util.JobLogger
+import org.ekstep.analytics.framework.util.{CommonUtil, JobLogger}
 
 object ExhaustUtil {
 
@@ -13,22 +13,9 @@ object ExhaustUtil {
     val format = fileFormat.getOrElse("csv.gz")
     val batchId = filteredBlobFields.getOrElse("batchId", "*")
     val year = filteredBlobFields.getOrElse("year", "*")
-    val weekNumb = filteredBlobFields.getOrElse("weekNum", "*")
-
-
-    val url = store match {
-      case "local" =>
-        filePath + s"${batchId}/${year}-${weekNumb}-*.${format}"
-      // $COVERAGE-OFF$ for azure testing
-      case "azure" =>
-        val key = AppConf.getConfig("azure_storage_key")
-        val file = s"${filePath}${batchId}/${year}-${weekNumb}-*.${format}"
-        s"wasb://$bucket@$key.blob.core.windows.net/$file"
-      case "s3" =>
-        // TODO - Need to support S3 as well.
- throw new Exception("s3 is currently not supported.") - // $COVERAGE-ON$ - } + val weekNum = filteredBlobFields.getOrElse("weekNum", "*") + + val url = CommonUtil.getArchivalBlobUrl(store: String, filePath: String, bucket:String, batchId: Any, year: Any, weekNum: Any, format: String) JobLogger.log(s"Fetching data from ${store} ")(new String()) fetch(url, "csv") From d87113c0603c1c1b3b865001c075a091da2a33fa Mon Sep 17 00:00:00 2001 From: utk14 Date: Mon, 25 Oct 2021 15:00:15 +0530 Subject: [PATCH 54/55] Issue SB-24793 feat: Assessment archived data:: Review comments resolved --- .../org/sunbird/analytics/exhaust/util/ExhaustUtil.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/data-products/src/main/scala/org/sunbird/analytics/exhaust/util/ExhaustUtil.scala b/data-products/src/main/scala/org/sunbird/analytics/exhaust/util/ExhaustUtil.scala index 1426d045c..272c11577 100644 --- a/data-products/src/main/scala/org/sunbird/analytics/exhaust/util/ExhaustUtil.scala +++ b/data-products/src/main/scala/org/sunbird/analytics/exhaust/util/ExhaustUtil.scala @@ -11,11 +11,11 @@ object ExhaustUtil { def getArchivedData(store: String, filePath: String, bucket: String, blobFields: Map[String, Any], fileFormat: Option[String])(implicit spark: SparkSession, fc: FrameworkContext): DataFrame = { val filteredBlobFields = blobFields.filter(_._2 != null) val format = fileFormat.getOrElse("csv.gz") - val batchId = filteredBlobFields.getOrElse("batchId", "*") + val batchId = filteredBlobFields.getOrElse("batchId", "*").toString() val year = filteredBlobFields.getOrElse("year", "*") - val weekNum = filteredBlobFields.getOrElse("weekNum", "*") + val weekNum = filteredBlobFields.getOrElse("weekNum", "*").toString() - val url = CommonUtil.getArchivalBlobUrl(store: String, filePath: String, bucket:String, batchId: Any, year: Any, weekNum: Any, format: String) + val url = CommonUtil.getArchivalBlobUrl(store, filePath, bucket, batchId, year, weekNum, format) JobLogger.log(s"Fetching data from ${store} ")(new String()) fetch(url, "csv") From 04d027eba04ee170ccfc2d0fee3dc490d85166c1 Mon Sep 17 00:00:00 2001 From: utk14 Date: Tue, 26 Oct 2021 10:12:47 +0530 Subject: [PATCH 55/55] Issue SB-24793 feat: Assessment archived data:: Review comments resolved --- .../scala/org/sunbird/analytics/exhaust/util/ExhaustUtil.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/data-products/src/main/scala/org/sunbird/analytics/exhaust/util/ExhaustUtil.scala b/data-products/src/main/scala/org/sunbird/analytics/exhaust/util/ExhaustUtil.scala index 272c11577..09b8a7d5f 100644 --- a/data-products/src/main/scala/org/sunbird/analytics/exhaust/util/ExhaustUtil.scala +++ b/data-products/src/main/scala/org/sunbird/analytics/exhaust/util/ExhaustUtil.scala @@ -15,7 +15,8 @@ object ExhaustUtil { val year = filteredBlobFields.getOrElse("year", "*") val weekNum = filteredBlobFields.getOrElse("weekNum", "*").toString() - val url = CommonUtil.getArchivalBlobUrl(store, filePath, bucket, batchId, year, weekNum, format) + val file: String = s"${filePath}${batchId}/${year}-${weekNum}-*.${format}" + val url = CommonUtil.getBlobUrl(store, file, bucket) JobLogger.log(s"Fetching data from ${store} ")(new String()) fetch(url, "csv")