diff --git a/release-documents/5.1.0.md b/release-documents/5.1.0.md index 45048a0..c9c41c5 100644 --- a/release-documents/5.1.0.md +++ b/release-documents/5.1.0.md @@ -7,7 +7,7 @@ new aggregated data source called `ml-survey-distinctCount-status` to fix admin . ## Deploy ml-analytics-service -To retrieve the latest release tag for version 5.1.0, please visit the following URL:https://github.com/Sunbird-Ed/ml-analytics-service/tree/release-5.1.0_RC21 +To retrieve the latest release tag for version 5.1.0, please visit the following URL:https://github.com/Sunbird-Ed/ml-analytics-service/tree/release-5.1.0_RC22 To proceed with the deployment process, follow the steps below: @@ -37,15 +37,17 @@ Add configs in main.yml please refer this for the file path : https://github.com - ml_analytics_distinctCnt_survey_status_batch_ingestion_spec : ```html -{"type":"index","spec":{"ioConfig":{"type":"index","inputSource":{"type":"local","baseDir":["local json file storage path"],"filter":"ml_survey_distinctCount_status.json"},"inputFormat":{"type":"json"}},"tuningConfig":{"type":"index","partitionsSpec":{"type":"dynamic"}},"dataSchema":{"dataSource":"ml-surveydistinctCount-status","granularitySpec":{"type":"uniform","queryGranularity":"none","rollup":false,"segmentGranularity":"DAY"},"timestampSpec":{"column":"time_stamp","format":"auto"},"dimensionsSpec":{"dimensions":[{"type":"string","name":"program_name"},{"type":"string","name":"program_id"},{"type":"string","name":"survey_name"},{"type":"string","name":"survey_id"},{"type":"string","name":"submission_status"},{"type":"string","name":"state_name"},{"type":"string","name":"state_externalId"},{"type":"string","name":"district_name"},{"type":"string","name":"district_externalId"},{"type":"string","name":"block_name"},{"type":"string","name":"block_externalId"},{"type":"string","name":"organisation_name"},{"type":"string","name":"organisation_id"},{"type":"string","name":"private_program"},{"type":"string","name":"parent_channel"},{"type":"long","name":"unique_users"},{"type":"long","name":"unique_submissions"},{"type":"string","name":"time_stamp"}]},"metricsSpec":[]}}} +{"type":"index","spec":{"ioConfig":{"type":"index","inputSource":{"type":"azure","uris":["azure://telemetry-data-store/survey/distinctCount/ml_survey_distinctCount_status.json"]},"inputFormat":{"type":"json"}},"tuningConfig":{"type":"index","partitionsSpec":{"type":"dynamic"}},"dataSchema":{"dataSource":"ml-surveydistinctCount-status","granularitySpec":{"type":"uniform","queryGranularity":"none","rollup":false,"segmentGranularity":"DAY"},"timestampSpec":{"column":"time_stamp","format":"auto"},"dimensionsSpec":{"dimensions":[{"type":"string","name":"program_name"},{"type":"string","name":"program_id"},{"type":"string","name":"survey_name"},{"type":"string","name":"survey_id"},{"type":"string","name":"submission_status"},{"type":"string","name":"state_name"},{"type":"string","name":"state_externalId"},{"type":"string","name":"district_name"},{"type":"string","name":"district_externalId"},{"type":"string","name":"block_name"},{"type":"string","name":"block_externalId"},{"type":"string","name":"organisation_name"},{"type":"string","name":"organisation_id"},{"type":"string","name":"private_program"},{"type":"string","name":"parent_channel"},{"type":"long","name":"unique_users"},{"type":"long","name":"unique_submissions"},{"type":"string","name":"time_stamp"}]},"metricsSpec":[]}}} ``` Note : change the values for keys such as (spec.inConfig.inputSource.baseDir : "local json file storage path") & (spec.inConfig.inputSource.type: "cloud provider ex.azure") -- ml_analytics_distinctCnt_survey_status_batch_ingestion_spec :"local json file storage path" +- ml_analytics_survey_distinctCount_status_filepath :"{{ WORKDIR }} +/source/survey/status/distinctCount/output" -- ml_analytics_survey_distinctCount_blob_path : "cloud json file storage path" +- ml_analytics_survey_distinctCount_blob_path : "survey/status/distinctCount/" -- ml_analytics_survey_streaming_success_log_folder_path : "logs storage path" +- ml_analytics_survey_streaming_success_log_folder_path : "{{ WORKDIR }} +/logs/survey/status/distinctCount" ### Backend Json Update the backend json using this API `/api/data/v1/report/jobs/` diff --git a/survey/pyspark_sur_distinct_count_status.py b/survey/pyspark_sur_distinct_count_status.py index bdd361f..6ca6846 100644 --- a/survey/pyspark_sur_distinct_count_status.py +++ b/survey/pyspark_sur_distinct_count_status.py @@ -31,10 +31,9 @@ root_path = config_path[0] sys.path.append(root_path) -from cloud_storage.cloud import MultiCloud -cloud_init = MultiCloud() - sys.path.append(config.get("COMMON","cloud_module_path")) +from cloud import MultiCloud +cloud_init = MultiCloud() # date formating current_date = datetime.date.today()