Add refresh schedules and properties to export and import. #114

Merged · 4 commits · Feb 20, 2024
2 changes: 1 addition & 1 deletion .github/workflows/lint.yml
@@ -1,7 +1,7 @@
name: Lint
on: [push, pull_request]
env:
PYTHON_VERSION: 3.9
PYTHON_VERSION: 3.11

jobs:
lint:
2 changes: 1 addition & 1 deletion .github/workflows/mypy.yml
@@ -1,7 +1,7 @@
name: Mypy (Type check)
on: [push, pull_request]
env:
PYTHON_VERSION: 3.9
PYTHON_VERSION: 3.11

jobs:
mypy:
2 changes: 1 addition & 1 deletion .github/workflows/test-build.yml
@@ -15,7 +15,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: ["3.9", "3.10", "3.11"]
python-version: ["3.10", "3.11"]
module: [Core]

# We want to run on external PRs, but not on our own internal PRs as they'll be run
5 changes: 5 additions & 0 deletions core/operation/baseoperation.py
@@ -8,6 +8,8 @@
ASSET_DIR = "assets"
TEMPLATE_DIR = os.path.join(ASSET_DIR, "templates")
DATA_SET_DIR = os.path.join(ASSET_DIR, "data-sets")
DATA_SET_REFRESH_PROPS_SUFFIX = "-data-set-refresh-props"
DATA_SET_REFRESH_SCHEDULES_SUFFIX = "-data-set-refresh-schedules"


@dataclass
@@ -102,3 +104,6 @@ def _describe_data_set(self, data_set_id):

def _resolve_path(self, *paths):
return os.path.join(*paths)

def _resolve_schedules_filename(self, logical_data_set_name: str):
return logical_data_set_name + DATA_SET_REFRESH_SCHEDULES_SUFFIX + ".json"
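
As a quick illustration of the naming convention the new suffix constants and helper establish — a minimal sketch, where the logical data set name "orders" is hypothetical:

DATA_SET_REFRESH_PROPS_SUFFIX = "-data-set-refresh-props"
DATA_SET_REFRESH_SCHEDULES_SUFFIX = "-data-set-refresh-schedules"

def resolve_schedules_filename(logical_data_set_name: str) -> str:
    # mirrors BaseOperation._resolve_schedules_filename above
    return logical_data_set_name + DATA_SET_REFRESH_SCHEDULES_SUFFIX + ".json"

print(resolve_schedules_filename("orders"))                # orders-data-set-refresh-schedules.json
print("orders" + DATA_SET_REFRESH_PROPS_SUFFIX + ".json")  # orders-data-set-refresh-props.json
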
97 changes: 85 additions & 12 deletions core/operation/export_analysis_operation.py
@@ -2,8 +2,11 @@
import os
from typing import List

from botocore.exceptions import ClientError

from core.operation.baseoperation import (
DATA_SET_DIR,
DATA_SET_REFRESH_PROPS_SUFFIX,
TEMPLATE_DIR,
BaseOperation,
TemplateResponse,
@@ -82,7 +85,7 @@ def verify_success() -> bool:
map_to_save[i] = self._template_definition[i]

# save the template as json file
definition_json_str = json.dumps(map_to_save, indent=4)
definition_json_str = json.dumps(map_to_save, indent=4, default=str)
template_file_path = self._resolve_path(
self._output_dir, TEMPLATE_DIR, self._template_definition["Name"] + ".json"
)
@@ -93,9 +96,24 @@ def verify_success() -> bool:

# for each dataset declaration identifiers
for di in data_set_identifier_declarations:
# save to json file
ds_file = self._save_dataset_to_file(di=di)
identifier = di["Identifier"]
arn = di["DataSetArn"]
data_set_id = arn.split("dataset/", 1)[1]
# save data set definition to json file
ds_file = self._save_dataset_to_file(data_set_id, identifier)
files_to_update.append(ds_file)
ds_refresh_props_file = self._save_dataset_refresh_props_to_file(
data_set_id, identifier
)
if ds_refresh_props_file:
files_to_update.append(ds_refresh_props_file)
ds_refresh_schedules_file = (
self._save_dataset_refresh_schedules_to_file(
data_set_id, identifier
)
)
if ds_refresh_schedules_file:
files_to_update.append(ds_refresh_schedules_file)

return {"status": "success", "files_exported": files_to_update}

@@ -116,31 +134,86 @@ def _create_or_update_template_from_analysis(
}
return self._recreate_template(template_data=params)

def _save_dataset_to_file(self, di) -> str:
def _save_dataset_to_file(
self, data_set_id: str, logical_data_set_name: str
) -> str:
"""

:param di: dataset map
:return: The path of the dataset file
"""
identifier = di["Identifier"]
arn = di["DataSetArn"]
dataset_id = arn.split("dataset/", 1)[1]
ds_def_elements_to_save = self._describe_data_set(dataset_id)

ds_def_elements_to_save = self._describe_data_set(data_set_id)
# remove the following fields from the response before saving it.
for i in ["Arn", "DataSetId", "CreatedTime", "LastUpdatedTime"]:
ds_def_elements_to_save.pop(i)

# align the data set name with the identifier
ds_def_elements_to_save["Name"] = identifier
ds_def_elements_to_save["Name"] = logical_data_set_name
# remove the datasource arn since this will need to be overridden
recursively_replace_value(ds_def_elements_to_save, "DataSourceArn", "")
# save what is left to disk
ds_def_str = json.dumps(ds_def_elements_to_save, indent=4)
dataset_file_path = self._resolve_path(
self._output_dir, DATA_SET_DIR, identifier + ".json"
self._output_dir, DATA_SET_DIR, logical_data_set_name + ".json"
)

with open(dataset_file_path, "w") as dataset_file:
dataset_file.write(ds_def_str)

return dataset_file_path

def _save_dataset_refresh_props_to_file(
self, data_set_id: str, logical_data_set_name: str
) -> str | None:
# get data set refresh props
try:
response = self._qs_client.describe_data_set_refresh_properties(
AwsAccountId=self._aws_account_id, DataSetId=data_set_id
)
data_set_refresh_props = response["DataSetRefreshProperties"]
data_set_refresh_props_str = json.dumps(data_set_refresh_props, indent=4)
file_path = self._resolve_path(
self._output_dir,
DATA_SET_DIR,
logical_data_set_name + DATA_SET_REFRESH_PROPS_SUFFIX + ".json",
)

with open(file_path, "w") as props_file:
props_file.write(data_set_refresh_props_str)

return file_path
except ClientError:
# If the refresh properties don't exist, an InvalidParameterException is thrown
# rather than the ResourceNotFoundException I would have expected, so a generic
# catch-all here is probably sufficient.
return None

def _save_dataset_refresh_schedules_to_file(
self, data_set_id: str, logical_data_set_name: str
) -> str | None:
try:
response = self._qs_client.list_refresh_schedules(
AwsAccountId=self._aws_account_id, DataSetId=data_set_id
)
refresh_schedules = response["RefreshSchedules"]
# remove account-specific info
for schedule in refresh_schedules:
for prop in ["ScheduleId", "StartAfterDateTime", "Arn"]:
del schedule[prop]

data_set_refresh_schedules = {"RefreshSchedules": refresh_schedules}

file_path = self._resolve_path(
self._output_dir,
DATA_SET_DIR,
self._resolve_schedules_filename(logical_data_set_name),
)

with open(file_path, "w") as schedules_file:
data_set_refresh_schedules_str = json.dumps(
data_set_refresh_schedules, indent=4, default=str
)
schedules_file.write(data_set_refresh_schedules_str)
return file_path

except self._qs_client.exceptions.ResourceNotFoundException:
return None
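
For context, a self-contained sketch of the two QuickSight calls the export path relies on; the account and data set IDs are placeholders, not values from this change:

import json

import boto3
from botocore.exceptions import ClientError

qs = boto3.client("quicksight")
account_id = "123456789012"     # hypothetical account ID
data_set_id = "my-data-set-id"  # hypothetical data set ID

try:
    # raises InvalidParameterException (a ClientError) when no refresh
    # properties have been configured for the data set
    props = qs.describe_data_set_refresh_properties(
        AwsAccountId=account_id, DataSetId=data_set_id
    )["DataSetRefreshProperties"]
    print(json.dumps(props, indent=4, default=str))
except ClientError:
    props = None

# RefreshSchedules is empty when none are configured; the call can raise
# ResourceNotFoundException if the data set itself does not exist
schedules = qs.list_refresh_schedules(
    AwsAccountId=account_id, DataSetId=data_set_id
)["RefreshSchedules"]
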
96 changes: 94 additions & 2 deletions core/operation/import_from_json_operation.py
@@ -1,8 +1,17 @@
import datetime
import json
import os.path
import time
from dataclasses import dataclass

from core.operation.baseoperation import DATA_SET_DIR, TEMPLATE_DIR, BaseOperation
from botocore.exceptions import ClientError

from core.operation.baseoperation import (
DATA_SET_DIR,
DATA_SET_REFRESH_PROPS_SUFFIX,
TEMPLATE_DIR,
BaseOperation,
)
from core.util import recursively_replace_value


@@ -72,8 +81,18 @@ def execute(self) -> dict:
dataset["DataSetId"] = self._resolve_data_set_id_from_placeholder(
placeholder=placeholder, namespace=self._target_namespace
)
dataset["Name"] = dataset["Name"]

data_set_id = dataset["DataSetId"]
# remove any associated refresh schedules
self._delete_refresh_schedules(data_set_id=data_set_id)
logical_data_set_name = dataset["Name"]

# recreate dataset
ds_response = self._recreate_data_set(dataset_definition=dataset)
# recreate the refresh properties and schedules
self._create_refresh_schedule(
logical_data_set_name=logical_data_set_name, data_set_id=data_set_id
)

data_sets_created.append(
{
@@ -135,3 +154,76 @@ def _recreate_data_set(self, dataset_definition: dict):
)

return DataSetResponse(response["Arn"], response["DataSetId"])

def _delete_refresh_schedules(self, data_set_id: str):
params = {
"AwsAccountId": self._aws_account_id,
"DataSetId": data_set_id,
}

# delete the refresh properties
try:
self._qs_client.delete_data_set_refresh_properties(**params)
except ClientError:
# the data set may have no refresh properties to delete; ignore
pass

# delete any pre-existing schedules
refresh_schedules = []

try:
response = self._qs_client.list_refresh_schedules(**params)
refresh_schedules = [x["ScheduleId"] for x in response["RefreshSchedules"]]

except self._qs_client.exceptions.ResourceNotFoundException:
pass

for schedule_id in refresh_schedules:
delete_params = {}
delete_params.update(params)
delete_params.update({"ScheduleId": schedule_id})
self._qs_client.delete_refresh_schedule(**delete_params)

def _get_tomorrow(self) -> datetime.datetime:
return datetime.datetime.now() + datetime.timedelta(days=1)

def _create_refresh_schedule(self, logical_data_set_name: str, data_set_id: str):
params = {
"AwsAccountId": self._aws_account_id,
"DataSetId": data_set_id,
}

dataset_refresh_props_filename = self._resolve_path(
self._input_dir,
DATA_SET_DIR,
logical_data_set_name + DATA_SET_REFRESH_PROPS_SUFFIX + ".json",
)

if os.path.exists(dataset_refresh_props_filename):
with open(dataset_refresh_props_filename) as dataset_refresh_props:
props = {
"DataSetRefreshProperties": json.loads(dataset_refresh_props.read())
}
props.update(params)
self._qs_client.put_data_set_refresh_properties(**props)

dataset_refresh_filename = self._resolve_path(
self._input_dir,
DATA_SET_DIR,
self._resolve_schedules_filename(logical_data_set_name),
)

if os.path.exists(dataset_refresh_filename):
with open(dataset_refresh_filename) as dataset_refresh_file:
schedules = json.loads(dataset_refresh_file.read())["RefreshSchedules"]

for index, schedule in enumerate(schedules):
start_after_datetime = self._get_tomorrow()
# start after date must be in the future
schedule["ScheduleId"] = data_set_id + "-" + str(index)
schedule["StartAfterDateTime"] = start_after_datetime
schedule_params = {"Schedule": schedule}
schedule_params.update(params)
response = self._qs_client.create_refresh_schedule(
**schedule_params
)
self._log.info(f"create_refresh_schedule_response={response}")
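
And a minimal sketch of the import-side sequence under the same assumptions — clear stale schedules, put the refresh properties, then recreate each schedule with a future StartAfterDateTime; the IDs and schedule body are illustrative, with the lookback window taken from the test fixtures:

import datetime

import boto3

qs = boto3.client("quicksight")
params = {"AwsAccountId": "123456789012", "DataSetId": "my-data-set-id"}  # hypothetical IDs

# delete any pre-existing schedules so the import starts clean
for old in qs.list_refresh_schedules(**params)["RefreshSchedules"]:
    qs.delete_refresh_schedule(ScheduleId=old["ScheduleId"], **params)

# incremental refresh requires refresh properties to be in place first
qs.put_data_set_refresh_properties(
    DataSetRefreshProperties={
        "RefreshConfiguration": {
            "IncrementalRefresh": {
                "LookbackWindow": {
                    "ColumnName": "time_stamp",
                    "Size": 1,
                    "SizeUnit": "DAY",
                }
            }
        }
    },
    **params,
)

schedule = {
    "ScheduleId": params["DataSetId"] + "-0",
    "ScheduleFrequency": {
        "Interval": "DAILY",
        "Timezone": "UTC",
        "TimeOfTheDay": "07:00",
    },
    "RefreshType": "INCREMENTAL_REFRESH",
    # QuickSight rejects StartAfterDateTime values in the past, hence "tomorrow"
    "StartAfterDateTime": datetime.datetime.now() + datetime.timedelta(days=1),
}
qs.create_refresh_schedule(Schedule=schedule, **params)
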
1 change: 1 addition & 0 deletions core/operation/publish_dashboard_from_template.py
@@ -144,6 +144,7 @@ def execute(self) -> dict:
self._s3_client.put_object(
Bucket=self._result_bucket,
Key=self._result_key,
ContentType="application/json",
Body=json.dumps(result["dashboard_info"]),
)

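
For illustration, the effect of the one-line change above — passing ContentType so S3 stores and serves the exported JSON with the right MIME type; the bucket and key names are placeholders:

import json

import boto3

s3 = boto3.client("s3")
s3.put_object(
    Bucket="my-results-bucket",    # hypothetical bucket
    Key="dashboards/result.json",  # hypothetical key
    ContentType="application/json",
    Body=json.dumps({"status": "success"}),
)
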
34 changes: 34 additions & 0 deletions tests/core/operation/analysis_test_responses.py
@@ -402,3 +402,37 @@ def describe_data_set_2_response():
},
"RequestId": "3e6ad967-c44d-4a86-8391-be51ebf978c5",
}


def describe_refresh_props_response():
return {
"DataSetRefreshProperties": {
"RefreshConfiguration": {
"IncrementalRefresh": {
"LookbackWindow": {
"ColumnName": "time_stamp",
"Size": 1,
"SizeUnit": "DAY",
}
}
}
}
}


def list_refresh_schedules_response():
return {
"RefreshSchedules": [
{
"ScheduleId": "fcdd5fe8-537d-4e59-947e-af35b5a82385",
"ScheduleFrequency": {
"Interval": "DAILY",
"Timezone": "America/New_York",
"TimeOfTheDay": "23:59",
},
"StartAfterDateTime": "2023-09-29 16:59:00-07:00",
"RefreshType": "INCREMENTAL_REFRESH",
"Arn": "arn:aws:quicksight:us-west-2:128682227026:dataset/e9e15c78-0193-4e4c-9a49-ed005569297d/refresh-schedule/fcdd5fe8-537d-4e59-947e-af35b5a82385",
}
]
}
11 changes: 11 additions & 0 deletions
@@ -0,0 +1,11 @@
{
"RefreshConfiguration": {
"IncrementalRefresh": {
"LookbackWindow": {
"ColumnName": "time_stamp",
"Size": 1,
"SizeUnit": "DAY"
}
}
}
}
12 changes: 12 additions & 0 deletions
@@ -0,0 +1,12 @@
{
"RefreshSchedules": [
{
"ScheduleFrequency": {
"Interval": "DAILY",
"Timezone": "UTC",
"TimeOfTheDay": "07:00"
},
"RefreshType": "INCREMENTAL_REFRESH"
}
]
}