-
Notifications
You must be signed in to change notification settings - Fork 1.5k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[dagster-airlift] Handle run retries
- Loading branch information
Showing
10 changed files
with
326 additions
and
9 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
70 changes: 70 additions & 0 deletions
70
examples/experimental/dagster-airlift/dagster_airlift/in_airflow/dagster_run_utils.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
from typing import Any, Mapping, NamedTuple, Optional | ||
|
||
# Run status strings after which a run will not change state again.
TERMINAL_STATI = ["SUCCESS", "FAILURE", "CANCELED"]
SUCCESS_STATUS = "SUCCESS"

# Names of the system tags (all sharing the "dagster/" prefix) read from a run.
SYSTEM_TAG_PREFIX = "dagster/"
MAX_RETRIES_TAG = f"{SYSTEM_TAG_PREFIX}max_retries"
RETRY_NUMBER_TAG = f"{SYSTEM_TAG_PREFIX}retry_number"
PARENT_RUN_ID_TAG = f"{SYSTEM_TAG_PREFIX}parent_run_id"
RETRY_ON_ASSET_OR_OP_FAILURE_TAG = f"{SYSTEM_TAG_PREFIX}retry_on_asset_or_op_failure"
RUN_FAILURE_REASON_TAG = f"{SYSTEM_TAG_PREFIX}failure_reason"

# Value of RUN_FAILURE_REASON_TAG that DagsterRunResult treats as a step failure.
STEP_FAILURE_REASON = "STEP_FAILURE"
|
||
|
||
class DagsterRunResult(NamedTuple):
    """Status and tags of a Dagster run, with helpers for reasoning about run retries.

    All retry-related properties are derived from the run's tags.
    """

    # Run status string; ``success`` compares it against SUCCESS_STATUS.
    status: str
    # Tags attached to the run, keyed by tag name.
    tags: Mapping[str, Any]

    @property
    def run_retries_configured(self) -> bool:
        """Whether the max-retries tag is present on the run."""
        return MAX_RETRIES_TAG in self.tags

    @property
    def has_remaining_retries(self) -> bool:
        """Whether the retry number is still below the configured maximum.

        Raises:
            Exception: if the max-retries tag is not set on the run.
        """
        if MAX_RETRIES_TAG not in self.tags:
            # The second literal must be an f-string so the tags actually
            # appear in the message.
            raise Exception(
                "Tried to retrieve tags from run, but run retries "
                f"were either not set or not properly configured. Found tags: {self.tags}"
            )
        return self.max_retries - self.retry_number > 0

    @property
    def run_will_automatically_retry(self) -> bool:
        """Whether, judging by its tags, this run is expected to be retried.

        False when retries are not configured, or when the run failed because
        of a step failure and retry-on-asset-or-op-failure is disabled;
        otherwise true exactly when retries remain.
        """
        if not self.run_retries_configured:
            return False
        if (
            not self.should_retry_on_asset_or_op_failure
            and self.failure_reason == STEP_FAILURE_REASON
        ):
            return False
        return self.has_remaining_retries

    @property
    def should_retry_on_asset_or_op_failure(self) -> bool:
        """Boolean value of the retry-on-asset-or-op-failure tag (True when unset)."""
        return get_boolean_tag_value(self.tags.get(RETRY_ON_ASSET_OR_OP_FAILURE_TAG), True)

    @property
    def failure_reason(self) -> Optional[str]:
        """Value of the failure-reason tag, or None when the tag is absent."""
        return self.tags.get(RUN_FAILURE_REASON_TAG)

    @property
    def retry_number(self) -> int:
        """Retry number parsed from the run's tags.

        A run without the retry-number tag is counted as retry 0.
        NOTE(review): presumably only retried runs carry this tag, so the
        original attempt lands on the default — confirm.
        """
        return int(self.tags.get(RETRY_NUMBER_TAG, 0))

    @property
    def success(self) -> bool:
        """Whether the run status equals SUCCESS_STATUS."""
        return self.status == SUCCESS_STATUS

    @property
    def max_retries(self) -> int:
        """Maximum number of retries parsed from the run's tags.

        Raises:
            Exception: if the max-retries tag is not set on the run.
        """
        if MAX_RETRIES_TAG not in self.tags:
            raise Exception("Could not determine max retries by tag because tag is not set.")
        return int(self.tags[MAX_RETRIES_TAG])
|
||
|
||
def get_boolean_tag_value(tag_value: Optional[str], default_value: bool = False) -> bool:
    """Interpret a string tag value as a boolean.

    Args:
        tag_value: Raw tag value, or None when the tag is not set.
        default_value: Result to use when ``tag_value`` is None.

    Returns:
        ``default_value`` for None; otherwise False when the value, compared
        case-insensitively, is "false", "none", "0" or the empty string, and
        True for any other string.
    """
    if tag_value is None:
        return default_value

    return tag_value.lower() not in {"false", "none", "0", ""}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
62 changes: 62 additions & 0 deletions
62
...ter-airlift/examples/kitchen-sink/kitchen_sink/airflow_dags/migrated_asset_has_retries.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
from pathlib import Path | ||
from typing import Any, Mapping | ||
|
||
from airflow import DAG | ||
from airflow.operators.python import PythonOperator | ||
from dagster._time import get_current_datetime_midnight | ||
from dagster_airlift.in_airflow import DefaultProxyTaskToDagsterOperator, proxying_to_dagster | ||
from dagster_airlift.in_airflow.dagster_run_utils import ( | ||
MAX_RETRIES_TAG, | ||
RETRY_ON_ASSET_OR_OP_FAILURE_TAG, | ||
) | ||
from dagster_airlift.in_airflow.proxied_state import load_proxied_state_from_yaml | ||
|
||
|
||
def print_hello() -> None:
    """Print a fixed greeting; used as the callable of the placeholder Airflow tasks."""
    print("Hello")  # noqa: T201
|
||
|
||
# Arguments shared by every DAG in this module; Airflow-level task retries are
# disabled here.
default_args = {
    "owner": "airflow",
    "depends_on_past": False,
    "retries": 0,
}
|
||
|
||
# Normally this isn't needed, but we're trying to get away with not using a multi-process-safe run storage
# to test behavior here.
class SetDagsterRetryInfoOperator(DefaultProxyTaskToDagsterOperator):
    """Proxy operator that adds retry-related tags to the default Dagster run tags."""

    def default_dagster_run_tags(self, context) -> Mapping[str, Any]:
        """Return the parent's default run tags plus a max-retries tag of "3".

        For DAGs whose id ends with "not_step_failure", retry-on-asset-or-op-failure
        is additionally switched off.
        """
        tags = {**super().default_dagster_run_tags(context), MAX_RETRIES_TAG: "3"}
        if self.get_airflow_dag_id(context).endswith("not_step_failure"):
            tags[RETRY_ON_ASSET_OR_OP_FAILURE_TAG] = "false"
        return tags
|
||
|
||
# Bound to its own module-level name: binding several DAGs in one module to the
# same variable leaves only the last one reachable through globals(), which is
# what gets handed to proxying_to_dagster at the bottom of this file.
with DAG(
    dag_id="migrated_asset_has_retries",
    default_args=default_args,
    schedule=None,
    start_date=get_current_datetime_midnight(),
    # Created unpaused; schedule=None means it still only runs when triggered.
    is_paused_upon_creation=False,
) as migrated_asset_has_retries_dag:
    PythonOperator(task_id="my_task", python_callable=print_hello)
|
||
|
||
with DAG(
    dag_id="migrated_asset_has_retries_not_step_failure",
    default_args=default_args,
    schedule=None,
    start_date=get_current_datetime_midnight(),
    # Created unpaused; schedule=None means it still only runs when triggered.
    is_paused_upon_creation=False,
) as minute_dag:
    PythonOperator(task_id="my_task", python_callable=print_hello)
|
||
|
||
# Hand this module's globals and the per-DAG proxied state (loaded from the
# sibling "proxied_state" directory) to airlift, building proxied tasks with the
# retry-tagging operator defined above.
# NOTE(review): presumably the DAGs to proxy are discovered by scanning
# `global_vars` — confirm against proxying_to_dagster.
proxying_to_dagster(
    proxied_state=load_proxied_state_from_yaml(Path(__file__).parent / "proxied_state"),
    global_vars=globals(),
    build_from_task_fn=SetDagsterRetryInfoOperator.build_from_task,
)
3 changes: 3 additions & 0 deletions
3
...ples/kitchen-sink/kitchen_sink/airflow_dags/proxied_state/migrated_asset_has_retries.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
tasks:
  - id: my_task
    proxied: True
3 changes: 3 additions & 0 deletions
3
.../kitchen_sink/airflow_dags/proxied_state/migrated_asset_has_retries_not_step_failure.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
tasks:
  - id: my_task
    proxied: True
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
27 changes: 27 additions & 0 deletions
27
...tal/dagster-airlift/examples/kitchen-sink/kitchen_sink/dagster_defs/retries_configured.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
from dagster import AssetExecutionContext, asset, materialize | ||
from dagster._core.storage.tags import MAX_RETRIES_TAG, PARENT_RUN_ID_TAG, RETRY_NUMBER_TAG | ||
|
||
|
||
# Asset that simulates having run retries activated (so that we don't have to stand up non-sqlite-storage)
@asset
def succeeds_on_final_retry(context: AssetExecutionContext):
    # A run without the retry-number tag counts as retry 0.
    current_retry = int(context.run_tags.get(RETRY_NUMBER_TAG, 0))
    if current_retry < 2:
        # Launch a run of the "next retry" ourselves, carrying over this run's
        # tags and pointing back at this run as the parent, then fail this one.
        materialize(
            [succeeds_on_final_retry],
            instance=context.instance,
            tags={
                **context.run_tags,
                RETRY_NUMBER_TAG: str(current_retry + 1),
                PARENT_RUN_ID_TAG: context.run_id,
                MAX_RETRIES_TAG: "3",
            },
        )
        raise Exception("oops i failed")
    # Retry number 2 or higher: succeed.
    return None
|
||
|
||
# Asset that raises on every execution.
@asset
def just_fails():
    raise Exception("I fail every time")
Oops, something went wrong.