-
Notifications
You must be signed in to change notification settings - Fork 14.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
AIP-72: Add support to get Variables in task SDK to author tasks #45458
base: main
Are you sure you want to change the base?
Changes from 3 commits
f87beb8
8b7c8ee
79287f5
609cc9e
be9f2aa
e05c8fc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -31,8 +31,11 @@ | |
TaskCallbackRequest, | ||
) | ||
from airflow.configuration import conf | ||
from airflow.models import Variable | ||
from airflow.models.dagbag import DagBag | ||
from airflow.sdk.execution_time.comms import GetConnection, GetVariable | ||
from airflow.sdk.api.datamodels._generated import VariableResponse | ||
from airflow.sdk.execution_time import task_runner | ||
from airflow.sdk.execution_time.comms import GetConnection, GetVariable, VariableResult | ||
from airflow.sdk.execution_time.supervisor import WatchedSubprocess | ||
from airflow.serialization.serialized_objects import LazyDeserializedDAG, SerializedDAG | ||
from airflow.stats import Stats | ||
|
@@ -43,26 +46,22 @@ | |
from airflow.typing_compat import Self | ||
from airflow.utils.context import Context | ||
|
||
COMMS_DECODER: task_runner.CommsDecoder[ToChild, ToParent] | ||
|
||
|
||
def _parse_file_entrypoint(): | ||
import os | ||
|
||
import structlog | ||
|
||
from airflow.sdk.execution_time import task_runner | ||
# Parse DAG file, send JSON back up! | ||
|
||
comms_decoder = task_runner.CommsDecoder[DagFileParseRequest, DagFileParsingResult]( | ||
input=sys.stdin, | ||
decoder=TypeAdapter[DagFileParseRequest](DagFileParseRequest), | ||
) | ||
msg = comms_decoder.get_message() | ||
comms_decoder.request_socket = os.fdopen(msg.requests_fd, "wb", buffering=0) | ||
msg = COMMS_DECODER.get_message() | ||
COMMS_DECODER.request_socket = os.fdopen(msg.requests_fd, "wb", buffering=0) | ||
|
||
log = structlog.get_logger(logger_name="task") | ||
|
||
result = _parse_file(msg, log) | ||
comms_decoder.send_request(log, result) | ||
COMMS_DECODER.send_request(log, result) | ||
|
||
|
||
def _parse_file(msg: DagFileParseRequest, log: FilteringBoundLogger) -> DagFileParsingResult: | ||
|
@@ -180,6 +179,11 @@ class DagFileParsingResult(BaseModel): | |
Field(discriminator="type"), | ||
] | ||
|
||
ToChild = Annotated[ | ||
Union[DagFileParseRequest, VariableResult], | ||
Field(discriminator="type"), | ||
] | ||
|
||
|
||
@attrs.define() | ||
class DagFileProcessorProcess(WatchedSubprocess): | ||
|
@@ -203,6 +207,11 @@ def start( # type: ignore[override] | |
target: Callable[[], None] = _parse_file_entrypoint, | ||
**kwargs, | ||
) -> Self: | ||
global COMMS_DECODER | ||
COMMS_DECODER = task_runner.CommsDecoder[ToChild, ToParent]( | ||
input=sys.stdin, | ||
decoder=TypeAdapter[ToChild](ToChild), | ||
) | ||
return super().start(path, callbacks, target=target, client=None, **kwargs) # type:ignore[arg-type] | ||
|
||
def _on_child_started( # type: ignore[override] | ||
|
@@ -234,8 +243,16 @@ def _handle_request(self, msg: ToParent, log: FilteringBoundLogger) -> None: # | |
if isinstance(msg, DagFileParsingResult): | ||
self.parsing_result = msg | ||
return | ||
# GetVariable etc -- parsing a dag can run top level code that asks for an Airflow Variable | ||
super()._handle_request(msg, log) | ||
Comment on lines
-237
to
-238
There was a problem hiding this comment. Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. We won't really need this, because for cases like variables and connections we will have to interact with the DB model directly. If we go to
||
elif isinstance(msg, GetVariable): | ||
key = msg.key | ||
try: | ||
value = Variable.get(key) | ||
except KeyError: | ||
log.exception("Variable: %s does not exist", key) | ||
raise | ||
var_result = VariableResult.from_variable_response(VariableResponse(key=key, value=value)) | ||
resp = var_result.model_dump_json(exclude_unset=True).encode() | ||
self.stdin.write(resp + b"\n") | ||
|
||
@property | ||
def is_ready(self) -> bool: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
# Licensed to the Apache Software Foundation (ASF) under one | ||
# or more contributor license agreements. See the NOTICE file | ||
# distributed with this work for additional information | ||
# regarding copyright ownership. The ASF licenses this file | ||
# to you under the Apache License, Version 2.0 (the | ||
# "License"); you may not use this file except in compliance | ||
# with the License. You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an | ||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
# KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations | ||
# under the License. | ||
|
||
from __future__ import annotations | ||
|
||
from airflow import DAG | ||
from airflow.models.baseoperator import BaseOperator | ||
from airflow.sdk import Variable | ||
|
||
value = Variable.get(key="my_var") | ||
|
||
|
||
class CustomOperator(BaseOperator): | ||
def execute(self, context): | ||
print(f"Variable defined at top level of dag has value: {value}") | ||
|
||
|
||
with DAG(dag_id="example_get_variable_using_task_sdk") as dag: | ||
CustomOperator(task_id="print_top_level_variable") |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -66,18 +66,24 @@ def _get_connection(conn_id: str) -> Connection: | |
return _convert_connection_result_conn(msg) | ||
|
||
|
||
def _get_variable(key: str, deserialize_json: bool) -> Variable: | ||
def _get_variable(key: str, deserialize_json: bool = False) -> Variable: | ||
# TODO: This should probably be moved to a separate module like `airflow.sdk.execution_time.comms` | ||
# or `airflow.sdk.execution_time.variable` | ||
# A reason to not move it to `airflow.sdk.execution_time.comms` is that it | ||
# will make that module depend on Task SDK, which is not ideal because we intend to | ||
# keep Task SDK as a separate package than execution time mods. | ||
from airflow.sdk.execution_time.comms import ErrorResponse, GetVariable | ||
from airflow.sdk.execution_time.task_runner import SUPERVISOR_COMMS | ||
|
||
try: | ||
# We check the hypothesis if the request for variable came from task. | ||
from airflow.sdk.execution_time.task_runner import SUPERVISOR_COMMS as COMMS # type: ignore | ||
except ImportError: | ||
# If not, hypothesis is false and this request is from dag level. | ||
from airflow.dag_processing.processor import COMMS_DECODER as COMMS # type: ignore | ||
There was a problem hiding this comment. Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. What if this one fails too, There was a problem hiding this comment. Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. I am not too happy with this one. Wondering if we can do anything better here. There was a problem hiding this comment. Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. The challenge here is to know whether the context is DAG level or task level. I can't seem to find a clear distinction to point to and use
||
|
||
log = structlog.get_logger(logger_name="task") | ||
SUPERVISOR_COMMS.send_request(log=log, msg=GetVariable(key=key)) | ||
msg = SUPERVISOR_COMMS.get_message() | ||
COMMS.send_request(log=log, msg=GetVariable(key=key)) | ||
msg = COMMS.get_message() | ||
if isinstance(msg, ErrorResponse): | ||
raise AirflowRuntimeError(msg) | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We won't really need this, because for cases like variables and connections we will have to interact with the DB model directly. If we go to `super()._handle_request`, it brings the SDK API client into the picture, which shouldn't be needed for DAG-level stuff.