-
Notifications
You must be signed in to change notification settings - Fork 62
/
Copy pathget_dag_id.py
95 lines (81 loc) · 3.48 KB
/
get_dag_id.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
"""
Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
"""
"""
Details
-------
get_dag_id.py
Description
-----------
based on https://medium.com/apache-airflow/magic-loop-in-airflow-reloaded-3e1bd8fb6671
This function, when referenced from an Airflow DAG, returns None if the current process
is not part of a Celery task execution, or the DAG ID string if it is.
Usage
-----
Use this to make your dag parsing smarter, i.e. skip DAG creation during tasks for all
but the current DAG id, or skip retrieving data during the DAG scheduler parse that is
only needed during task execution.
"""
import setproctitle, ast
"""
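Flags that are followed by a separate value argument; when scanning the task command
line, both the flag and the value after it are skipped, per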
https://airflow.apache.org/docs/apache-airflow/stable/cli-and-env-variables-ref.html#run
"""
# Flags that are followed by a separate value token. When parse_args meets one
# of these it steps over both the flag and the token after it.
skip_two = [
    '--cfg-path',
    '-d', '--depends-on-past',
    '--map-index',
    '-p', '--pickle',
    '--pool',
    '-S', '--subdir',
]


def parse_args(args, n):
    """Return the first positional (non-flag) token at or after index ``n``.

    Tokens starting with ``-`` are treated as flags and stepped over. A flag
    listed in ``skip_two`` also consumes the token that follows it.

    Args:
        args: Sequence of command-line tokens (strings).
        n: Index in ``args`` at which to start scanning.

    Returns:
        The first token that is not a flag (and not the value of a
        ``skip_two`` flag), or ``None`` when ``args`` is empty, ``n`` is past
        the end, or only flags remain.
    """
    while n < len(args):
        curr = args[n]
        if not curr.startswith('-'):
            # Returning here (rather than after the loop) guarantees that a
            # trailing flag is never mistaken for the positional argument.
            return curr
        n += 2 if curr in skip_two else 1
    return None
"""
Using the process information, determine the DAG ID
"""
def GetCurrentDag():
    """Derive the DAG ID of the running task from the process title.

    The title reported by ``setproctitle`` is compared against a set of known
    prefixes, and the DAG ID is extracted from the remainder with
    ``parse_args``.

    Returns:
        The DAG ID string, or ``None`` when the title starts with the
        scheduler prefix, matches none of the known prefixes, or yields no
        DAG ID. If any exception is raised while inspecting the title, the
        error is printed and ``None`` is returned.
    """
    scheduler_prefix = "airflow scheduler"
    supervisor_prefix = "airflow task supervisor: "
    task_runner_prefix = "airflow task runner: "
    new_interpreter_prefix = "/usr/bin/python3"

    dag_id = None
    try:
        title = str(setproctitle.getproctitle())

        # Titles beginning with the scheduler prefix never carry a DAG ID.
        if title.startswith(scheduler_prefix):
            return dag_id

        if title.startswith(supervisor_prefix):
            # core.execute_tasks_new_python_interpreter = False
            # The rest of the title is evaluated as a Python literal holding
            # the command tokens; scanning starts at index 3.
            command = ast.literal_eval(title[len(supervisor_prefix):])
            if len(command) > 3 and command[1] == "tasks":
                dag_id = parse_args(command, 3)
        elif title.startswith(task_runner_prefix):
            tokens = title[len(task_runner_prefix):].split(" ")
            if tokens:
                dag_id = parse_args(tokens, 0)
        elif title.startswith(new_interpreter_prefix):
            # core.execute_tasks_new_python_interpreter = True
            tokens = title[len(new_interpreter_prefix):].split(" ")
            if tokens:
                dag_id = parse_args(tokens, 4)
    except Exception as err:
        # Best effort: report the problem and fall through to return None.
        print("Error determining DAG ID:", err)
    return dag_id