96 lines
4.4 KiB
Python
96 lines
4.4 KiB
Python
from airflow.decorators import dag
|
|
from airflow.operators.bash import BashOperator
|
|
from airflow.operators.python import PythonOperator
|
|
from airflow.utils.dates import days_ago
|
|
from airflow.utils.trigger_rule import TriggerRule
|
|
from cosmos import DbtTaskGroup, ProfileConfig, ProjectConfig, RenderConfig
|
|
|
|
# Filesystem locations used by dbt: the project root and the profiles file.
dbt_root_path = "/home/dbt/DBT/mrds"
dbt_profiles_dir = "/home/dbt/.dbt/profiles.yml"

# Placeholder for the ODS table name (double-brace template syntax);
# presumably filled in by a templating step before deployment - confirm.
ODS_TABLE = "{{table}}"
DATABASE_NAME = "MOPDB"

# The workflow (DAG) name and the mapping name share one naming scheme and
# differ only in their leading letter: "w" for the workflow, "m" for the mapping.
DAG_NAME, MAPPING_NAME = (
    f"{prefix}_{DATABASE_NAME}_TMS_T_{ODS_TABLE}_OU_TMS_{ODS_TABLE}"
    for prefix in ("w", "m")
)
|
|
|
|
|
|
# Define function for the retrieval of the current run_id
|
|
def retrieve_run_id(**kwargs):
    """Publish the current run_id to XCom and return it.

    Expects the keyword arguments ``run_id`` (the identifier to publish) and
    ``ti`` (an object exposing ``xcom_push``). The value is pushed under the
    XCom key ``'run_id'`` so that other tasks can pull it by that key.

    Returns:
        The ``run_id`` value that was received.

    Raises:
        KeyError: if ``run_id`` or ``ti`` is missing from the keyword arguments.
    """
    current_run_id = kwargs['run_id']
    kwargs['ti'].xcom_push(key='run_id', value=current_run_id)
    return current_run_id
|
|
|
|
def check_dag_status(**kwargs):
    """Fail the calling task when another task of the same DAG run has failed.

    Expects the keyword arguments ``dag_run`` (an object exposing
    ``get_task_instances()``) and ``task_instance`` (the task instance that is
    executing this callable).

    Raises:
        Exception: for the first task instance found in state ``'failed'``
            whose ``task_id`` differs from the calling task's.
    """
    message_template = "Task {} failed. Failing this DAG run"
    for candidate in kwargs['dag_run'].get_task_instances():
        if candidate.state != 'failed':
            continue
        # The calling task is only looked up once a failed task has been seen,
        # and it never counts against itself.
        if candidate.task_id == kwargs['task_instance'].task_id:
            continue
        raise Exception(message_template.format(candidate.task_id))
|
|
|
|
# Define function for the check of the status of the previous tasks
|
|
def determine_workflow_status(**kwargs):
    """Return ``'Y'`` when no other task of the current DAG run has failed, else ``'N'``.

    Task states are read from the DAG run's task instances. XCom is not used
    for this: it carries the values tasks returned, not their states, so it
    cannot tell whether a task succeeded.

    Expects the keyword arguments ``dag_run`` (an object exposing
    ``get_task_instances()``) and ``ti`` (the task instance executing this
    callable, which is excluded from the check).

    Returns:
        ``'N'`` if any other task instance of the run is in state ``'failed'``,
        otherwise ``'Y'`` (including when the run has no other task instances).

    Raises:
        KeyError: if ``dag_run`` or ``ti`` is missing from the keyword arguments.
    """
    own_task_id = kwargs['ti'].task_id
    any_failed = any(
        task_instance.state == 'failed' and task_instance.task_id != own_task_id
        for task_instance in kwargs['dag_run'].get_task_instances()
    )
    return 'N' if any_failed else 'Y'
|
|
|
|
@dag(
    dag_id=DAG_NAME,
    schedule_interval=None,
    # NOTE(review): days_ago is reported as deprecated in recent Airflow 2
    # releases - consider a fixed start date; confirm against the deployed version.
    start_date=days_ago(2),
    catchup=False,
)
def run_dag():
    # Builds the workflow as one linear chain:
    #   retrieve_run_id -> control_external_run_start -> dbt task group
    #   -> control_external_run_end -> dag_status
    # The last two tasks use TriggerRule.ALL_DONE so they run even when an
    # earlier task failed; dag_status then fails itself if any other task failed.
    #
    # This description is deliberately a comment rather than a docstring: the
    # dag decorator copies a function docstring into the DAG's doc_md, which
    # would change what the Airflow UI displays.

    # Airflow-templated expression that pulls the run_id published by the
    # retrieve_run_id task. Kept byte-for-byte, including the raw/endraw
    # markers, which presumably protect it from a file-level templating step.
    run_id_template = '{% raw %}{{{% endraw %} task_instance.xcom_pull(task_ids="retrieve_run_id", key="run_id") {% raw %}}}{% endraw %}'

    def _run_operation_command(macro_name):
        # Shell command that runs one dbt control macro with the shared vars.
        # Plain concatenation is used on purpose: f-strings or str.format would
        # clash with the double-brace template expression.
        dbt_vars = (
            '{"orchestration_run_id": "' + run_id_template
            + '", "input_service_name": "' + DATABASE_NAME
            + '", "workflow_name": "' + DAG_NAME + '"}'
        )
        return (
            'cd ' + dbt_root_path + ' && '
            + 'dbt run-operation ' + macro_name
            + " --vars '" + dbt_vars + "' "
            + '--profiles-dir /home/dbt/.dbt/ --target dev'
        )

    # Publish the run_id to XCom. The Airflow context is passed to the callable
    # automatically in Airflow 2, so provide_context is not needed.
    # Optional: set pool='my_custom_pool' (a one-slot pool created in the
    # Airflow web UI) to ensure only one DAG can run this task at a time.
    retrieve_run_id_task = PythonOperator(
        task_id='retrieve_run_id',
        python_callable=retrieve_run_id,
    )

    # Run dbt macro control_external_run_start
    control_external_run_start = BashOperator(
        task_id='control_external_run_start',
        bash_command=_run_operation_command('control_external_run_start'),
    )

    # Run the dbt task group selected by the tag named after the mapping
    mapping_group = DbtTaskGroup(
        group_id=MAPPING_NAME,
        project_config=ProjectConfig(dbt_project_path=dbt_root_path),
        profile_config=ProfileConfig(
            profiles_yml_filepath=dbt_profiles_dir,
            profile_name="mrds",
            target_name="dev",
        ),
        render_config=RenderConfig(select=[f"tag:{MAPPING_NAME}"]),
        operator_args={
            'vars': {
                'orchestration_run_id': run_id_template,
                "input_service_name": DATABASE_NAME,
                "workflow_name": DAG_NAME,
            },
        },
    )

    # Run dbt macro control_external_run_end regardless of previous task outcomes
    control_external_run_end = BashOperator(
        task_id='control_external_run_end',
        bash_command=_run_operation_command('control_external_run_end'),
        trigger_rule=TriggerRule.ALL_DONE,
    )

    # Final gate: runs even if upstream failed, then fails if any other task failed
    dag_status = PythonOperator(
        task_id='dag_status',
        python_callable=check_dag_status,
        trigger_rule=TriggerRule.ALL_DONE,
    )

    # Set task dependencies
    (
        retrieve_run_id_task
        >> control_external_run_start
        >> mapping_group
        >> control_external_run_end
        >> dag_status
    )
|
|
# Build the DAG and bind it in the module namespace under a name equal to its
# dag_id (presumably so that Airflow's DAG discovery picks it up - confirm).
globals()[DAG_NAME] = run_dag()
|