from airflow.decorators import dag
from airflow.operators.bash import BashOperator
from airflow.operators.python import PythonOperator
from airflow.utils.dates import days_ago  # NOTE(review): deprecated since Airflow 2.2 -- consider a fixed pendulum/datetime start_date
from airflow.utils.trigger_rule import TriggerRule
from cosmos import DbtTaskGroup, ProfileConfig, ProjectConfig, RenderConfig

# Paths to the dbt project and profiles used by every dbt invocation below.
dbt_root_path = "/home/dbt/DBT/mrds"
dbt_profiles_dir = "/home/dbt/.dbt/profiles.yml"

# "{{table}}" looks like a code-generation placeholder substituted before this
# file is deployed -- TODO confirm with the template owner.
ODS_TABLE = "{{table}}"
DATABASE_NAME = "MOPDB"
DAG_NAME = f"w_{DATABASE_NAME}_TMS_T_{ODS_TABLE}_OU_TMS_{ODS_TABLE}"
MAPPING_NAME = f"m_{DATABASE_NAME}_TMS_T_{ODS_TABLE}_OU_TMS_{ODS_TABLE}"

# Jinja snippet that resolves, at task runtime, to the run_id pushed to XCom
# by the retrieve_run_id task. The {% raw %} guards keep dbt's own Jinja
# renderer from evaluating the braces a second time.
RUN_ID_TEMPLATE = (
    '{% raw %}{{{% endraw %} task_instance.xcom_pull(task_ids="retrieve_run_id", key="run_id") {% raw %}}}{% endraw %}'
)


def _dbt_control_command(macro_name):
    """Build the bash command line for a dbt control macro.

    The start/end control tasks differed only in the macro name; the rest of
    the command is identical, so it is assembled in one place. The output is
    byte-identical to the previously duplicated literals.
    """
    return (
        'cd /home/dbt/DBT/mrds && '
        'dbt run-operation ' + macro_name + ' --vars \'{"orchestration_run_id": "'
        + RUN_ID_TEMPLATE
        + '", "input_service_name": "' + DATABASE_NAME
        + '", "workflow_name": "' + DAG_NAME + '"}\' '
        '--profiles-dir /home/dbt/.dbt/ --target dev'
    )


def retrieve_run_id(**kwargs):
    """Push the current DAG run's run_id to XCom (key='run_id') and return it.

    Downstream bash tasks template this value into the dbt --vars payload.
    """
    run_id = kwargs['run_id']
    kwargs['ti'].xcom_push(key='run_id', value=run_id)
    return run_id


def check_dag_status(**kwargs):
    """Raise if any other task in this DAG run failed.

    Needed because control_external_run_end uses trigger_rule=ALL_DONE, which
    would otherwise let the run end 'success' after an upstream failure.
    """
    for task_instance in kwargs['dag_run'].get_task_instances():
        if task_instance.state == 'failed' and task_instance.task_id != kwargs['task_instance'].task_id:
            raise Exception("Task {} failed. Failing this DAG run".format(task_instance.task_id))


def determine_workflow_status(**kwargs):
    """Return 'Y' when all monitored tasks succeeded, otherwise 'N'.

    FIX: the original pulled XCom *return values* with
    ``xcom_pull(task_ids=[...])`` and compared them to the string 'success'.
    XCom never contains task states (retrieve_run_id returns the run_id, for
    example), so the check always produced 'N'. The task state must be read
    from the TaskInstance instead.

    NOTE(review): this helper is not wired into the DAG below, and the
    hard-coded id 'mapping_mopdb' does not match the DbtTaskGroup's group id
    (MAPPING_NAME) -- confirm the intended task ids with the author.
    """
    dag_run = kwargs['dag_run']
    for task_id in ['retrieve_run_id', 'control_external_run_start', 'mapping_mopdb']:
        ti = dag_run.get_task_instance(task_id)
        if ti is None or ti.state != 'success':
            return 'N'
    return 'Y'


@dag(
    dag_id=DAG_NAME,
    schedule_interval=None,
    start_date=days_ago(2),
    catchup=False,
)
def run_dag():
    """Orchestrate one dbt mapping: register start, run models, register end."""
    retrieve_run_id_task = PythonOperator(
        task_id='retrieve_run_id',
        python_callable=retrieve_run_id,
        # provide_context=True removed: deprecated no-op since Airflow 2.0;
        # the context is always passed to the callable.
        # pool='my_custom_pool',  # create a one-slot pool in the Web UI to serialize runs of this DAG
    )

    # dbt macro recording the start of this workflow run in the control table.
    control_external_run_start = BashOperator(
        task_id='control_external_run_start',
        bash_command=_dbt_control_command('control_external_run_start'),
    )

    # Run every dbt model tagged with the mapping name as one task group.
    dbtTaskGroup = DbtTaskGroup(
        group_id=MAPPING_NAME,
        project_config=ProjectConfig(dbt_project_path=dbt_root_path),
        profile_config=ProfileConfig(
            profiles_yml_filepath=dbt_profiles_dir,
            profile_name="mrds",
            target_name="dev",
        ),
        render_config=RenderConfig(select=[f"tag:{MAPPING_NAME}"]),
        operator_args={'vars': {
            'orchestration_run_id': RUN_ID_TEMPLATE,
            "input_service_name": DATABASE_NAME,
            "workflow_name": DAG_NAME,
        }},
    )

    # dbt macro recording the end of the run; ALL_DONE so the control table is
    # always closed out even after an upstream failure.
    control_external_run_end = BashOperator(
        task_id='control_external_run_end',
        bash_command=_dbt_control_command('control_external_run_end'),
        trigger_rule=TriggerRule.ALL_DONE,
    )

    # Propagate any upstream failure to the DAG run's final state.
    dag_status = PythonOperator(
        task_id='dag_status',
        python_callable=check_dag_status,
        trigger_rule=TriggerRule.ALL_DONE,
    )

    # FIX: the single-element list wrapper around dbtTaskGroup was redundant.
    retrieve_run_id_task >> control_external_run_start >> dbtTaskGroup >> control_external_run_end >> dag_status


# Register the DAG object under its dynamic name so Airflow's DagBag finds it.
globals()[DAG_NAME] = run_dag()