"""Airflow DAG: Devo RQSD ingestion + MRDS processing.

Pipeline (three sequential tasks):
    1. run_devo_connector_rqsd — initialises an MRDS workflow run, executes the
       Devo v2 connector, and pushes the workflow context/history key to XCom.
    2. run_mrds_task            — runs MRDS processing using that context.
    3. finalise_workflow        — marks the workflow history entry as SUCCESS.

The DAG id is derived from this file's basename, so renaming the file renames
the DAG.
"""

import logging
import os
import sys
from datetime import datetime, timedelta  # datetime kept for parity with original imports

from airflow import DAG
from airflow.operators.python import PythonOperator
from airflow.utils.dates import days_ago

### DEVO CONNECTOR WITH DYNAMIC WORKFLOW CONTEXT & HISTORY KEY

# Make project-local modules importable before the project imports below.
sys.path.append('/opt/airflow/python/connectors/devo')
sys.path.append('/opt/airflow/python/mrds_common')
sys.path.append('/opt/airflow/src/airflow/dags/ods/rqsd')

# Project-local imports (resolved via the sys.path entries above).
from mrds.utils.manage_runs import init_workflow, finalise_workflow
from devo_connector import main as devo_main
from devo_connector_v2 import run as devo_main2
from mrds.core import main as mrds_main

logger = logging.getLogger(__name__)

# Default arguments applied to every task in this DAG.
default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': days_ago(1),
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 3,
    'retry_delay': timedelta(minutes=5),
}

# Dynamic DAG id extracted from this file's basename (without extension).
dag_id = os.path.splitext(os.path.basename(__file__))[0]

with DAG(
    dag_id=dag_id,
    default_args=default_args,
    description='Run devo RQSD data ingestion workflow with MRDS processing',
    schedule_interval=None,  # manually triggered only
    catchup=False,
    tags=["Devo", "RQSD", "MRDS", "Connector"],
    # NOTE(review): these params are declared but never read by the tasks
    # below — run_mrds_task hardcodes its inputs. Confirm whether triggering
    # with params is still a supported use case.
    params={
        "source_filename": "",
        "config_file": "",
    },
) as dag:

    def run_devo_connector_rqsd(**context):
        """Run the Devo RQSD connector workflow.

        Initialises an MRDS workflow run, executes the v2 Devo connector, and
        pushes ``workflow_history_key`` and ``workflow_context`` to XCom for
        the downstream tasks.

        Raises:
            ValueError: if any required DB environment variable is missing.
            Exception: re-raised after attempting to finalise the workflow
                run as FAILED (only when initialisation had succeeded).
        """
        # Sentinel so the except-block can tell whether init_workflow ran.
        a_workflow_history_key = None
        try:
            env = os.getenv("MRDS_ENV")
            username = os.getenv("MRDS_LOADER_DB_USER")
            password = os.getenv("MRDS_LOADER_DB_PASS")
            tnsalias = os.getenv("MRDS_LOADER_DB_TNS")

            if not all([username, password, tnsalias]):
                raise ValueError(
                    "Missing one or more required environment variables: "
                    "MRDS_LOADER_DB_USER, MRDS_LOADER_DB_PASS, MRDS_LOADER_DB_TNS"
                )

            logger.info("Starting Devo RQSD workflow from Airflow DAG for env '%s'", env)

            database_name = 'MOPDB'
            workflow_name = 'w_MOPDB_RQSD_PROCESS'
            # Airflow run_id doubles as the MRDS workflow run identifier.
            workflow_run_id = str(context['ti'].run_id)

            a_workflow_history_key = init_workflow(database_name, workflow_name, workflow_run_id)
            logger.info("Initialized workflow with history key: %s", a_workflow_history_key)

            workflow_context = {
                "run_id": workflow_run_id,
                "a_workflow_history_key": a_workflow_history_key,
            }

            flow_config_path = "/opt/airflow/src/airflow/dags/ods/rqsd/rqsd_process/config/yaml/flow_config_rqsd_observations.yaml"
            env_config_path = "/opt/airflow/python/connectors/devo/config/env_config_rqsd.yaml"

            logger.info("Starting Devo RQSD workflow from Airflow DAG")
            count = devo_main2(workflow_context, flow_config_path, env_config_path, env)

            print("=================================================================")
            print(f"Devo RQSD workflow completed successfully with count : {count}")
            logger.info("Devo RQSD workflow completed successfully with count : %s", count)

            # Push the workflow context and history key to XCom for downstream tasks.
            context['ti'].xcom_push(key='workflow_history_key', value=a_workflow_history_key)
            context['ti'].xcom_push(key='workflow_context', value=workflow_context)

        except Exception as e:
            logger.error("Error running Devo RQSD workflow: %s", e, exc_info=True)
            # If init_workflow succeeded but the workflow failed afterwards,
            # finalise the run with FAILED status (best-effort).
            if a_workflow_history_key is not None:
                try:
                    finalise_workflow(a_workflow_history_key, "FAILED")
                except Exception as finalise_error:
                    logger.error("Failed to finalise workflow after error: %s", finalise_error)
            raise

    def run_mrds_task(**context):
        """Run MRDS processing using the workflow context from task 1.

        Pulls ``workflow_context`` from XCom and invokes ``mrds_main`` with a
        hardcoded source filename and flow config.

        Raises:
            ValueError: if the upstream XCom context is missing.
        """
        try:
            ti = context.get('ti')
            workflow_context = ti.xcom_pull(key='workflow_context', task_ids='run_devo_connector_rqsd')
            if not workflow_context:
                raise ValueError("No workflow_context from Task 1")

            print("=== workflow_context ====:", workflow_context)

            # NOTE(review): inputs are hardcoded — the DAG params declared
            # above are not consulted here.
            source_filename = "RQSD_OBSERVATIONS.csv"
            config_file = "/opt/airflow/src/airflow/dags/ods/rqsd/rqsd_process/config/yaml/flow_config_devo_process.yaml"

            print("---- run_mrds_task ----")
            print("source_filename :", source_filename)
            print("config_file = ", config_file)
            print("------------------------")

            # Defensive guards (cannot currently fire with hardcoded values,
            # kept for safety if the inputs become dynamic again).
            if not source_filename:
                raise ValueError("No source_filename provided in DAG run params.")
            if not config_file:
                raise ValueError("No config_file path provided in DAG run params.")

            logger.info(
                "Starting MRDS task with source_filename: %s, config_file: %s",
                source_filename, config_file,
            )

            # Run MRDS with the workflow context from the previous task.
            mrds_main(workflow_context, source_filename, config_file, generate_workflow_context=True)

            logger.info("MRDS task completed successfully")

        except Exception as e:
            logger.error("Error running MRDS task: %s", e, exc_info=True)
            raise

    def finalise_workflow_task(**context):
        """Finalise the workflow run with SUCCESS status.

        Pulls ``workflow_history_key`` from XCom (pushed by task 1).

        Raises:
            ValueError: if no history key is found in XCom.
        """
        ti = context['ti']
        a_workflow_history_key = ti.xcom_pull(key='workflow_history_key', task_ids='run_devo_connector_rqsd')
        if a_workflow_history_key is None:
            raise ValueError("No workflow history key found in XCom; cannot finalise workflow")

        finalise_workflow(a_workflow_history_key, "SUCCESS")
        logger.info("Finalised workflow with history key %s as SUCCESS", a_workflow_history_key)

    # Task definitions
    run_devo = PythonOperator(
        task_id='run_devo_connector_rqsd',
        python_callable=run_devo_connector_rqsd,
        provide_context=True,  # no-op in Airflow 2.x, required in 1.10
    )

    run_mrds = PythonOperator(
        task_id='run_mrds_task',
        python_callable=run_mrds_task,
        provide_context=True,
    )

    finalize = PythonOperator(
        task_id='finalise_workflow',
        python_callable=finalise_workflow_task,
        provide_context=True,
    )

    # Task dependencies: connector -> MRDS processing -> finalisation.
    run_devo >> run_mrds >> finalize