This commit is contained in:
Grzegorz Michalski
2026-03-02 09:47:35 +01:00
commit 2c225d68ac
715 changed files with 130067 additions and 0 deletions

View File

@@ -0,0 +1,524 @@
import sys
import os
from airflow import DAG
from airflow.operators.python import PythonOperator
from airflow.operators.dummy import DummyOperator
from airflow.operators.trigger_dagrun import TriggerDagRunOperator
from airflow.utils.dates import days_ago
from airflow.utils.trigger_rule import TriggerRule
from datetime import datetime, timedelta
import logging
try:
from airflow.exceptions import AirflowFailException, AirflowSkipException
except Exception:
from airflow.exceptions import AirflowException as AirflowFailException
from airflow.exceptions import AirflowSkipException
sys.path.append('/opt/airflow/python/connectors/devo')
sys.path.append('/opt/airflow/python/mrds_common')
sys.path.append('/opt/airflow/src/airflow/dags/ods/rqsd')
from mrds.utils.manage_runs import init_workflow as mrds_init_workflow, finalise_workflow as mrds_finalise_workflow
from devo_connector import DevoConnector
from mrds.core import main as mrds_main
from mrds.utils.security_utils import get_verified_run_id, verify_run_id
# Per-feed configuration, keyed by MRDS task name. Each entry provides:
#   flow_config_path - Devo connector flow definition (YAML)
#   env_config_path  - shared Devo environment config (YAML)
#   source_filename  - CSV produced by the Devo connector, consumed by MRDS
#   config_file      - MRDS parse configuration (YAML)
# The first entry (OBSERVATIONS) gates the remaining feeds in the wiring below.
TASK_CONFIGS = {
    "m_ODS_RQSD_OBSERVATIONS": {
        "flow_config_path": "/opt/airflow/src/airflow/dags/ods/rqsd/rqsd_process/config/yaml/m_ODS_RQSD_OBSERVATIONS.yaml",
        "env_config_path": "/opt/airflow/python/connectors/devo/config/env_config_rqsd.yaml",
        "source_filename": "RQSD_OBSERVATIONS.csv",
        "config_file": "/opt/airflow/src/airflow/dags/ods/rqsd/rqsd_process/config/yaml/m_ODS_RQSD_OBSERVATIONS_PARSE.yaml"
    },
    "m_ODS_RQSD_FX": {
        "flow_config_path": "/opt/airflow/src/airflow/dags/ods/rqsd/rqsd_process/config/yaml/m_ODS_RQSD_FX.yaml",
        "env_config_path": "/opt/airflow/python/connectors/devo/config/env_config_rqsd.yaml",
        "source_filename": "RQSD_FX.csv",
        "config_file": "/opt/airflow/src/airflow/dags/ods/rqsd/rqsd_process/config/yaml/m_ODS_RQSD_FX_PARSE.yaml"
    },
    "m_ODS_RQSD_SUBA_DEVO": {
        "flow_config_path": "/opt/airflow/src/airflow/dags/ods/rqsd/rqsd_process/config/yaml/m_ODS_RQSD_SUBA_DEVO.yaml",
        "env_config_path": "/opt/airflow/python/connectors/devo/config/env_config_rqsd.yaml",
        "source_filename": "RQSD_SUBA_DEVO.csv",
        "config_file": "/opt/airflow/src/airflow/dags/ods/rqsd/rqsd_process/config/yaml/m_ODS_RQSD_SUBA_DEVO_PARSE.yaml"
    }
}
# Default operator arguments applied to every task in this DAG
# (individual operators below override retries to 0).
default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': days_ago(1),
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=1),
}
# Derive the DAG id from this file's name so the filename stays the single
# source of truth (e.g. "w_X.py" -> "w_X").
dag_id = os.path.splitext(os.path.basename(__file__))[0]
# Identifies this run in the MRDS workflow-history tables.
WORKFLOW_CONFIG = {
    "database_name": "ODS",
    "workflow_name": dag_id
}
# DAG definition. The schedule is currently disabled (manual/external trigger
# only); the commented cron previously ran it every 10 minutes.
with DAG(
    dag_id=dag_id,
    default_args=default_args,
    description='Run devo RQSD data ingestion workflow with conditional MRDS processing - Multi-task',
    #schedule_interval='*/10 * * * *', # every 10 minutes
    schedule_interval=None,
    catchup=False,
    max_active_runs=1,
    tags=["Devo", "RQSD", "MRDS", "Connector", "Multi-Task"]
) as dag:
def init_workflow_task(**context):
    """Validate the environment and register this DAG run with MRDS.

    Logs the relevant environment variables (DB password masked), obtains the
    verified run id, checks the required loader-DB variables are present,
    calls mrds_init_workflow and publishes results to XCom for downstream
    tasks (keys: workflow_history_key, workflow_context, env).

    Raises:
        ValueError: if MRDS_LOADER_DB_USER/PASS/TNS is missing.
    """
    try:
        database_name = WORKFLOW_CONFIG["database_name"]
        workflow_name = WORKFLOW_CONFIG["workflow_name"]
        # Snapshot of the environment for the task log; the password is
        # masked so it never appears in plaintext.
        env_vars = {
            'MRDS_ENV': os.getenv("MRDS_ENV"),
            'MRDS_LOADER_DB_USER': os.getenv("MRDS_LOADER_DB_USER"),
            'MRDS_LOADER_DB_PASS': '***MASKED***' if os.getenv("MRDS_LOADER_DB_PASS") else None,
            'MRDS_LOADER_DB_TNS': os.getenv("MRDS_LOADER_DB_TNS"),
            'BUCKET_NAMESPACE': os.getenv("BUCKET_NAMESPACE"),
        }
        for key, value in env_vars.items():
            logging.info(f"{key}: {value}")
        # Run id comes from the security helper, not raw context
        # (see mrds.utils.security_utils).
        run_id_value = get_verified_run_id(context)
        logging.info(f"Task ID: {context.get('task_instance_key_str', 'N/A')}")
        logging.info(f"Run ID: {run_id_value}")
        logging.info(f"Execution Date: {context.get('execution_date', 'N/A')}")
        dag_obj = context.get('dag')
        dag_id_str = dag_obj.dag_id if dag_obj else 'N/A'
        logging.info(f"DAG ID: {dag_id_str}")
        env = os.getenv("MRDS_ENV", "dev")
        username = os.getenv("MRDS_LOADER_DB_USER")
        password = os.getenv("MRDS_LOADER_DB_PASS")
        tnsalias = os.getenv("MRDS_LOADER_DB_TNS")
        # Fail fast, naming exactly which DB variables are missing.
        if not all([username, password, tnsalias]):
            missing_vars = []
            if not username:
                missing_vars.append("MRDS_LOADER_DB_USER")
            if not password:
                missing_vars.append("MRDS_LOADER_DB_PASS")
            if not tnsalias:
                missing_vars.append("MRDS_LOADER_DB_TNS")
            error_msg = f"Missing required environment variables: {', '.join(missing_vars)}"
            logging.error(error_msg)
            raise ValueError(error_msg)
        logging.info(f"Initializing {workflow_name} workflow for env '{env}'")
        workflow_run_id = run_id_value
        # Register the run in the MRDS history tables; the returned key is
        # what finalise_workflow_task later closes out.
        a_workflow_history_key = mrds_init_workflow(database_name, workflow_name, workflow_run_id)
        logging.info(f"Initialized workflow with history key: {a_workflow_history_key}")
        workflow_context = {
            "run_id": workflow_run_id,
            "a_workflow_history_key": a_workflow_history_key
        }
        ti = context['ti']
        ti.xcom_push(key='workflow_history_key', value=a_workflow_history_key)
        ti.xcom_push(key='workflow_context', value=workflow_context)
        ti.xcom_push(key='env', value=env)
        logging.info("Workflow initialization completed successfully")
    except Exception as e:
        logging.error(f"Error initializing workflow: {e}", exc_info=True)
        raise
def run_devo_connector(**context):
    """Run the Devo connector for this task's feed and publish the outcome.

    XCom keys written: row_count, devo_success, should_run_mrds,
    error_message. Returns the connector's row count on success.
    """
    ti = context['ti']
    task_id = context['task'].task_id

    def _record(rows, success, run_mrds, message):
        # Single place that publishes the connector outcome for the
        # downstream check_* tasks.
        ti.xcom_push(key='row_count', value=rows)
        ti.xcom_push(key='devo_success', value=success)
        ti.xcom_push(key='should_run_mrds', value=run_mrds)
        ti.xcom_push(key='error_message', value=message)

    try:
        task_name = task_id.replace('devo_', '') if task_id.startswith('devo_') else task_id
        task_config = TASK_CONFIGS.get(task_name)
        if not task_config:
            error_msg = f"No configuration found for task: {task_name}"
            logging.error(error_msg)
            _record(0, False, False, error_msg)
            raise ValueError(error_msg)
        workflow_context = ti.xcom_pull(key='workflow_context', task_ids='init_workflow')
        env = ti.xcom_pull(key='env', task_ids='init_workflow')
        if not workflow_context:
            error_msg = "No workflow_context from init task"
            logging.error(error_msg)
            _record(0, False, False, error_msg)
            raise ValueError(error_msg)
        logging.info(f"Starting Devo connector for env '{env}' - {task_name}")
        connector = DevoConnector(
            flow_config_path=task_config["flow_config_path"],
            env_config_path=task_config["env_config_path"],
            env=env,
            logger=logging.getLogger(f"devo_connector_{env}_{task_name}"),
        )
        row_count = connector.run(workflow_context)
        logging.info(f"Devo connector completed successfully for {task_name}. Processed {row_count} rows.")
        # MRDS only needs to run when the feed actually produced rows.
        _record(row_count, True, row_count > 0, None)
        return row_count
    except Exception as e:
        error_msg = f"Error running Devo connector: {str(e)}"
        logging.error(error_msg, exc_info=True)
        _record(0, False, False, error_msg)
        raise
def check_should_run_mrds(**context):
    """Gate the MRDS parse task on the matching Devo connector's outcome.

    Raises AirflowSkipException when the connector failed or produced no
    rows. For the OBSERVATIONS feed, a no-data result additionally pushes
    skip_all_tasks=True so every dependent parallel feed is skipped too.
    """
    ti = context['ti']
    dag_run = context['dag_run']
    current_id = context['task'].task_id
    task_name = current_id.replace('check_', '') if current_id.startswith('check_') else current_id
    upstream_id = f'devo_{task_name}'
    upstream = dag_run.get_task_instance(upstream_id)
    logging.info(f"Devo task {upstream_id} state: {upstream.state}")
    if upstream.state == 'failed':
        error_msg = ti.xcom_pull(key='error_message', task_ids=upstream_id)
        logging.info(f"Devo connector failed for {task_name} - skipping MRDS task. Error: {error_msg}")
        raise AirflowSkipException(f"Devo connector failed for {task_name}")
    should_run_mrds = ti.xcom_pull(key='should_run_mrds', task_ids=upstream_id)
    row_count = ti.xcom_pull(key='row_count', task_ids=upstream_id)
    no_data = not should_run_mrds or row_count == 0
    if no_data and task_name == "m_ODS_RQSD_OBSERVATIONS":
        # The gate feed had nothing: flag the whole run as skippable.
        logging.info(f"OBSERVATIONS task has no data (row_count: {row_count}) - marking to skip all subsequent tasks")
        ti.xcom_push(key='skip_all_tasks', value=True)
        raise AirflowSkipException(f"No OBSERVATIONS data found (row_count: {row_count}) - skipping all subsequent processing")
    if no_data:
        logging.info(f"Skipping MRDS task for {task_name} - row count: {row_count}")
        raise AirflowSkipException(f"No data to process for {task_name} (row_count: {row_count})")
    logging.info(f"MRDS task should run for {task_name} - row count: {row_count}")
    return True
def check_should_run_parallel_tasks(**context):
    """Gate for the non-OBSERVATIONS feeds.

    First honours the global skip flag set when the OBSERVATIONS feed
    returned no data, then falls through to the standard per-feed check.
    """
    ti = context['ti']
    current_id = context['task'].task_id
    task_name = current_id.replace('check_', '') if current_id.startswith('check_') else current_id
    if ti.xcom_pull(key='skip_all_tasks', task_ids='check_m_ODS_RQSD_OBSERVATIONS'):
        logging.info(f"Skipping {task_name} - OBSERVATIONS task had no data")
        raise AirflowSkipException(f"Skipping {task_name} - OBSERVATIONS task had no data")
    return check_should_run_mrds(**context)
def run_mrds_task(**context):
    """Parse the CSV written by the Devo connector via the MRDS core loader."""
    ti = context['ti']
    current_id = context['task'].task_id
    try:
        task_name = current_id.replace('_PARSE', '') if current_id.endswith('_PARSE') else current_id
        cfg = TASK_CONFIGS.get(task_name)
        if not cfg:
            raise ValueError(f"No configuration found for task: {task_name}")
        workflow_context = ti.xcom_pull(key='workflow_context', task_ids='init_workflow')
        row_count = ti.xcom_pull(key='row_count', task_ids=f'devo_{task_name}')
        if not workflow_context:
            raise ValueError("No workflow_context from init task")
        logging.info(f"Starting MRDS task for {task_name} with workflow context: {workflow_context}")
        logging.info(f"Processing {row_count} rows from Devo connector")
        # Workflow context was created by init_workflow, so MRDS must not
        # generate a fresh one.
        mrds_main(workflow_context, cfg["source_filename"], cfg["config_file"], generate_workflow_context=False)
        logging.info(f"MRDS task completed successfully for {task_name}")
        return "SUCCESS"
    except Exception as e:
        logging.error(f"Error running MRDS task: {e}", exc_info=True)
        raise
def check_success_for_mopdb(**context):
    """Decide whether the MOPDB trigger should fire.

    Raises AirflowSkipException when any devo/MRDS task failed, when every
    MRDS parse task was skipped (nothing to process), or when this check
    itself errors out; otherwise returns "SUCCESS".
    """
    try:
        ti = context['ti']
        dag_run = context['dag_run']
        failure_reasons = []
        # Inspect the terminal state of each feed's connector and parse task.
        for task_name in TASK_CONFIGS.keys():
            if dag_run.get_task_instance(f'devo_{task_name}').state == 'failed':
                failure_reasons.append(f"{task_name}: Devo connector failed")
            if dag_run.get_task_instance(f'{task_name}_PARSE').state == 'failed':
                failure_reasons.append(f"{task_name}: MRDS task failed")
        if failure_reasons:
            error_msg = f"Tasks failed - skipping MOPDB trigger: {', '.join(failure_reasons)}"
            logging.info(error_msg)
            raise AirflowSkipException(error_msg)
        parse_states = [
            dag_run.get_task_instance(f'{task_name}_PARSE').state
            for task_name in TASK_CONFIGS.keys()
        ]
        if all(state == 'skipped' for state in parse_states):
            error_msg = "All MRDS tasks were skipped (no data to process) - skipping MOPDB trigger"
            logging.info(error_msg)
            raise AirflowSkipException(error_msg)
        logging.info("All tasks completed successfully - proceeding to trigger MOPDB")
        return "SUCCESS"
    except AirflowSkipException:
        raise
    except Exception as e:
        # Unexpected errors are converted to a skip so the trigger never
        # fires on an indeterminate state.
        logging.error(f"Error checking success for MOPDB: {e}", exc_info=True)
        raise AirflowSkipException(f"Error checking success - skipping MOPDB trigger: {e}")
def end_log_table_task(**context):
    """Terminal logging hook; scheduled with ALL_DONE so it always runs."""
    try:
        logging.info("End log table task - always runs at the end")
    except Exception as e:
        logging.error(f"Error in end log table task: {e}", exc_info=True)
        raise
    return "SUCCESS"
def finalise_workflow_task(**context):
    """Mark the MRDS workflow run as succeeded ("Y") or failed ("N").

    Inspects the terminal states of every devo/MRDS task plus the MOPDB
    trigger and the end-log task, closes the MRDS run-history record, and
    raises AirflowFailException when anything failed so the DAG run
    reflects it.

    Raises:
        AirflowFailException: when any monitored task failed, or when
            finalisation itself errors out.
    """
    # Pre-initialised so the cleanup path in the except block can safely
    # test it even if the failure happened before the XCom pull.
    a_workflow_history_key = None
    try:
        ti = context['ti']
        dag_run = context['dag_run']
        a_workflow_history_key = ti.xcom_pull(key='workflow_history_key', task_ids='init_workflow')
        if a_workflow_history_key is None:
            raise ValueError("No workflow history key found in XCom; cannot finalise workflow")
        workflow_success = True
        failure_reasons = []
        for task_name in TASK_CONFIGS.keys():
            devo_task_id = f'devo_{task_name}'
            mrds_task_id = f'{task_name}_PARSE'
            devo_task = dag_run.get_task_instance(devo_task_id)
            mrds_task = dag_run.get_task_instance(mrds_task_id)
            if devo_task.state == 'failed':
                workflow_success = False
                failure_reasons.append(f"{task_name}: Devo connector failed")
            if mrds_task.state == 'failed':
                workflow_success = False
                failure_reasons.append(f"{task_name}: MRDS task failed")
            elif mrds_task.state == 'skipped':
                # A skipped parse is normal when the connector succeeded but
                # found no rows; it only counts as a failure when the
                # connector itself failed.
                row_count = ti.xcom_pull(key='row_count', task_ids=devo_task_id)
                devo_success = ti.xcom_pull(key='devo_success', task_ids=devo_task_id)
                if devo_success and row_count == 0:
                    logging.info(f"{task_name} - MRDS task was skipped due to no data - this is normal")
                elif not devo_success:
                    workflow_success = False
                    failure_reasons.append(f"{task_name}: Devo connector failed, MRDS skipped")
        trigger_mopdb_task = dag_run.get_task_instance('trigger_mopdb_dag')
        if trigger_mopdb_task.state == 'failed':
            workflow_success = False
            failure_reasons.append("MOPDB trigger failed")
        end_log_task = dag_run.get_task_instance('end_log_table')
        if end_log_task.state == 'failed':
            workflow_success = False
            failure_reasons.append("End log table failed")
        if workflow_success:
            mrds_finalise_workflow(a_workflow_history_key, "Y")
            logging.info(f"Finalised workflow with history key {a_workflow_history_key} as SUCCESS")
        else:
            mrds_finalise_workflow(a_workflow_history_key, "N")
            logging.error(f"Finalised workflow with history key {a_workflow_history_key} as FAILED")
            logging.error(f"Failure reasons: {', '.join(failure_reasons)}")
            raise AirflowFailException(f"Workflow failed: {', '.join(failure_reasons)}")
    except AirflowFailException:
        raise
    except Exception as e:
        logging.error(f"Error finalizing workflow: {e}", exc_info=True)
        try:
            if a_workflow_history_key:
                mrds_finalise_workflow(a_workflow_history_key, "N")
        except Exception as cleanup_error:
            # Best-effort cleanup must not mask the original failure, but a
            # bare `except:` here would also have swallowed
            # KeyboardInterrupt/SystemExit — narrow it and log instead.
            logging.error(f"Failed to finalise workflow history after error: {cleanup_error}")
        raise AirflowFailException(f"Workflow finalization failed: {e}")
# ---- Control / bookkeeping operators --------------------------------------
init_workflow = PythonOperator(
    task_id='init_workflow',
    python_callable=init_workflow_task,
    provide_context=True,
    retries=0,
)
# Runs after every parse task (ALL_DONE) and decides — by raising
# AirflowSkipException — whether the MOPDB trigger should fire.
check_mopdb = PythonOperator(
    task_id='check_success_for_mopdb',
    python_callable=check_success_for_mopdb,
    provide_context=True,
    trigger_rule=TriggerRule.ALL_DONE,
    retries=0,
)
# Fire-and-forget trigger of the downstream MOPDB DAG; fires only when no
# upstream failed and at least one succeeded.
trigger_mopdb_dag = TriggerDagRunOperator(
    task_id='trigger_mopdb_dag',
    trigger_dag_id='w_MOPDB_RQSD_PROCESS',
    wait_for_completion=False,
    trigger_rule=TriggerRule.NONE_FAILED_MIN_ONE_SUCCESS,
    retries=0,
)
end_log_table = PythonOperator(
    task_id='end_log_table',
    python_callable=end_log_table_task,
    trigger_rule=TriggerRule.ALL_DONE,
    retries=0,
)
# ALL_DONE so the MRDS run history is closed out even when tasks failed.
finalize_workflow = PythonOperator(
    task_id='finalize_workflow',
    python_callable=finalise_workflow_task,
    provide_context=True,
    trigger_rule=TriggerRule.ALL_DONE,
    retries=0,
)
# ---- Per-feed task chains -------------------------------------------------
# The first configured feed (OBSERVATIONS) acts as the gate: the remaining
# feeds only start after its parse task, so a no-data OBSERVATIONS run can
# skip everything downstream via the skip_all_tasks XCom flag.
task_names = list(TASK_CONFIGS.keys())
first_task_name = task_names[0]
parallel_task_names = task_names[1:]
first_devo_task = PythonOperator(
    task_id=f'devo_{first_task_name}',
    python_callable=run_devo_connector,
    provide_context=True,
    retries=0,
)
first_check_task = PythonOperator(
    task_id=f'check_{first_task_name}',
    python_callable=check_should_run_mrds,
    provide_context=True,
    trigger_rule=TriggerRule.ALL_DONE,
    retries=0,
)
first_mrds_task = PythonOperator(
    task_id=f'{first_task_name}_PARSE',
    python_callable=run_mrds_task,
    provide_context=True,
    retries=0,
)
parallel_tasks = []
for task_name in parallel_task_names:
    devo_task = PythonOperator(
        task_id=f'devo_{task_name}',
        python_callable=run_devo_connector,
        provide_context=True,
        retries=0,
    )
    # Uses the parallel-task gate so the OBSERVATIONS skip flag is honoured.
    check_task = PythonOperator(
        task_id=f'check_{task_name}',
        python_callable=check_should_run_parallel_tasks,
        provide_context=True,
        trigger_rule=TriggerRule.ALL_DONE,
        retries=0,
    )
    mrds_task = PythonOperator(
        task_id=f'{task_name}_PARSE',
        python_callable=run_mrds_task,
        provide_context=True,
        retries=0,
    )
    # Each secondary feed chains off the first feed's parse task.
    first_mrds_task >> devo_task >> check_task >> mrds_task
    parallel_tasks.extend([devo_task, check_task, mrds_task])
# ---- Wiring ---------------------------------------------------------------
init_workflow >> first_devo_task >> first_check_task >> first_mrds_task
# Collect every *_PARSE task (first + parallel) as the fan-in set feeding
# the MOPDB gate.
all_final_tasks = [first_mrds_task]
for task_name in parallel_task_names:
    mrds_task_id = f'{task_name}_PARSE'
    for task in parallel_tasks:
        if task.task_id == mrds_task_id:
            all_final_tasks.append(task)
            break
for task in all_final_tasks:
    task >> check_mopdb
check_mopdb >> end_log_table >> trigger_mopdb_dag >> finalize_workflow

View File

@@ -0,0 +1,424 @@
"""
DAG ID set to w_ODS_RQSD_PROCESS_MANUAL
Removed OBSERVATIONS from TASK_CONFIGS (only FX and SUBA_DEVO remain)
Updated run_devo_connector to always set should_run_mrds=True (no row count check)
Simplified check_should_run_mrds to always proceed if devo task succeeded (manual mode)
Removed special OBSERVATIONS handling logic
Trigger DAG ID changed to w_MOPDB_RQSD_PROCESS_MANUAL
Added "MANUAL" tag
Both FX and SUBA_DEVO tasks run in parallel from init
Removed skipped task handling for no data scenarios in finalize
"""
import sys
import os
from airflow import DAG
from airflow.operators.python import PythonOperator
from airflow.operators.dummy import DummyOperator
from airflow.operators.trigger_dagrun import TriggerDagRunOperator
from airflow.utils.dates import days_ago
from airflow.utils.trigger_rule import TriggerRule
from datetime import datetime, timedelta
import logging
try:
from airflow.exceptions import AirflowFailException, AirflowSkipException
except Exception:
from airflow.exceptions import AirflowException as AirflowFailException
from airflow.exceptions import AirflowSkipException
sys.path.append('/opt/airflow/python/connectors/devo')
sys.path.append('/opt/airflow/python/mrds_common')
sys.path.append('/opt/airflow/src/airflow/dags/ods/rqsd')
from mrds.utils.manage_runs import init_workflow as mrds_init_workflow, finalise_workflow as mrds_finalise_workflow
from devo_connector import DevoConnector
from mrds.core import main as mrds_main
from mrds.utils.security_utils import get_verified_run_id, verify_run_id
# Per-feed configuration for the manual DAG, keyed by MRDS task name.
# Only FX and SUBA_DEVO are present here (no OBSERVATIONS gate feed).
#   flow_config_path - Devo connector flow definition (YAML)
#   env_config_path  - shared Devo environment config (YAML)
#   source_filename  - CSV produced by the Devo connector, consumed by MRDS
#   config_file      - MRDS parse configuration (YAML)
TASK_CONFIGS = {
    "m_ODS_RQSD_FX": {
        "flow_config_path": "/opt/airflow/src/airflow/dags/ods/rqsd/rqsd_process/config/yaml/m_ODS_RQSD_FX.yaml",
        "env_config_path": "/opt/airflow/python/connectors/devo/config/env_config_rqsd.yaml",
        "source_filename": "RQSD_FX.csv",
        "config_file": "/opt/airflow/src/airflow/dags/ods/rqsd/rqsd_process/config/yaml/m_ODS_RQSD_FX_PARSE.yaml"
    },
    "m_ODS_RQSD_SUBA_DEVO": {
        "flow_config_path": "/opt/airflow/src/airflow/dags/ods/rqsd/rqsd_process/config/yaml/m_ODS_RQSD_SUBA_DEVO.yaml",
        "env_config_path": "/opt/airflow/python/connectors/devo/config/env_config_rqsd.yaml",
        "source_filename": "RQSD_SUBA_DEVO.csv",
        "config_file": "/opt/airflow/src/airflow/dags/ods/rqsd/rqsd_process/config/yaml/m_ODS_RQSD_SUBA_DEVO_PARSE.yaml"
    }
}
# Default operator arguments applied to every task in this DAG.
default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': days_ago(1),
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
}
# The manual variant uses a fixed DAG id (not derived from the filename).
dag_id = "w_ODS_RQSD_PROCESS_MANUAL"
# Identifies this run in the MRDS workflow-history tables.
WORKFLOW_CONFIG = {
    "database_name": "ODS",
    "workflow_name": dag_id
}
# DAG definition: no schedule — this variant is triggered manually.
with DAG(
    dag_id=dag_id,
    default_args=default_args,
    description='Manual trigger for RQSD data processing',
    schedule_interval=None,
    catchup=False,
    tags=["Devo", "RQSD", "MRDS", "Connector", "Multi-Task", "MANUAL"]
) as dag:
def init_workflow_task(**context):
    """Validate the environment and register this DAG run with MRDS.

    Logs the relevant environment variables (DB password masked), obtains the
    verified run id, checks the required loader-DB variables are present,
    calls mrds_init_workflow and publishes results to XCom for downstream
    tasks (keys: workflow_history_key, workflow_context, env).

    Raises:
        ValueError: if MRDS_LOADER_DB_USER/PASS/TNS is missing.
    """
    try:
        database_name = WORKFLOW_CONFIG["database_name"]
        workflow_name = WORKFLOW_CONFIG["workflow_name"]
        # Snapshot of the environment for the task log; the password is
        # masked so it never appears in plaintext.
        env_vars = {
            'MRDS_ENV': os.getenv("MRDS_ENV"),
            'MRDS_LOADER_DB_USER': os.getenv("MRDS_LOADER_DB_USER"),
            'MRDS_LOADER_DB_PASS': '***MASKED***' if os.getenv("MRDS_LOADER_DB_PASS") else None,
            'MRDS_LOADER_DB_TNS': os.getenv("MRDS_LOADER_DB_TNS"),
            'BUCKET_NAMESPACE': os.getenv("BUCKET_NAMESPACE"),
        }
        for key, value in env_vars.items():
            logging.info(f"{key}: {value}")
        # Run id comes from the security helper, not raw context
        # (see mrds.utils.security_utils).
        run_id_value = get_verified_run_id(context)
        logging.info(f"Task ID: {context.get('task_instance_key_str', 'N/A')}")
        logging.info(f"Run ID: {run_id_value}")
        logging.info(f"Execution Date: {context.get('execution_date', 'N/A')}")
        dag_obj = context.get('dag')
        dag_id_str = dag_obj.dag_id if dag_obj else 'N/A'
        logging.info(f"DAG ID: {dag_id_str}")
        env = os.getenv("MRDS_ENV", "dev")
        username = os.getenv("MRDS_LOADER_DB_USER")
        password = os.getenv("MRDS_LOADER_DB_PASS")
        tnsalias = os.getenv("MRDS_LOADER_DB_TNS")
        # Fail fast, naming exactly which DB variables are missing.
        if not all([username, password, tnsalias]):
            missing_vars = []
            if not username:
                missing_vars.append("MRDS_LOADER_DB_USER")
            if not password:
                missing_vars.append("MRDS_LOADER_DB_PASS")
            if not tnsalias:
                missing_vars.append("MRDS_LOADER_DB_TNS")
            error_msg = f"Missing required environment variables: {', '.join(missing_vars)}"
            logging.error(error_msg)
            raise ValueError(error_msg)
        logging.info(f"Initializing {workflow_name} workflow for env '{env}'")
        workflow_run_id = run_id_value
        # Register the run in the MRDS history tables; the returned key is
        # what finalise_workflow_task later closes out.
        a_workflow_history_key = mrds_init_workflow(database_name, workflow_name, workflow_run_id)
        logging.info(f"Initialized workflow with history key: {a_workflow_history_key}")
        workflow_context = {
            "run_id": workflow_run_id,
            "a_workflow_history_key": a_workflow_history_key
        }
        ti = context['ti']
        ti.xcom_push(key='workflow_history_key', value=a_workflow_history_key)
        ti.xcom_push(key='workflow_context', value=workflow_context)
        ti.xcom_push(key='env', value=env)
        logging.info("Workflow initialization completed successfully")
    except Exception as e:
        logging.error(f"Error initializing workflow: {e}", exc_info=True)
        raise
def run_devo_connector(**context):
    """Run the Devo connector for this feed (manual mode).

    On success, should_run_mrds is always pushed as True — the manual DAG
    does not gate the MRDS parse on a row count. XCom keys written:
    row_count, devo_success, should_run_mrds.
    """
    ti = context['ti']
    try:
        task_id = context['task'].task_id
        task_name = task_id.replace('devo_', '') if task_id.startswith('devo_') else task_id
        cfg = TASK_CONFIGS.get(task_name)
        if not cfg:
            raise ValueError(f"No configuration found for task: {task_name}")
        workflow_context = ti.xcom_pull(key='workflow_context', task_ids='init_workflow')
        env = ti.xcom_pull(key='env', task_ids='init_workflow')
        if not workflow_context:
            raise ValueError("No workflow_context from init task")
        logging.info(f"Starting Devo connector for env '{env}' - {task_name}")
        connector = DevoConnector(
            flow_config_path=cfg["flow_config_path"],
            env_config_path=cfg["env_config_path"],
            env=env,
            logger=logging.getLogger(f"devo_connector_{env}_{task_name}"),
        )
        row_count = connector.run(workflow_context)
        logging.info(f"Devo connector completed successfully for {task_name}. Processed {row_count} rows.")
        ti.xcom_push(key='row_count', value=row_count)
        ti.xcom_push(key='devo_success', value=True)
        # Manual mode: always run MRDS when the connector succeeded.
        ti.xcom_push(key='should_run_mrds', value=True)
        return row_count
    except Exception as e:
        logging.error(f"Error running Devo connector: {e}", exc_info=True)
        ti.xcom_push(key='row_count', value=0)
        ti.xcom_push(key='devo_success', value=False)
        ti.xcom_push(key='should_run_mrds', value=False)
        raise
def check_should_run_mrds(**context):
    """Manual-mode gate: run the MRDS task whenever the devo task did not fail."""
    ti = context['ti']
    dag_run = context['dag_run']
    current_id = context['task'].task_id
    task_name = current_id.replace('check_', '') if current_id.startswith('check_') else current_id
    upstream = dag_run.get_task_instance(f'devo_{task_name}')
    logging.info(f"Devo task state: {upstream.state}")
    if upstream.state == 'failed':
        logging.info(f"Devo connector failed for {task_name} - skipping MRDS task")
        raise AirflowSkipException(f"Devo connector failed for {task_name}")
    # No row-count check here: the manual DAG always processes whatever the
    # connector produced.
    logging.info(f"MRDS task will run for {task_name} - manual trigger mode")
    return True
def run_mrds_task(**context):
    """Parse the CSV written by the Devo connector via the MRDS core loader."""
    try:
        ti = context['ti']
        current_id = context['task'].task_id
        task_name = current_id.replace('_PARSE', '') if current_id.endswith('_PARSE') else current_id
        cfg = TASK_CONFIGS.get(task_name)
        if not cfg:
            raise ValueError(f"No configuration found for task: {task_name}")
        workflow_context = ti.xcom_pull(key='workflow_context', task_ids='init_workflow')
        row_count = ti.xcom_pull(key='row_count', task_ids=f'devo_{task_name}')
        if not workflow_context:
            raise ValueError("No workflow_context from init task")
        logging.info(f"Starting MRDS task for {task_name} with workflow context: {workflow_context}")
        logging.info(f"Processing {row_count} rows from Devo connector")
        # Workflow context was created by init_workflow, so MRDS must not
        # generate a fresh one.
        mrds_main(workflow_context, cfg["source_filename"], cfg["config_file"], generate_workflow_context=False)
        logging.info(f"MRDS task completed successfully for {task_name}")
        return "SUCCESS"
    except Exception as e:
        logging.error(f"Error running MRDS task: {e}", exc_info=True)
        raise
def check_success_for_mopdb(**context):
    """Decide whether the manual MOPDB trigger should fire.

    Raises AirflowSkipException when any devo or MRDS parse task failed, or
    when this check itself errors out; otherwise returns "SUCCESS".
    """
    try:
        ti = context['ti']
        dag_run = context['dag_run']
        has_failures = False
        failure_reasons = []
        # Inspect the terminal state of each feed's connector and parse task.
        for task_name in TASK_CONFIGS.keys():
            devo_task_id = f'devo_{task_name}'
            mrds_task_id = f'{task_name}_PARSE'
            devo_task = dag_run.get_task_instance(devo_task_id)
            mrds_task = dag_run.get_task_instance(mrds_task_id)
            if devo_task.state == 'failed':
                has_failures = True
                failure_reasons.append(f"{task_name}: Devo connector failed")
            if mrds_task.state == 'failed':
                has_failures = True
                failure_reasons.append(f"{task_name}: MRDS task failed")
        if has_failures:
            error_msg = f"Tasks failed - skipping MOPDB trigger: {', '.join(failure_reasons)}"
            logging.info(error_msg)
            raise AirflowSkipException(error_msg)
        logging.info("All tasks completed successfully - proceeding to trigger MOPDB")
        return "SUCCESS"
    except AirflowSkipException:
        raise
    except Exception as e:
        # Unexpected errors are converted to a skip so the trigger never
        # fires on an indeterminate state.
        logging.error(f"Error checking success for MOPDB: {e}", exc_info=True)
        raise AirflowSkipException(f"Error checking success - skipping MOPDB trigger: {e}")
def end_log_table_task(**context):
    """Terminal logging hook; scheduled with ALL_DONE so it always runs."""
    try:
        logging.info("End log table task - always runs at the end")
    except Exception as e:
        logging.error(f"Error in end log table task: {e}", exc_info=True)
        raise
    return "SUCCESS"
def finalise_workflow_task(**context):
    """Mark the MRDS workflow run as succeeded ("Y") or failed ("N").

    Inspects the terminal states of every devo/MRDS task plus the MOPDB
    trigger and the end-log task, closes the MRDS run-history record, and
    raises AirflowFailException when anything failed so the DAG run
    reflects it.

    Raises:
        AirflowFailException: when any monitored task failed, or when
            finalisation itself errors out.
    """
    # Pre-initialised (as in the scheduled DAG's sibling implementation) so
    # the except-path cleanup can test the variable directly instead of the
    # fragile `'a_workflow_history_key' in locals()` idiom.
    a_workflow_history_key = None
    try:
        ti = context['ti']
        dag_run = context['dag_run']
        a_workflow_history_key = ti.xcom_pull(key='workflow_history_key', task_ids='init_workflow')
        if a_workflow_history_key is None:
            raise ValueError("No workflow history key found in XCom; cannot finalise workflow")
        workflow_success = True
        failure_reasons = []
        for task_name in TASK_CONFIGS.keys():
            devo_task_id = f'devo_{task_name}'
            mrds_task_id = f'{task_name}_PARSE'
            devo_task = dag_run.get_task_instance(devo_task_id)
            mrds_task = dag_run.get_task_instance(mrds_task_id)
            if devo_task.state == 'failed':
                workflow_success = False
                failure_reasons.append(f"{task_name}: Devo connector failed")
            if mrds_task.state == 'failed':
                workflow_success = False
                failure_reasons.append(f"{task_name}: MRDS task failed")
        trigger_mopdb_task = dag_run.get_task_instance('trigger_mopdb_dag')
        if trigger_mopdb_task.state == 'failed':
            workflow_success = False
            failure_reasons.append("MOPDB trigger failed")
        end_log_task = dag_run.get_task_instance('end_log_table')
        if end_log_task.state == 'failed':
            workflow_success = False
            failure_reasons.append("End log table failed")
        if workflow_success:
            mrds_finalise_workflow(a_workflow_history_key, "Y")
            logging.info(f"Finalised workflow with history key {a_workflow_history_key} as SUCCESS")
        else:
            mrds_finalise_workflow(a_workflow_history_key, "N")
            logging.error(f"Finalised workflow with history key {a_workflow_history_key} as FAILED")
            logging.error(f"Failure reasons: {', '.join(failure_reasons)}")
            raise AirflowFailException(f"Workflow failed: {', '.join(failure_reasons)}")
    except AirflowFailException:
        raise
    except Exception as e:
        logging.error(f"Error finalizing workflow: {e}", exc_info=True)
        try:
            if a_workflow_history_key:
                mrds_finalise_workflow(a_workflow_history_key, "N")
        except Exception as cleanup_error:
            # Best-effort cleanup must not mask the original failure, but a
            # bare `except:` here would also have swallowed
            # KeyboardInterrupt/SystemExit — narrow it and log instead.
            logging.error(f"Failed to finalise workflow history after error: {cleanup_error}")
        raise AirflowFailException(f"Workflow finalization failed: {e}")
# ---- Control / bookkeeping operators --------------------------------------
init_workflow = PythonOperator(
    task_id='init_workflow',
    python_callable=init_workflow_task,
    provide_context=True,
)
# Runs after every parse task (ALL_DONE) and decides — by raising
# AirflowSkipException — whether the MOPDB trigger should fire.
check_mopdb = PythonOperator(
    task_id='check_success_for_mopdb',
    python_callable=check_success_for_mopdb,
    provide_context=True,
    trigger_rule=TriggerRule.ALL_DONE,
)
# Fire-and-forget trigger of the manual MOPDB DAG; NONE_FAILED here (the
# scheduled DAG uses NONE_FAILED_MIN_ONE_SUCCESS instead).
trigger_mopdb_dag = TriggerDagRunOperator(
    task_id='trigger_mopdb_dag',
    trigger_dag_id='w_MOPDB_RQSD_PROCESS_MANUAL',
    wait_for_completion=False,
    trigger_rule=TriggerRule.NONE_FAILED,
)
end_log_table = PythonOperator(
    task_id='end_log_table',
    python_callable=end_log_table_task,
    trigger_rule=TriggerRule.ALL_DONE,
)
# ALL_DONE so the MRDS run history is closed out even when tasks failed.
finalize_workflow = PythonOperator(
    task_id='finalize_workflow',
    python_callable=finalise_workflow_task,
    provide_context=True,
    trigger_rule=TriggerRule.ALL_DONE,
)
# ---- Per-feed task chains -------------------------------------------------
# One devo -> check -> parse chain per configured feed; all chains fan out
# from init_workflow in parallel (no gating feed in the manual variant).
all_tasks = []
for task_name in TASK_CONFIGS.keys():
    devo_task = PythonOperator(
        task_id=f'devo_{task_name}',
        python_callable=run_devo_connector,
        provide_context=True,
    )
    check_task = PythonOperator(
        task_id=f'check_{task_name}',
        python_callable=check_should_run_mrds,
        provide_context=True,
        trigger_rule=TriggerRule.ALL_DONE,
    )
    mrds_task = PythonOperator(
        task_id=f'{task_name}_PARSE',
        python_callable=run_mrds_task,
        provide_context=True,
    )
    devo_task >> check_task >> mrds_task
    all_tasks.extend([devo_task, check_task, mrds_task])
# ---- Wiring ---------------------------------------------------------------
devo_tasks = [task for task in all_tasks if task.task_id.startswith('devo_')]
mrds_tasks = [task for task in all_tasks if task.task_id.endswith('_PARSE')]
init_workflow >> devo_tasks
for mrds_task in mrds_tasks:
    mrds_task >> check_mopdb
# Note: tail ordering differs from the scheduled DAG — here the MOPDB
# trigger runs before end_log_table.
check_mopdb >> trigger_mopdb_dag >> end_log_table >> finalize_workflow