init
This commit is contained in:
524
airflow/ods/rqsd/rqsd_process/dags/w_ODS_RQSD_PROCESS.py
Normal file
524
airflow/ods/rqsd/rqsd_process/dags/w_ODS_RQSD_PROCESS.py
Normal file
@@ -0,0 +1,524 @@
|
||||
import sys
|
||||
import os
|
||||
from airflow import DAG
|
||||
from airflow.operators.python import PythonOperator
|
||||
from airflow.operators.dummy import DummyOperator
|
||||
from airflow.operators.trigger_dagrun import TriggerDagRunOperator
|
||||
from airflow.utils.dates import days_ago
|
||||
from airflow.utils.trigger_rule import TriggerRule
|
||||
from datetime import datetime, timedelta
|
||||
import logging
|
||||
|
||||
try:
|
||||
from airflow.exceptions import AirflowFailException, AirflowSkipException
|
||||
except Exception:
|
||||
from airflow.exceptions import AirflowException as AirflowFailException
|
||||
from airflow.exceptions import AirflowSkipException
|
||||
|
||||
sys.path.append('/opt/airflow/python/connectors/devo')
|
||||
sys.path.append('/opt/airflow/python/mrds_common')
|
||||
sys.path.append('/opt/airflow/src/airflow/dags/ods/rqsd')
|
||||
|
||||
from mrds.utils.manage_runs import init_workflow as mrds_init_workflow, finalise_workflow as mrds_finalise_workflow
|
||||
from devo_connector import DevoConnector
|
||||
from mrds.core import main as mrds_main
|
||||
from mrds.utils.security_utils import get_verified_run_id, verify_run_id
|
||||
|
||||
# Per-pipeline configuration: each entry maps a logical task name to the
# Devo extraction flow config, the shared RQSD env config, the CSV file the
# extraction writes, and the MRDS parse config that consumes that CSV.
# Dict insertion order matters: the first entry (OBSERVATIONS) is treated
# as the gatekeeper task by the wiring code further down.
TASK_CONFIGS = {
    "m_ODS_RQSD_OBSERVATIONS": {
        "flow_config_path": "/opt/airflow/src/airflow/dags/ods/rqsd/rqsd_process/config/yaml/m_ODS_RQSD_OBSERVATIONS.yaml",
        "env_config_path": "/opt/airflow/python/connectors/devo/config/env_config_rqsd.yaml",
        "source_filename": "RQSD_OBSERVATIONS.csv",
        "config_file": "/opt/airflow/src/airflow/dags/ods/rqsd/rqsd_process/config/yaml/m_ODS_RQSD_OBSERVATIONS_PARSE.yaml"
    },
    "m_ODS_RQSD_FX": {
        "flow_config_path": "/opt/airflow/src/airflow/dags/ods/rqsd/rqsd_process/config/yaml/m_ODS_RQSD_FX.yaml",
        "env_config_path": "/opt/airflow/python/connectors/devo/config/env_config_rqsd.yaml",
        "source_filename": "RQSD_FX.csv",
        "config_file": "/opt/airflow/src/airflow/dags/ods/rqsd/rqsd_process/config/yaml/m_ODS_RQSD_FX_PARSE.yaml"
    },
    "m_ODS_RQSD_SUBA_DEVO": {
        "flow_config_path": "/opt/airflow/src/airflow/dags/ods/rqsd/rqsd_process/config/yaml/m_ODS_RQSD_SUBA_DEVO.yaml",
        "env_config_path": "/opt/airflow/python/connectors/devo/config/env_config_rqsd.yaml",
        "source_filename": "RQSD_SUBA_DEVO.csv",
        "config_file": "/opt/airflow/src/airflow/dags/ods/rqsd/rqsd_process/config/yaml/m_ODS_RQSD_SUBA_DEVO_PARSE.yaml"
    }
}

# Airflow defaults applied to every task in this DAG (individual operators
# below override retries to 0).
default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': days_ago(1),
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=1),
}

# DAG id derived from this file's basename so a file rename propagates.
dag_id = os.path.splitext(os.path.basename(__file__))[0]

# Identifiers used when registering the run in the MRDS run-history table.
WORKFLOW_CONFIG = {
    "database_name": "ODS",
    "workflow_name": dag_id
}
|
||||
|
||||
with DAG(
|
||||
dag_id=dag_id,
|
||||
default_args=default_args,
|
||||
description='Run devo RQSD data ingestion workflow with conditional MRDS processing - Multi-task',
|
||||
#schedule_interval='*/10 * * * *', # every 10 minutes
|
||||
schedule_interval=None,
|
||||
catchup=False,
|
||||
max_active_runs=1,
|
||||
tags=["Devo", "RQSD", "MRDS", "Connector", "Multi-Task"]
|
||||
) as dag:
|
||||
|
||||
|
||||
def init_workflow_task(**context):
    """Initialise the MRDS workflow run.

    Validates the required loader-DB environment variables, registers a
    new run via ``mrds_init_workflow`` and pushes the resulting history
    key / workflow context / env name to XCom for downstream tasks.

    XCom pushes: ``workflow_history_key``, ``workflow_context``, ``env``.

    Raises:
        ValueError: when any mandatory environment variable is missing.
    """
    try:
        database_name = WORKFLOW_CONFIG["database_name"]
        workflow_name = WORKFLOW_CONFIG["workflow_name"]

        # Log the runtime environment for debugging; the password is
        # masked so it never appears in task logs.
        env_vars = {
            'MRDS_ENV': os.getenv("MRDS_ENV"),
            'MRDS_LOADER_DB_USER': os.getenv("MRDS_LOADER_DB_USER"),
            'MRDS_LOADER_DB_PASS': '***MASKED***' if os.getenv("MRDS_LOADER_DB_PASS") else None,
            'MRDS_LOADER_DB_TNS': os.getenv("MRDS_LOADER_DB_TNS"),
            'BUCKET_NAMESPACE': os.getenv("BUCKET_NAMESPACE"),
        }

        for key, value in env_vars.items():
            logging.info(f"{key}: {value}")

        # Run id comes through the project's security utils — presumably
        # validating the Airflow run_id format; confirm in security_utils.
        run_id_value = get_verified_run_id(context)

        logging.info(f"Task ID: {context.get('task_instance_key_str', 'N/A')}")
        logging.info(f"Run ID: {run_id_value}")
        logging.info(f"Execution Date: {context.get('execution_date', 'N/A')}")

        dag_obj = context.get('dag')
        dag_id_str = dag_obj.dag_id if dag_obj else 'N/A'
        logging.info(f"DAG ID: {dag_id_str}")

        env = os.getenv("MRDS_ENV", "dev")
        username = os.getenv("MRDS_LOADER_DB_USER")
        password = os.getenv("MRDS_LOADER_DB_PASS")
        tnsalias = os.getenv("MRDS_LOADER_DB_TNS")

        # Fail fast, naming exactly which credentials are absent.
        if not all([username, password, tnsalias]):
            missing_vars = []
            if not username:
                missing_vars.append("MRDS_LOADER_DB_USER")
            if not password:
                missing_vars.append("MRDS_LOADER_DB_PASS")
            if not tnsalias:
                missing_vars.append("MRDS_LOADER_DB_TNS")

            error_msg = f"Missing required environment variables: {', '.join(missing_vars)}"
            logging.error(error_msg)
            raise ValueError(error_msg)

        logging.info(f"Initializing {workflow_name} workflow for env '{env}'")

        workflow_run_id = run_id_value

        # Register the run in the MRDS run-history table; the returned key
        # identifies this run for finalisation later.
        a_workflow_history_key = mrds_init_workflow(database_name, workflow_name, workflow_run_id)
        logging.info(f"Initialized workflow with history key: {a_workflow_history_key}")

        workflow_context = {
            "run_id": workflow_run_id,
            "a_workflow_history_key": a_workflow_history_key
        }

        # Share the identifiers with every downstream task via XCom.
        ti = context['ti']
        ti.xcom_push(key='workflow_history_key', value=a_workflow_history_key)
        ti.xcom_push(key='workflow_context', value=workflow_context)
        ti.xcom_push(key='env', value=env)

        logging.info("Workflow initialization completed successfully")

    except Exception as e:
        logging.error(f"Error initializing workflow: {e}", exc_info=True)
        raise
|
||||
|
||||
def run_devo_connector(**context):
    """Run the Devo extraction for the config matching this task's id.

    Pulls workflow context/env from ``init_workflow``, runs
    ``DevoConnector`` and publishes ``row_count`` / ``devo_success`` /
    ``should_run_mrds`` / ``error_message`` to XCom so the downstream
    check task can decide whether to run the MRDS parse.

    Returns:
        int: number of rows processed by the connector.
    """
    ti = context['ti']
    task_id = context['task'].task_id

    try:
        # Task ids look like 'devo_<task_name>'; recover the config key.
        if task_id.startswith('devo_'):
            task_name = task_id.replace('devo_', '')
        else:
            task_name = task_id

        task_config = TASK_CONFIGS.get(task_name)
        if not task_config:
            # Publish failure markers before raising so the downstream
            # check task always sees a consistent XCom state.
            error_msg = f"No configuration found for task: {task_name}"
            logging.error(error_msg)
            ti.xcom_push(key='row_count', value=0)
            ti.xcom_push(key='devo_success', value=False)
            ti.xcom_push(key='should_run_mrds', value=False)
            ti.xcom_push(key='error_message', value=error_msg)
            raise ValueError(error_msg)

        flow_config_path = task_config["flow_config_path"]
        env_config_path = task_config["env_config_path"]

        workflow_context = ti.xcom_pull(key='workflow_context', task_ids='init_workflow')
        env = ti.xcom_pull(key='env', task_ids='init_workflow')

        if not workflow_context:
            error_msg = "No workflow_context from init task"
            logging.error(error_msg)
            ti.xcom_push(key='row_count', value=0)
            ti.xcom_push(key='devo_success', value=False)
            ti.xcom_push(key='should_run_mrds', value=False)
            ti.xcom_push(key='error_message', value=error_msg)
            raise ValueError(error_msg)

        logging.info(f"Starting Devo connector for env '{env}' - {task_name}")

        # Dedicated logger name per env/task keeps connector logs separable.
        devo_connector = DevoConnector(
            flow_config_path=flow_config_path,
            env_config_path=env_config_path,
            env=env,
            logger=logging.getLogger(f"devo_connector_{env}_{task_name}")
        )

        row_count = devo_connector.run(workflow_context)

        logging.info(f"Devo connector completed successfully for {task_name}. Processed {row_count} rows.")

        ti.xcom_push(key='row_count', value=row_count)
        ti.xcom_push(key='devo_success', value=True)
        # MRDS only needs to run when the extraction produced data.
        ti.xcom_push(key='should_run_mrds', value=row_count > 0)
        ti.xcom_push(key='error_message', value=None)

        return row_count

    except Exception as e:
        # Record the failure in XCom, then let the task fail normally.
        error_msg = f"Error running Devo connector: {str(e)}"
        logging.error(error_msg, exc_info=True)
        ti.xcom_push(key='row_count', value=0)
        ti.xcom_push(key='devo_success', value=False)
        ti.xcom_push(key='should_run_mrds', value=False)
        ti.xcom_push(key='error_message', value=error_msg)
        raise
|
||||
|
||||
def check_should_run_mrds(**context):
    """Decide whether the paired MRDS parse task should run.

    Skips (``AirflowSkipException``) when the paired devo task failed or
    produced no rows. For the OBSERVATIONS task it additionally pushes
    ``skip_all_tasks`` so the other branches can short-circuit.

    Returns:
        bool: True when the MRDS task should proceed.
    """
    ti = context['ti']
    dag_run = context['dag_run']
    task_id = context['task'].task_id

    # Task ids look like 'check_<task_name>'; recover the config key.
    if task_id.startswith('check_'):
        task_name = task_id.replace('check_', '')
    else:
        task_name = task_id

    devo_task_id = f'devo_{task_name}'

    devo_task_instance = dag_run.get_task_instance(devo_task_id)
    logging.info(f"Devo task {devo_task_id} state: {devo_task_instance.state}")

    # This task runs with trigger_rule ALL_DONE, so it still executes
    # after a devo failure; convert that failure into a skip downstream.
    if devo_task_instance.state == 'failed':
        error_msg = ti.xcom_pull(key='error_message', task_ids=devo_task_id)
        logging.info(f"Devo connector failed for {task_name} - skipping MRDS task. Error: {error_msg}")
        raise AirflowSkipException(f"Devo connector failed for {task_name}")

    should_run_mrds = ti.xcom_pull(key='should_run_mrds', task_ids=devo_task_id)
    row_count = ti.xcom_pull(key='row_count', task_ids=devo_task_id)

    # OBSERVATIONS is the gatekeeper: no data there means nothing
    # downstream should run, so flag skip_all_tasks for the other checks.
    if task_name == "m_ODS_RQSD_OBSERVATIONS" and (not should_run_mrds or row_count == 0):
        logging.info(f"OBSERVATIONS task has no data (row_count: {row_count}) - marking to skip all subsequent tasks")
        ti.xcom_push(key='skip_all_tasks', value=True)
        raise AirflowSkipException(f"No OBSERVATIONS data found (row_count: {row_count}) - skipping all subsequent processing")

    if not should_run_mrds or row_count == 0:
        logging.info(f"Skipping MRDS task for {task_name} - row count: {row_count}")
        raise AirflowSkipException(f"No data to process for {task_name} (row_count: {row_count})")

    logging.info(f"MRDS task should run for {task_name} - row count: {row_count}")
    return True
|
||||
|
||||
def check_should_run_parallel_tasks(**context):
    """Gate for the non-OBSERVATIONS branches.

    Skips this branch when the OBSERVATIONS check flagged that there is
    no data at all; otherwise defers to the standard MRDS gate.
    """
    task_instance = context['ti']
    current_id = context['task'].task_id

    # Strip the 'check_' prefix to recover the logical task name.
    if current_id.startswith('check_'):
        current_name = current_id.replace('check_', '')
    else:
        current_name = current_id

    # The OBSERVATIONS gate pushes this flag when it found zero rows.
    observations_empty = task_instance.xcom_pull(
        key='skip_all_tasks', task_ids='check_m_ODS_RQSD_OBSERVATIONS'
    )

    if observations_empty:
        message = f"Skipping {current_name} - OBSERVATIONS task had no data"
        logging.info(message)
        raise AirflowSkipException(message)

    # No global skip: apply the per-task row-count gate.
    return check_should_run_mrds(**context)
|
||||
|
||||
def run_mrds_task(**context):
    """Run the MRDS parse/load step for the matching task config.

    Pulls the workflow context seeded by ``init_workflow`` and invokes
    ``mrds_main`` with the task's source filename and parse config.

    Returns:
        str: "SUCCESS" on completion; errors are logged and re-raised.
    """
    ti = context['ti']
    task_id = context['task'].task_id

    try:
        # Task ids look like '<task_name>_PARSE'; recover the config key.
        if task_id.endswith('_PARSE'):
            task_name = task_id.replace('_PARSE', '')
        else:
            task_name = task_id

        devo_task_id = f'devo_{task_name}'

        task_config = TASK_CONFIGS.get(task_name)
        if not task_config:
            raise ValueError(f"No configuration found for task: {task_name}")

        source_filename = task_config["source_filename"]
        config_file = task_config["config_file"]

        workflow_context = ti.xcom_pull(key='workflow_context', task_ids='init_workflow')
        row_count = ti.xcom_pull(key='row_count', task_ids=devo_task_id)  # for logging only

        if not workflow_context:
            raise ValueError("No workflow_context from init task")

        logging.info(f"Starting MRDS task for {task_name} with workflow context: {workflow_context}")
        logging.info(f"Processing {row_count} rows from Devo connector")

        # Reuse the run context created by init_workflow instead of
        # letting mrds_main create its own.
        mrds_main(workflow_context, source_filename, config_file, generate_workflow_context=False)

        logging.info(f"MRDS task completed successfully for {task_name}")
        return "SUCCESS"

    except Exception as e:
        logging.error(f"Error running MRDS task: {e}", exc_info=True)
        raise
|
||||
|
||||
def check_success_for_mopdb(**context):
    """Gate the MOPDB trigger on the outcome of all devo/MRDS pairs.

    Skips (``AirflowSkipException``) when any pair failed, or when every
    MRDS task was skipped (nothing was processed). Runs with trigger_rule
    ALL_DONE so it always executes.

    Returns:
        str: "SUCCESS" when the MOPDB DAG should be triggered.
    """
    try:
        ti = context['ti']
        dag_run = context['dag_run']

        has_failures = False
        failure_reasons = []

        # Collect every failure across all configured pipelines.
        for task_name in TASK_CONFIGS.keys():
            devo_task_id = f'devo_{task_name}'
            mrds_task_id = f'{task_name}_PARSE'

            devo_task = dag_run.get_task_instance(devo_task_id)
            mrds_task = dag_run.get_task_instance(mrds_task_id)

            if devo_task.state == 'failed':
                has_failures = True
                failure_reasons.append(f"{task_name}: Devo connector failed")

            if mrds_task.state == 'failed':
                has_failures = True
                failure_reasons.append(f"{task_name}: MRDS task failed")

        if has_failures:
            error_msg = f"Tasks failed - skipping MOPDB trigger: {', '.join(failure_reasons)}"
            logging.info(error_msg)
            raise AirflowSkipException(error_msg)

        # If every branch was skipped there is no new data, so triggering
        # MOPDB would be pointless.
        all_skipped = all(
            dag_run.get_task_instance(f'{task_name}_PARSE').state == 'skipped'
            for task_name in TASK_CONFIGS.keys()
        )

        if all_skipped:
            error_msg = "All MRDS tasks were skipped (no data to process) - skipping MOPDB trigger"
            logging.info(error_msg)
            raise AirflowSkipException(error_msg)

        logging.info("All tasks completed successfully - proceeding to trigger MOPDB")
        return "SUCCESS"

    except AirflowSkipException:
        raise
    except Exception as e:
        # Unexpected errors degrade to a skip rather than failing the DAG.
        logging.error(f"Error checking success for MOPDB: {e}", exc_info=True)
        raise AirflowSkipException(f"Error checking success - skipping MOPDB trigger: {e}")
|
||||
|
||||
def end_log_table_task(**context):
    """Terminal logging hook.

    Scheduled with trigger_rule ALL_DONE so it runs regardless of
    upstream outcome; simply logs and reports success.
    """
    try:
        message = "End log table task - always runs at the end"
        logging.info(message)
    except Exception as exc:
        logging.error(f"Error in end log table task: {exc}", exc_info=True)
        raise
    return "SUCCESS"
|
||||
|
||||
def finalise_workflow_task(**context):
    """Finalise the MRDS workflow run as success ("Y") or failure ("N").

    Inspects the terminal state of every devo/MRDS task pair, the MOPDB
    trigger and the end-log task, then records the outcome against the
    history key pushed by ``init_workflow``.

    Raises:
        AirflowFailException: when any monitored task failed, or when
            finalisation itself errors, so the DAG run is marked failed.
    """
    a_workflow_history_key = None
    try:
        ti = context['ti']
        dag_run = context['dag_run']

        a_workflow_history_key = ti.xcom_pull(key='workflow_history_key', task_ids='init_workflow')

        if a_workflow_history_key is None:
            raise ValueError("No workflow history key found in XCom; cannot finalise workflow")

        workflow_success = True
        failure_reasons = []

        # Examine each configured pipeline's devo and MRDS task states.
        for task_name in TASK_CONFIGS.keys():
            devo_task_id = f'devo_{task_name}'
            mrds_task_id = f'{task_name}_PARSE'

            devo_task = dag_run.get_task_instance(devo_task_id)
            mrds_task = dag_run.get_task_instance(mrds_task_id)

            if devo_task.state == 'failed':
                workflow_success = False
                failure_reasons.append(f"{task_name}: Devo connector failed")

            if mrds_task.state == 'failed':
                workflow_success = False
                failure_reasons.append(f"{task_name}: MRDS task failed")
            elif mrds_task.state == 'skipped':
                # A skipped MRDS task is benign when the devo task succeeded
                # but produced zero rows; otherwise it masks a devo failure.
                row_count = ti.xcom_pull(key='row_count', task_ids=devo_task_id)
                devo_success = ti.xcom_pull(key='devo_success', task_ids=devo_task_id)

                if devo_success and row_count == 0:
                    logging.info(f"{task_name} - MRDS task was skipped due to no data - this is normal")
                elif not devo_success:
                    workflow_success = False
                    failure_reasons.append(f"{task_name}: Devo connector failed, MRDS skipped")

        trigger_mopdb_task = dag_run.get_task_instance('trigger_mopdb_dag')
        if trigger_mopdb_task.state == 'failed':
            workflow_success = False
            failure_reasons.append("MOPDB trigger failed")

        end_log_task = dag_run.get_task_instance('end_log_table')
        if end_log_task.state == 'failed':
            workflow_success = False
            failure_reasons.append("End log table failed")

        if workflow_success:
            mrds_finalise_workflow(a_workflow_history_key, "Y")
            logging.info(f"Finalised workflow with history key {a_workflow_history_key} as SUCCESS")
        else:
            mrds_finalise_workflow(a_workflow_history_key, "N")
            logging.error(f"Finalised workflow with history key {a_workflow_history_key} as FAILED")
            logging.error(f"Failure reasons: {', '.join(failure_reasons)}")
            raise AirflowFailException(f"Workflow failed: {', '.join(failure_reasons)}")

    except AirflowFailException:
        raise
    except Exception as e:
        logging.error(f"Error finalizing workflow: {e}", exc_info=True)
        # Best-effort: mark the run failed in MRDS without letting this
        # cleanup mask the original error. Narrowed from a bare `except:`
        # so KeyboardInterrupt/SystemExit still propagate, and the
        # suppressed cleanup failure is at least logged.
        try:
            if a_workflow_history_key:
                mrds_finalise_workflow(a_workflow_history_key, "N")
        except Exception as finalise_err:
            logging.warning(f"Could not record workflow failure in MRDS: {finalise_err}")
        raise AirflowFailException(f"Workflow finalization failed: {e}")
|
||||
|
||||
# --- Task instantiation and dependency wiring --------------------------------

# Initialises the MRDS run and seeds XCom for every downstream task.
init_workflow = PythonOperator(
    task_id='init_workflow',
    python_callable=init_workflow_task,
    provide_context=True,
    retries=0,
)

# Gate before the MOPDB trigger: ALL_DONE so it runs whatever happened
# upstream, then skips (rather than fails) when any branch failed.
check_mopdb = PythonOperator(
    task_id='check_success_for_mopdb',
    python_callable=check_success_for_mopdb,
    provide_context=True,
    trigger_rule=TriggerRule.ALL_DONE,
    retries=0,
)

# Fire-and-forget trigger of the downstream MOPDB DAG.
trigger_mopdb_dag = TriggerDagRunOperator(
    task_id='trigger_mopdb_dag',
    trigger_dag_id='w_MOPDB_RQSD_PROCESS',
    wait_for_completion=False,
    trigger_rule=TriggerRule.NONE_FAILED_MIN_ONE_SUCCESS,
    retries=0,
)

# Always-run terminal logging task.
end_log_table = PythonOperator(
    task_id='end_log_table',
    python_callable=end_log_table_task,
    trigger_rule=TriggerRule.ALL_DONE,
    retries=0,
)

# Records the final Y/N outcome in MRDS; ALL_DONE so it always runs last.
finalize_workflow = PythonOperator(
    task_id='finalize_workflow',
    python_callable=finalise_workflow_task,
    provide_context=True,
    trigger_rule=TriggerRule.ALL_DONE,
    retries=0,
)

# The first configured entry (OBSERVATIONS) is the gatekeeper chain; the
# rest run afterwards. NOTE(review): relies on TASK_CONFIGS insertion order.
task_names = list(TASK_CONFIGS.keys())
first_task_name = task_names[0]
parallel_task_names = task_names[1:]

first_devo_task = PythonOperator(
    task_id=f'devo_{first_task_name}',
    python_callable=run_devo_connector,
    provide_context=True,
    retries=0,
)

first_check_task = PythonOperator(
    task_id=f'check_{first_task_name}',
    python_callable=check_should_run_mrds,
    provide_context=True,
    trigger_rule=TriggerRule.ALL_DONE,
    retries=0,
)

first_mrds_task = PythonOperator(
    task_id=f'{first_task_name}_PARSE',
    python_callable=run_mrds_task,
    provide_context=True,
    retries=0,
)

# Build a devo -> check -> parse chain for every remaining pipeline.
parallel_tasks = []
for task_name in parallel_task_names:
    devo_task = PythonOperator(
        task_id=f'devo_{task_name}',
        python_callable=run_devo_connector,
        provide_context=True,
        retries=0,
    )

    check_task = PythonOperator(
        task_id=f'check_{task_name}',
        python_callable=check_should_run_parallel_tasks,
        provide_context=True,
        trigger_rule=TriggerRule.ALL_DONE,
        retries=0,
    )

    mrds_task = PythonOperator(
        task_id=f'{task_name}_PARSE',
        python_callable=run_mrds_task,
        provide_context=True,
        retries=0,
    )

    # NOTE(review): every "parallel" chain is anchored on first_mrds_task,
    # so these branches start only after OBSERVATIONS parsing is done;
    # they run in parallel with each other, not with the first chain.
    first_mrds_task >> devo_task >> check_task >> mrds_task
    parallel_tasks.extend([devo_task, check_task, mrds_task])

init_workflow >> first_devo_task >> first_check_task >> first_mrds_task

# Fan the terminal parse task of every chain into the MOPDB gate.
all_final_tasks = [first_mrds_task]
for task_name in parallel_task_names:
    mrds_task_id = f'{task_name}_PARSE'
    for task in parallel_tasks:
        if task.task_id == mrds_task_id:
            all_final_tasks.append(task)
            break

for task in all_final_tasks:
    task >> check_mopdb

# Tail: log, trigger MOPDB, then finalise the run in MRDS.
check_mopdb >> end_log_table >> trigger_mopdb_dag >> finalize_workflow
|
||||
424
airflow/ods/rqsd/rqsd_process/dags/w_ODS_RQSD_PROCESS_MANUAL.py
Normal file
424
airflow/ods/rqsd/rqsd_process/dags/w_ODS_RQSD_PROCESS_MANUAL.py
Normal file
@@ -0,0 +1,424 @@
|
||||
"""
|
||||
DAG ID set to w_ODS_RQSD_PROCESS_MANUAL
|
||||
Removed OBSERVATIONS from TASK_CONFIGS (only FX and SUBA_DEVO remain)
|
||||
Updated run_devo_connector to always set should_run_mrds=True (no row count check)
|
||||
Simplified check_should_run_mrds to always proceed if devo task succeeded (manual mode)
|
||||
Removed special OBSERVATIONS handling logic
|
||||
Trigger DAG ID changed to w_MOPDB_RQSD_PROCESS_MANUAL
|
||||
Added "MANUAL" tag
|
||||
Both FX and SUBA_DEVO tasks run in parallel from init
|
||||
Removed skipped task handling for no data scenarios in finalize
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
from airflow import DAG
|
||||
from airflow.operators.python import PythonOperator
|
||||
from airflow.operators.dummy import DummyOperator
|
||||
from airflow.operators.trigger_dagrun import TriggerDagRunOperator
|
||||
from airflow.utils.dates import days_ago
|
||||
from airflow.utils.trigger_rule import TriggerRule
|
||||
from datetime import datetime, timedelta
|
||||
import logging
|
||||
|
||||
try:
|
||||
from airflow.exceptions import AirflowFailException, AirflowSkipException
|
||||
except Exception:
|
||||
from airflow.exceptions import AirflowException as AirflowFailException
|
||||
from airflow.exceptions import AirflowSkipException
|
||||
|
||||
sys.path.append('/opt/airflow/python/connectors/devo')
|
||||
sys.path.append('/opt/airflow/python/mrds_common')
|
||||
sys.path.append('/opt/airflow/src/airflow/dags/ods/rqsd')
|
||||
|
||||
from mrds.utils.manage_runs import init_workflow as mrds_init_workflow, finalise_workflow as mrds_finalise_workflow
|
||||
from devo_connector import DevoConnector
|
||||
from mrds.core import main as mrds_main
|
||||
from mrds.utils.security_utils import get_verified_run_id, verify_run_id
|
||||
|
||||
# Per-pipeline configuration for the manual variant: OBSERVATIONS is
# intentionally absent (see module docstring); only FX and SUBA_DEVO run.
TASK_CONFIGS = {
    "m_ODS_RQSD_FX": {
        "flow_config_path": "/opt/airflow/src/airflow/dags/ods/rqsd/rqsd_process/config/yaml/m_ODS_RQSD_FX.yaml",
        "env_config_path": "/opt/airflow/python/connectors/devo/config/env_config_rqsd.yaml",
        "source_filename": "RQSD_FX.csv",
        "config_file": "/opt/airflow/src/airflow/dags/ods/rqsd/rqsd_process/config/yaml/m_ODS_RQSD_FX_PARSE.yaml"
    },
    "m_ODS_RQSD_SUBA_DEVO": {
        "flow_config_path": "/opt/airflow/src/airflow/dags/ods/rqsd/rqsd_process/config/yaml/m_ODS_RQSD_SUBA_DEVO.yaml",
        "env_config_path": "/opt/airflow/python/connectors/devo/config/env_config_rqsd.yaml",
        "source_filename": "RQSD_SUBA_DEVO.csv",
        "config_file": "/opt/airflow/src/airflow/dags/ods/rqsd/rqsd_process/config/yaml/m_ODS_RQSD_SUBA_DEVO_PARSE.yaml"
    }
}

# Airflow defaults applied to every task in this DAG.
default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': days_ago(1),
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
}

# Fixed DAG id for the manual-trigger variant (not derived from filename).
dag_id = "w_ODS_RQSD_PROCESS_MANUAL"

# Identifiers used when registering the run in the MRDS run-history table.
WORKFLOW_CONFIG = {
    "database_name": "ODS",
    "workflow_name": dag_id
}
|
||||
|
||||
with DAG(
|
||||
dag_id=dag_id,
|
||||
default_args=default_args,
|
||||
description='Manual trigger for RQSD data processing',
|
||||
schedule_interval=None,
|
||||
catchup=False,
|
||||
tags=["Devo", "RQSD", "MRDS", "Connector", "Multi-Task", "MANUAL"]
|
||||
) as dag:
|
||||
|
||||
def init_workflow_task(**context):
    """Initialise the MRDS workflow run (manual-trigger variant).

    Validates the loader-DB environment variables, registers the run via
    ``mrds_init_workflow`` and pushes ``workflow_history_key`` /
    ``workflow_context`` / ``env`` to XCom for downstream tasks.

    Raises:
        ValueError: when any mandatory environment variable is missing.
    """
    try:
        database_name = WORKFLOW_CONFIG["database_name"]
        workflow_name = WORKFLOW_CONFIG["workflow_name"]

        # Log the environment with the password masked.
        env_vars = {
            'MRDS_ENV': os.getenv("MRDS_ENV"),
            'MRDS_LOADER_DB_USER': os.getenv("MRDS_LOADER_DB_USER"),
            'MRDS_LOADER_DB_PASS': '***MASKED***' if os.getenv("MRDS_LOADER_DB_PASS") else None,
            'MRDS_LOADER_DB_TNS': os.getenv("MRDS_LOADER_DB_TNS"),
            'BUCKET_NAMESPACE': os.getenv("BUCKET_NAMESPACE"),
        }

        for key, value in env_vars.items():
            logging.info(f"{key}: {value}")

        # Run id validated through the project's security utils.
        run_id_value = get_verified_run_id(context)

        logging.info(f"Task ID: {context.get('task_instance_key_str', 'N/A')}")
        logging.info(f"Run ID: {run_id_value}")
        logging.info(f"Execution Date: {context.get('execution_date', 'N/A')}")

        dag_obj = context.get('dag')
        dag_id_str = dag_obj.dag_id if dag_obj else 'N/A'
        logging.info(f"DAG ID: {dag_id_str}")

        env = os.getenv("MRDS_ENV", "dev")
        username = os.getenv("MRDS_LOADER_DB_USER")
        password = os.getenv("MRDS_LOADER_DB_PASS")
        tnsalias = os.getenv("MRDS_LOADER_DB_TNS")

        # Fail fast, naming exactly which credentials are absent.
        if not all([username, password, tnsalias]):
            missing_vars = []
            if not username:
                missing_vars.append("MRDS_LOADER_DB_USER")
            if not password:
                missing_vars.append("MRDS_LOADER_DB_PASS")
            if not tnsalias:
                missing_vars.append("MRDS_LOADER_DB_TNS")

            error_msg = f"Missing required environment variables: {', '.join(missing_vars)}"
            logging.error(error_msg)
            raise ValueError(error_msg)

        logging.info(f"Initializing {workflow_name} workflow for env '{env}'")

        workflow_run_id = run_id_value

        # Register the run; the key identifies it for finalisation later.
        a_workflow_history_key = mrds_init_workflow(database_name, workflow_name, workflow_run_id)
        logging.info(f"Initialized workflow with history key: {a_workflow_history_key}")

        workflow_context = {
            "run_id": workflow_run_id,
            "a_workflow_history_key": a_workflow_history_key
        }

        # Share identifiers with every downstream task via XCom.
        ti = context['ti']
        ti.xcom_push(key='workflow_history_key', value=a_workflow_history_key)
        ti.xcom_push(key='workflow_context', value=workflow_context)
        ti.xcom_push(key='env', value=env)

        logging.info("Workflow initialization completed successfully")

    except Exception as e:
        logging.error(f"Error initializing workflow: {e}", exc_info=True)
        raise
|
||||
|
||||
def run_devo_connector(**context):
    """Run the Devo extraction for this task's config (manual variant).

    Unlike the scheduled DAG, ``should_run_mrds`` is always pushed True on
    success regardless of row count, so MRDS runs even for empty pulls.

    Returns:
        int: number of rows processed by the connector.
    """
    try:
        ti = context['ti']
        task_id = context['task'].task_id

        # Task ids look like 'devo_<task_name>'; recover the config key.
        if task_id.startswith('devo_'):
            task_name = task_id.replace('devo_', '')
        else:
            task_name = task_id

        task_config = TASK_CONFIGS.get(task_name)
        if not task_config:
            raise ValueError(f"No configuration found for task: {task_name}")

        flow_config_path = task_config["flow_config_path"]
        env_config_path = task_config["env_config_path"]

        workflow_context = ti.xcom_pull(key='workflow_context', task_ids='init_workflow')
        env = ti.xcom_pull(key='env', task_ids='init_workflow')

        if not workflow_context:
            raise ValueError("No workflow_context from init task")

        logging.info(f"Starting Devo connector for env '{env}' - {task_name}")

        devo_connector = DevoConnector(
            flow_config_path=flow_config_path,
            env_config_path=env_config_path,
            env=env,
            logger=logging.getLogger(f"devo_connector_{env}_{task_name}")
        )

        row_count = devo_connector.run(workflow_context)

        logging.info(f"Devo connector completed successfully for {task_name}. Processed {row_count} rows.")

        ti.xcom_push(key='row_count', value=row_count)
        ti.xcom_push(key='devo_success', value=True)
        # Manual mode: always run MRDS, even when zero rows were extracted.
        ti.xcom_push(key='should_run_mrds', value=True)

        return row_count

    except Exception as e:
        logging.error(f"Error running Devo connector: {e}", exc_info=True)
        # 'ti' may be unbound if the context lookup itself failed above,
        # so it is re-fetched before publishing the failure markers.
        ti = context['ti']
        ti.xcom_push(key='row_count', value=0)
        ti.xcom_push(key='devo_success', value=False)
        ti.xcom_push(key='should_run_mrds', value=False)
        raise
|
||||
|
||||
def check_should_run_mrds(**context):
    """Manual-mode gate: run MRDS unless the paired devo task failed.

    Raises AirflowSkipException on devo failure; returns True otherwise.
    """
    task_instance = context['ti']  # kept for parity with other gates; unused here
    dag_run = context['dag_run']
    current_id = context['task'].task_id

    # Strip the 'check_' prefix to recover the logical task name.
    if current_id.startswith('check_'):
        current_name = current_id.replace('check_', '')
    else:
        current_name = current_id

    upstream = dag_run.get_task_instance(f'devo_{current_name}')
    logging.info(f"Devo task state: {upstream.state}")

    if upstream.state == 'failed':
        logging.info(f"Devo connector failed for {current_name} - skipping MRDS task")
        raise AirflowSkipException(f"Devo connector failed for {current_name}")

    logging.info(f"MRDS task will run for {current_name} - manual trigger mode")
    return True
|
||||
|
||||
def run_mrds_task(**context):
    """Run the MRDS parse/load step for the matching task config.

    Pulls the workflow context seeded by ``init_workflow`` and invokes
    ``mrds_main`` with the task's source filename and parse config.

    Returns:
        str: "SUCCESS" on completion; errors are logged and re-raised.
    """
    try:
        ti = context['ti']
        task_id = context['task'].task_id

        # Task ids look like '<task_name>_PARSE'; recover the config key.
        if task_id.endswith('_PARSE'):
            task_name = task_id.replace('_PARSE', '')
        else:
            task_name = task_id

        devo_task_id = f'devo_{task_name}'

        task_config = TASK_CONFIGS.get(task_name)
        if not task_config:
            raise ValueError(f"No configuration found for task: {task_name}")

        source_filename = task_config["source_filename"]
        config_file = task_config["config_file"]

        workflow_context = ti.xcom_pull(key='workflow_context', task_ids='init_workflow')
        row_count = ti.xcom_pull(key='row_count', task_ids=devo_task_id)  # for logging only

        if not workflow_context:
            raise ValueError("No workflow_context from init task")

        logging.info(f"Starting MRDS task for {task_name} with workflow context: {workflow_context}")
        logging.info(f"Processing {row_count} rows from Devo connector")

        # Reuse the run context created by init_workflow instead of
        # letting mrds_main create its own.
        mrds_main(workflow_context, source_filename, config_file, generate_workflow_context=False)

        logging.info(f"MRDS task completed successfully for {task_name}")
        return "SUCCESS"

    except Exception as e:
        logging.error(f"Error running MRDS task: {e}", exc_info=True)
        raise
|
||||
|
||||
def check_success_for_mopdb(**context):
    """Gate the MOPDB trigger on the outcome of all devo/MRDS pairs.

    Manual variant: only failures cause a skip (no all-skipped check,
    since manual mode always runs MRDS). Runs with trigger_rule ALL_DONE.

    Returns:
        str: "SUCCESS" when the MOPDB DAG should be triggered.
    """
    try:
        ti = context['ti']
        dag_run = context['dag_run']

        has_failures = False
        failure_reasons = []

        # Collect every failure across all configured pipelines.
        for task_name in TASK_CONFIGS.keys():
            devo_task_id = f'devo_{task_name}'
            mrds_task_id = f'{task_name}_PARSE'

            devo_task = dag_run.get_task_instance(devo_task_id)
            mrds_task = dag_run.get_task_instance(mrds_task_id)

            if devo_task.state == 'failed':
                has_failures = True
                failure_reasons.append(f"{task_name}: Devo connector failed")

            if mrds_task.state == 'failed':
                has_failures = True
                failure_reasons.append(f"{task_name}: MRDS task failed")

        if has_failures:
            error_msg = f"Tasks failed - skipping MOPDB trigger: {', '.join(failure_reasons)}"
            logging.info(error_msg)
            raise AirflowSkipException(error_msg)

        logging.info("All tasks completed successfully - proceeding to trigger MOPDB")
        return "SUCCESS"

    except AirflowSkipException:
        raise
    except Exception as e:
        # Unexpected errors degrade to a skip rather than failing the DAG.
        logging.error(f"Error checking success for MOPDB: {e}", exc_info=True)
        raise AirflowSkipException(f"Error checking success - skipping MOPDB trigger: {e}")
|
||||
|
||||
def end_log_table_task(**context):
    """Terminal logging hook; runs regardless of upstream outcome."""
    try:
        final_message = "End log table task - always runs at the end"
        logging.info(final_message)
    except Exception as exc:
        logging.error(f"Error in end log table task: {exc}", exc_info=True)
        raise
    return "SUCCESS"
|
||||
|
||||
def finalise_workflow_task(**context):
    """Record the final workflow outcome in the MRDS run-history table.

    Pulls the workflow history key produced by ``init_workflow`` from XCom,
    checks the terminal state of every tracked task in this DAG run, then
    finalises the workflow as success (``"Y"``) or failure (``"N"``) via
    ``mrds_finalise_workflow``.

    Raises:
        AirflowFailException: if any tracked task failed (or never ran), or
            if the finalisation itself errors.
    """
    # Pre-bind so the outer except can tell whether the key was retrieved
    # before the error occurred (avoids the fragile `locals()` probe).
    a_workflow_history_key = None
    try:
        ti = context['ti']
        dag_run = context['dag_run']

        a_workflow_history_key = ti.xcom_pull(key='workflow_history_key', task_ids='init_workflow')
        if a_workflow_history_key is None:
            raise ValueError("No workflow history key found in XCom; cannot finalise workflow")

        # 'upstream_failed' counts as failure: the task never executed
        # because a predecessor failed.
        bad_states = {'failed', 'upstream_failed'}
        failure_reasons = []

        def _record_if_failed(task_id, reason):
            # A missing task instance is treated as a failure so we never
            # record success for a run we cannot verify.
            task_instance = dag_run.get_task_instance(task_id)
            if task_instance is None:
                failure_reasons.append(f"{reason} (no task instance)")
            elif task_instance.state in bad_states:
                failure_reasons.append(reason)

        for task_name in TASK_CONFIGS.keys():
            _record_if_failed(f'devo_{task_name}', f"{task_name}: Devo connector failed")
            _record_if_failed(f'{task_name}_PARSE', f"{task_name}: MRDS task failed")

        _record_if_failed('trigger_mopdb_dag', "MOPDB trigger failed")
        _record_if_failed('end_log_table', "End log table failed")

        if not failure_reasons:
            mrds_finalise_workflow(a_workflow_history_key, "Y")
            logging.info(f"Finalised workflow with history key {a_workflow_history_key} as SUCCESS")
        else:
            mrds_finalise_workflow(a_workflow_history_key, "N")
            logging.error(f"Finalised workflow with history key {a_workflow_history_key} as FAILED")
            logging.error(f"Failure reasons: {', '.join(failure_reasons)}")
            raise AirflowFailException(f"Workflow failed: {', '.join(failure_reasons)}")

    except AirflowFailException:
        raise
    except Exception as e:
        logging.error(f"Error finalizing workflow: {e}", exc_info=True)
        # Best-effort: mark the run as failed if we got far enough to know
        # its history key. Catch only Exception (not a bare except) so
        # KeyboardInterrupt/SystemExit still propagate, and log the
        # secondary failure instead of swallowing it silently.
        if a_workflow_history_key:
            try:
                mrds_finalise_workflow(a_workflow_history_key, "N")
            except Exception as final_err:
                logging.error(f"Could not finalise workflow as failed: {final_err}", exc_info=True)
        raise AirflowFailException(f"Workflow finalization failed: {e}")
||||
# --------------------------------------------------------------------------
# Task instantiation and DAG wiring.
# NOTE(review): provide_context=True is an Airflow 1.x idiom; Airflow 2.x
# PythonOperator passes the context automatically and only tolerates this
# kwarg for backward compatibility — confirm the target Airflow version.
# --------------------------------------------------------------------------

# Opens the workflow run in the MRDS run-history table and pushes
# 'workflow_history_key' to XCom for finalize_workflow to pull later.
init_workflow = PythonOperator(
    task_id='init_workflow',
    python_callable=init_workflow_task,
    provide_context=True,
)

# Gate before the MOPDB trigger. ALL_DONE so it runs even when some
# per-feed tasks failed; the callable then decides (via skip) whether the
# trigger should fire.
check_mopdb = PythonOperator(
    task_id='check_success_for_mopdb',
    python_callable=check_success_for_mopdb,
    provide_context=True,
    trigger_rule=TriggerRule.ALL_DONE,
)

# Fire-and-forget trigger of the downstream MOPDB DAG. NONE_FAILED lets it
# run after check_mopdb succeeds or skips is not the case — a skip upstream
# NOTE(review): with NONE_FAILED this task still runs when check_mopdb is
# *skipped*; if the intent is "only on explicit success", NONE_FAILED_MIN_ONE_SUCCESS
# or ALL_SUCCESS may be wanted — confirm.
trigger_mopdb_dag = TriggerDagRunOperator(
    task_id='trigger_mopdb_dag',
    trigger_dag_id='w_MOPDB_RQSD_PROCESS_MANUAL',
    wait_for_completion=False,
    trigger_rule=TriggerRule.NONE_FAILED,
)

# Always-run end-of-DAG bookkeeping (ALL_DONE).
end_log_table = PythonOperator(
    task_id='end_log_table',
    python_callable=end_log_table_task,
    trigger_rule=TriggerRule.ALL_DONE,
)

# Final step: records overall success/failure in the MRDS run-history
# table. ALL_DONE so the run is finalised even after failures.
finalize_workflow = PythonOperator(
    task_id='finalize_workflow',
    python_callable=finalise_workflow_task,
    provide_context=True,
    trigger_rule=TriggerRule.ALL_DONE,
)

# Per-feed chain built from TASK_CONFIGS: devo extract -> gate -> MRDS parse.
all_tasks = []
for task_name in TASK_CONFIGS.keys():
    # Pulls the feed from Devo and stages it for parsing.
    devo_task = PythonOperator(
        task_id=f'devo_{task_name}',
        python_callable=run_devo_connector,
        provide_context=True,
    )

    # Decides whether the MRDS parse should run. ALL_DONE so the decision
    # is made even when the devo task failed.
    check_task = PythonOperator(
        task_id=f'check_{task_name}',
        python_callable=check_should_run_mrds,
        provide_context=True,
        trigger_rule=TriggerRule.ALL_DONE,
    )

    # Parses the staged file through the MRDS core. Task id must stay
    # '<task_name>_PARSE' — check_success_for_mopdb and
    # finalise_workflow_task look tasks up by this naming convention.
    mrds_task = PythonOperator(
        task_id=f'{task_name}_PARSE',
        python_callable=run_mrds_task,
        provide_context=True,
    )

    devo_task >> check_task >> mrds_task

    all_tasks.extend([devo_task, check_task, mrds_task])

# Partition the per-feed tasks by role for the fan-in/fan-out wiring below.
devo_tasks = [task for task in all_tasks if task.task_id.startswith('devo_')]
mrds_tasks = [task for task in all_tasks if task.task_id.endswith('_PARSE')]

# Fan out: all feed extractions start after the workflow is initialised.
init_workflow >> devo_tasks

# Fan in: every parse feeds the MOPDB gate.
for mrds_task in mrds_tasks:
    mrds_task >> check_mopdb

# Tail of the DAG: gate -> trigger -> bookkeeping -> finalisation.
check_mopdb >> trigger_mopdb_dag >> end_log_table >> finalize_workflow
|
||||
Reference in New Issue
Block a user