# Provenance: initial commit of airflow/TestDags/archive/old_devo_rqsd_mrds_workflow.py
# (new file, 171 lines) — recovered from a git web-viewer diff export.
|
||||
import sys
|
||||
import os
|
||||
from airflow import DAG
|
||||
from airflow.operators.python import PythonOperator
|
||||
from airflow.utils.dates import days_ago
|
||||
from datetime import datetime, timedelta
|
||||
import logging
|
||||
|
||||
### DEVO CONNECTOR WITH DYNAMIC WORKFLOW CONTEXT & HISTORY KEY
|
||||
|
||||
# Importing custom modules
|
||||
sys.path.append('/opt/airflow/python/connectors/devo')
|
||||
sys.path.append('/opt/airflow/python/mrds_common')
|
||||
sys.path.append('/opt/airflow/src/airflow/dags/ods/rqsd')
|
||||
|
||||
# Import your functions
|
||||
from mrds.utils.manage_runs import init_workflow, finalise_workflow
|
||||
from devo_connector import main as devo_main
|
||||
from devo_connector_v2 import run as devo_main2
|
||||
from mrds.core import main as mrds_main
|
||||
|
||||
# Default arguments
|
||||
# Default arguments shared by every task in this DAG.
default_args = dict(
    owner='airflow',
    depends_on_past=False,
    start_date=days_ago(1),
    email_on_failure=False,
    email_on_retry=False,
    retries=3,
    retry_delay=timedelta(minutes=5),
)

# Derive the DAG id dynamically from this file's basename (without extension),
# so renaming the file renames the DAG.
_dag_file = os.path.basename(__file__)
dag_id, _ = os.path.splitext(_dag_file)
|
||||
|
||||
with DAG(
    dag_id=dag_id,
    default_args=default_args,
    description='Run devo RQSD data ingestion workflow with MRDS processing',
    schedule_interval=None,  # manual trigger only
    catchup=False,
    tags=["Devo", "RQSD", "MRDS", "Connector"],
    # Optional run-time overrides; when left empty the tasks fall back to
    # their built-in defaults (see run_mrds_task).
    params={
        "source_filename": "",
        "config_file": "",
    },
) as dag:

    def run_devo_connector_rqsd(**context):
        """Run the Devo RQSD connector workflow.

        Reads MRDS DB credentials from the environment, registers a workflow
        run via ``init_workflow``, executes the Devo connector, and pushes the
        workflow context and history key to XCom for downstream tasks.

        Raises:
            ValueError: if any required DB environment variable is missing.
        """
        # Initialized up front so the except-branch can test it safely instead
        # of the fragile `'name' in locals()` idiom the original used.
        a_workflow_history_key = None
        try:
            # NOTE(review): MRDS_ENV is not part of the required-variable check
            # below — confirm the connector tolerates env=None.
            env = os.getenv("MRDS_ENV")
            username = os.getenv("MRDS_LOADER_DB_USER")
            password = os.getenv("MRDS_LOADER_DB_PASS")
            tnsalias = os.getenv("MRDS_LOADER_DB_TNS")

            if not all([username, password, tnsalias]):
                raise ValueError(
                    "Missing one or more required environment variables: "
                    "MRDS_LOADER_DB_USER, MRDS_LOADER_DB_PASS, MRDS_LOADER_DB_TNS"
                )

            logging.info(f"Starting Devo RQSD workflow from Airflow DAG for env '{env}'")

            database_name = 'MOPDB'
            workflow_name = 'w_MOPDB_RQSD_PROCESS'
            # The Airflow run_id uniquely identifies this DAG run in MRDS.
            workflow_run_id = str(context['ti'].run_id)

            # Register this run in MRDS and obtain its history key.
            a_workflow_history_key = init_workflow(database_name, workflow_name, workflow_run_id)
            logging.info(f"Initialized workflow with history key: {a_workflow_history_key}")

            workflow_context = {
                "run_id": workflow_run_id,
                "a_workflow_history_key": a_workflow_history_key
            }

            flow_config_path = "/opt/airflow/src/airflow/dags/ods/rqsd/rqsd_process/config/yaml/flow_config_rqsd_observations.yaml"
            env_config_path = "/opt/airflow/python/connectors/devo/config/env_config_rqsd.yaml"

            count = devo_main2(workflow_context, flow_config_path, env_config_path, env)
            logging.info(f"Devo RQSD workflow completed successfully with count : {count}")

            # Push the workflow context and history key to XCom for downstream tasks
            context['ti'].xcom_push(key='workflow_history_key', value=a_workflow_history_key)
            context['ti'].xcom_push(key='workflow_context', value=workflow_context)

        except Exception as e:
            logging.error(f"Error running Devo RQSD workflow: {e}", exc_info=True)
            # If init_workflow succeeded but the workflow failed, close the run
            # with FAILED status so it is not left dangling in MRDS.
            if a_workflow_history_key is not None:
                try:
                    finalise_workflow(a_workflow_history_key, "FAILED")
                except Exception as finalise_error:
                    logging.error(f"Failed to finalise workflow after error: {finalise_error}")
            raise

    def run_mrds_task(**context):
        """Run MRDS processing using the context produced by the connector task.

        Honors the DAG-level ``params`` ("source_filename", "config_file");
        when a param is empty (the declared default) the original hard-coded
        value is used, so existing triggers behave exactly as before.

        Raises:
            ValueError: if the upstream task pushed no workflow_context.
        """
        try:
            ti = context['ti']
            workflow_context = ti.xcom_pull(key='workflow_context', task_ids='run_devo_connector_rqsd')
            if not workflow_context:
                raise ValueError("No workflow_context from Task 1")

            logging.info(f"workflow_context: {workflow_context}")

            # Fix: the DAG declares these params but the original ignored them
            # and hard-coded both values (making its own "missing param" checks
            # dead code). Fall back to the original values when unset.
            params = context.get('params') or {}
            source_filename = params.get('source_filename') or "RQSD_OBSERVATIONS.csv"
            config_file = params.get('config_file') or "/opt/airflow/src/airflow/dags/ods/rqsd/rqsd_process/config/yaml/flow_config_devo_process.yaml"

            logging.info(f"Starting MRDS task with source_filename: {source_filename}, config_file: {config_file}")

            # Run MRDS with the workflow context from the previous task
            mrds_main(workflow_context, source_filename, config_file, generate_workflow_context=True)

            logging.info("MRDS task completed successfully")

        except Exception as e:
            logging.error(f"Error running MRDS task: {e}", exc_info=True)
            raise

    def finalise_workflow_task(**context):
        """Finalize the workflow run with SUCCESS status.

        Raises:
            ValueError: if no workflow history key was pushed by the connector task.
        """
        # Pull the workflow_history_key from XCom pushed by the main task.
        ti = context['ti']
        a_workflow_history_key = ti.xcom_pull(key='workflow_history_key', task_ids='run_devo_connector_rqsd')

        if a_workflow_history_key is None:
            raise ValueError("No workflow history key found in XCom; cannot finalise workflow")

        # Call finalise with SUCCESS status
        finalise_workflow(a_workflow_history_key, "SUCCESS")
        logging.info(f"Finalised workflow with history key {a_workflow_history_key} as SUCCESS")

    # Task definitions.
    # `provide_context=True` dropped: it is an Airflow-1 flag that is a
    # deprecated no-op under the airflow.operators.python import path used here
    # (the context is always passed via **kwargs in Airflow 2).
    run_devo = PythonOperator(
        task_id='run_devo_connector_rqsd',
        python_callable=run_devo_connector_rqsd,
    )

    run_mrds = PythonOperator(
        task_id='run_mrds_task',
        python_callable=run_mrds_task,
    )

    finalize = PythonOperator(
        task_id='finalise_workflow',
        python_callable=finalise_workflow_task,
    )

    # Task dependencies: connector -> MRDS processing -> finalisation.
    run_devo >> run_mrds >> finalize
|
||||
Reference in New Issue
Block a user