init
This commit is contained in:
0
airflow/TestDags/.gitkeep
Normal file
0
airflow/TestDags/.gitkeep
Normal file
120
airflow/TestDags/IDMC_Airflow_Test.py
Normal file
120
airflow/TestDags/IDMC_Airflow_Test.py
Normal file
@@ -0,0 +1,120 @@
|
||||
import json
|
||||
import sys
|
||||
import time
|
||||
import re
|
||||
import requests
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
from airflow import DAG
|
||||
from airflow.models import Variable
|
||||
from airflow.operators.python_operator import PythonOperator
|
||||
|
||||
|
||||
# from infromatic team, : connect to infromatica (akash)
|
||||
|
||||
# Utility to make task_id Airflow-safe
def sanitize_task_id(task_id: str) -> str:
    """Return *task_id* with every run of disallowed characters collapsed to
    a single underscore, prefixed with 'task_' when the result does not start
    with a letter or digit (Airflow task_ids must match [a-zA-Z0-9_.-]+)."""
    cleaned = re.sub(r'[^a-zA-Z0-9_]+', '_', task_id)
    starts_ok = re.match(r'^[a-zA-Z0-9]', cleaned) is not None
    return cleaned if starts_ok else 'task_' + cleaned
|
||||
|
||||
# Runtime configuration pulled from Airflow Variables (set via UI/CLI).
iics_username = Variable.get("iics_username")
iics_password = Variable.get("iics_password")
task_type = Variable.get("task_type", default_var="MTT")
base_url = Variable.get("iics_base_url", default_var="")

# Name of the single CDI task this DAG runs.
CDI_task_name = "CDI_task"

# Arguments applied to every task in the DAG.
default_args = dict(
    owner='infa',
    depends_on_past=False,
    email=['airflow@example.com'],
    email_on_failure=False,
    email_on_retry=False,
    retries=1,
    retry_delay=timedelta(minutes=1),
    # NOTE(review): a dynamic start_date (now - 10s) is an Airflow
    # anti-pattern; kept as-is to preserve existing behavior.
    start_date=datetime.now() - timedelta(seconds=10),
)
|
||||
|
||||
# API logic (same as before)
def get_session_id(un, pw):
    """Log in to IICS and return (icSessionId, serverUrl).

    Exits the process (status 1) when the login call is not HTTP 200.
    """
    payload = {'@type': 'login', 'username': un, 'password': pw}
    hdrs = {'Content-Type': 'application/json', 'Accept': 'application/json'}
    resp = requests.post(base_url, data=json.dumps(payload), headers=hdrs)
    if resp.status_code != 200:
        print('API call failed:', resp.status_code)
        print(resp.text)
        sys.exit(1)
    body = resp.json()
    return body["icSessionId"], body["serverUrl"]
|
||||
|
||||
def start_job(session_id, server_url, taskname, taskType):
    """Start an IICS job and return (taskId, runId).

    Exits the process (status 1) when the job-start call is not HTTP 200.
    """
    endpoint = server_url + "/api/v2/job"
    hdrs = {'Content-Type': 'application/json', 'icSessionId': session_id, 'Accept': 'application/json'}
    payload = {'@type': 'job', 'taskName': taskname, 'taskType': taskType}
    resp = requests.post(endpoint, data=json.dumps(payload), headers=hdrs)
    if resp.status_code != 200:
        print('Job failed to start:', resp.status_code)
        print(resp.text)
        sys.exit(1)
    body = resp.json()
    print(f"Job {taskname} started successfully")
    return body['taskId'], body['runId']
|
||||
|
||||
def get_status(server_url, session_id, task_id, run_id):
    """Return the executionState for the (task_id, run_id) entry in the
    activity monitor, or None when that run is not listed.

    Exits the process (status 1) on a non-200 response.
    """
    endpoint = server_url + "/api/v2/activity/activityMonitor"
    hdrs = {'Content-Type': 'application/json', 'icSessionId': session_id, 'Accept': 'application/json'}
    resp = requests.get(endpoint, headers=hdrs)
    if resp.status_code != 200:
        print('Failed to get status:', resp.status_code)
        print(resp.text)
        sys.exit(1)
    for entry in resp.json():
        if entry['taskId'] == task_id and entry['runId'] == run_id:
            return entry['executionState']
    return None  # run not (yet) visible in the monitor
|
||||
|
||||
def execute_task(task_name):
    """Start the named IICS task, poll its state every 15 seconds until it
    leaves an active state, then fetch and print its session log(s)."""
    session_id, server_url = get_session_id(iics_username, iics_password)
    task_id, run_id = start_job(session_id, server_url, task_name, task_type)

    log_url = f"{server_url}/api/v2/activity/activityLog/"
    headers = {'Content-Type': 'application/json', 'icSessionId': session_id, 'Accept': 'application/json'}

    active_states = {"RUNNING", "INITIALIZED", "STOPPING", "QUEUED"}
    while True:
        time.sleep(15)
        status = get_status(server_url, session_id, task_id, run_id)
        print(f"Task status: {status}")
        if status in active_states:
            continue
        # Job reached a terminal state: dump every session log for the run.
        resp = requests.get(f"{log_url}?taskId={task_id}&runId={run_id}", headers=headers)
        for entry in resp.json():
            detail = requests.get(f"{log_url}{entry['id']}/sessionLog", headers=headers)
            print(detail.text)
        break
|
||||
|
||||
# DAG with no schedule (manual trigger)
dag = DAG(
    'IDMC_Airflow_Test',
    default_args=default_args,
    description='Simplified DAG with one CDI task',
    schedule_interval=None,  # run only when triggered manually
    catchup=False,
)

# Derive an Airflow-safe task id from the IICS task name.
safe_task_id = sanitize_task_id(CDI_task_name)

# Single task: run the CDI job and stream its logs.
run_cdi_task = PythonOperator(
    task_id=safe_task_id,
    python_callable=execute_task,
    op_kwargs={'task_name': CDI_task_name},
    dag=dag,
)
|
||||
142
airflow/TestDags/IICS_Airflow_Demo.py
Normal file
142
airflow/TestDags/IICS_Airflow_Demo.py
Normal file
@@ -0,0 +1,142 @@
|
||||
import json
|
||||
import sys
|
||||
import time
|
||||
import re
|
||||
import requests
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
from airflow import DAG
|
||||
from airflow.models import Variable
|
||||
from airflow.operators.dummy_operator import DummyOperator
|
||||
from airflow.operators.python_operator import PythonOperator
|
||||
|
||||
|
||||
# from infromatic team, : connect to infromatica (akash)
|
||||
|
||||
# Utility to make task_id Airflow-safe
def sanitize_task_id(task_id: str) -> str:
    """Collapse characters Airflow disallows in task_ids to underscores and
    ensure the result starts with a letter or digit."""
    safe = re.sub(r'[^a-zA-Z0-9_]+', '_', task_id)
    if re.match(r'^[a-zA-Z0-9]', safe):
        return safe
    return 'task_' + safe
|
||||
|
||||
# Fetch parameters from Airflow Variables
iics_username = Variable.get("iics_username")
iics_password = Variable.get("iics_password")
task_type = Variable.get("task_type", default_var="MTT")
base_url = Variable.get("iics_base_url", default_var="https://dm-us.informaticacloud.com/ma/api/v2/user/login")

# Load task names from Airflow Variables (JSON-encoded lists).
CDI_task_name = json.loads(Variable.get("CDI_task_name", default_var='["Task_Date_Dim", "Task_Items", "Task_Store_Sales"]'))
CDI_E_task_name = json.loads(Variable.get("CDI_E_task_name", default_var='["Task_Total_Store_Sales_IWDEMO"]'))

# Default arguments applied to every task in the DAG.
# FIX: removed the bogus 'schedule': '@daily' entry -- 'schedule' is not a
# task (BaseOperator) argument; the schedule is already set on the DAG via
# schedule_interval='@daily'. Leaving it here passes an invalid argument to
# every operator constructed with these defaults.
default_args = {
    'owner': 'infa',
    'depends_on_past': False,
    'email': ['airflow@example.com'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=1),
    # NOTE(review): dynamic start_date is an Airflow anti-pattern; preserved.
    'start_date': datetime.now() - timedelta(seconds=10),
}
|
||||
|
||||
# API logic
def get_session_id(un, pw):
    """Authenticate against IICS; return (icSessionId, serverUrl).

    Exits the process (status 1) when login does not return HTTP 200.
    """
    payload = {'@type': 'login', 'username': un, 'password': pw}
    hdrs = {'Content-Type': 'application/json', 'Accept': 'application/json'}
    resp = requests.post(base_url, data=json.dumps(payload), headers=hdrs)
    if resp.status_code != 200:
        print('API call failed:', resp.status_code)
        print(resp.text)
        sys.exit(1)
    body = resp.json()
    return body["icSessionId"], body["serverUrl"]
|
||||
|
||||
def start_job(session_id, server_url, taskname, taskType):
    """Start an IICS job; return (taskId, runId, taskName).

    FIX: the failure branch previously only printed and fell through,
    returning None -- the caller's three-way tuple unpack then crashed with
    an unrelated TypeError. Now exits the process (status 1) on failure,
    matching get_session_id's error handling.
    """
    job_start_url = server_url + "/api/v2/job"
    headers = {'Content-Type': 'application/json', 'icSessionId': session_id, 'Accept': 'application/json'}
    data = {'@type': 'job', 'taskName': taskname, 'taskType': taskType}
    r = requests.post(job_start_url, data=json.dumps(data), headers=headers)
    if r.status_code != 200:
        print('Job failed to start:', r.status_code)
        print(r.text)
        sys.exit(1)
    response_content = r.json()
    print("Job", taskname, "started successfully")
    return response_content['taskId'], response_content['runId'], response_content['taskName']
|
||||
|
||||
def get_status(server_url, session_id):
    """Return (taskId, executionState, taskName, runId) for the FIRST entry
    in the activity monitor, or None when the monitor is empty or the
    request fails (the caller treats None as "nothing running").

    NOTE(review): this looks at only the first monitor entry rather than a
    specific run -- presumably fine for single-job demos; verify before
    reusing with concurrent jobs.
    """
    monitor_url = server_url + "/api/v2/activity/activityMonitor"
    hdrs = {'Content-Type': 'application/json', 'icSessionId': session_id, 'Accept': 'application/json'}
    resp = requests.get(monitor_url, headers=hdrs)
    if resp.status_code != 200:
        print('Failed to get status:', resp.status_code)
        print(resp.text)
        return None
    for entry in resp.json():
        return entry['taskId'], entry['executionState'], entry['taskName'], entry['runId']
    return None
|
||||
|
||||
def execute_task(task_name):
    """Start an IICS task, poll the activity monitor every 15 seconds, and
    print its session logs once no active run remains."""
    session_id, server_url = get_session_id(iics_username, iics_password)
    task_id, run_id, _ = start_job(session_id, server_url, task_name, task_type)

    log_url = f"{server_url}/api/v2/activity/activityLog/"
    headers = {'Content-Type': 'application/json', 'icSessionId': session_id, 'Accept': 'application/json'}

    active_states = {"RUNNING", "INITIALIZED", "STOPPING", "QUEUED"}
    while True:
        time.sleep(15)
        task_status = get_status(server_url, session_id)
        if task_status and task_status[1] in active_states:
            continue
        # No active entry left: fetch and print this run's session logs.
        r = requests.get(f"{log_url}?taskId={task_id}&runId={run_id}", headers=headers)
        for obj in r.json():
            detail = requests.get(f"{log_url}{obj['id']}/sessionLog", headers=headers)
            print(detail.text)
        break
|
||||
|
||||
# Define DAG
dag = DAG(
    'IICS_Airflow_Demo',
    default_args=default_args,
    description='A Sample IICS Airflow DAG',
    schedule_interval='@daily',
    catchup=False,
)

# Task group 1: CDI tasks fan out between two Dummy anchor tasks.
cdi_start = DummyOperator(task_id='cdi_start', dag=dag)
cdi_end = DummyOperator(task_id='cdi_end', dag=dag)

for name in CDI_task_name:
    tid = 'IICS_CDI_' + sanitize_task_id(name)
    print(f"Creating task: {tid} for original task name: {name}")
    cdi_op = PythonOperator(
        task_id=tid,
        python_callable=execute_task,
        op_kwargs={'task_name': name},
        dag=dag,
    )
    cdi_start >> cdi_op >> cdi_end

# Task group 2: CDI_E tasks run only after every CDI task has finished.
for name in CDI_E_task_name:
    tid = 'IICS_CDI_E_' + sanitize_task_id(name)
    print(f"Creating E task: {tid} for original task name: {name}")
    cdi_e_op = PythonOperator(
        task_id=tid,
        python_callable=execute_task,
        op_kwargs={'task_name': name},
        dag=dag,
    )
    cdi_end >> cdi_e_op
|
||||
75
airflow/TestDags/archive/devo_connector_test.py
Normal file
75
airflow/TestDags/archive/devo_connector_test.py
Normal file
@@ -0,0 +1,75 @@
|
||||
import sys
|
||||
import os
|
||||
from airflow import DAG
|
||||
from airflow.operators.python import PythonOperator
|
||||
from airflow.utils.dates import days_ago
|
||||
from datetime import datetime, timedelta
|
||||
import logging
|
||||
|
||||
# Importing custom modules
|
||||
sys.path.append('/opt/airflow/python/connectors/devo')
|
||||
sys.path.append('/opt/airflow/python/mrds_common')
|
||||
sys.path.append('/opt/airflow/src/airflow/dags/ods/rqsd')
|
||||
sys.path.append('/opt/airflow/python/connectors/devo')
|
||||
|
||||
# Import the main function from your script
|
||||
from devo_connector import main as devo_main
|
||||
|
||||
### DEVO CONNECTOR WITH STATIC workflow (task 3)
|
||||
|
||||
|
||||
# Default DAG arguments applied to every task.
default_args = dict(
    owner='airflow',
    depends_on_past=False,
    start_date=days_ago(1),
    email_on_failure=False,
    email_on_retry=False,
    retries=3,
    retry_delay=timedelta(minutes=5),
)
|
||||
|
||||
with DAG(
    dag_id='devo_connector_test',
    default_args=default_args,
    description='Run devo RQSD data ingestion workflow',
    schedule_interval=None,  # we can set later
    #start_date=datetime(2025, 10, 7),
    catchup=False,
    tags=['Devo', 'RQSD', 'Connector'],
) as dag:

    def run_devo_connector_rqsd(**context):
        """Validate loader-DB credentials from the environment, then run the
        Devo RQSD connector with hard-coded test-harness values."""
        try:
            # Environment selection and loader-DB credentials.
            env = os.getenv("MRDS_ENV")
            username = os.getenv("MRDS_LOADER_DB_USER")
            password = os.getenv("MRDS_LOADER_DB_PASS")
            tnsalias = os.getenv("MRDS_LOADER_DB_TNS")
            if not (username and password and tnsalias):
                raise ValueError(
                    "Missing one or more required environment variables: "
                    "MRDS_LOADER_DB_USER, MRDS_LOADER_DB_PASS, MRDS_LOADER_DB_TNS"
                )

            logging.info(
                f"Starting Casper RQSD workflow from Airflow DAG for env '{env}'"
            )

            # Hard-coded run context / config locations (test values).
            workflow_context = {"run_id": 34, "a_workflow_history_key": 6}
            flow_config_path = "/opt/airflow/src/airflow/dags/ods/rqsd/rqsd_process/config/yaml/flow_config_rqsd_observations.yaml"
            env_config_path = "/opt/airflow/python/connectors/devo/config/env_config_rqsd.yaml"

            logging.info("Starting Devo RQSD workflow from Airflow DAG")
            devo_main(workflow_context, flow_config_path, env_config_path, env)
            logging.info("Devo RQSD workflow completed successfully")
        except Exception as e:
            # Log with traceback, then re-raise so Airflow marks the task failed.
            logging.error(f"Error running Devo RQSD workflow: {e}", exc_info=True)
            raise

    run_devo = PythonOperator(
        task_id='run_devo_connector_rqsd',
        python_callable=run_devo_connector_rqsd,
    )
|
||||
158
airflow/TestDags/archive/devo_replicator_scheduler_rar.py
Normal file
158
airflow/TestDags/archive/devo_replicator_scheduler_rar.py
Normal file
@@ -0,0 +1,158 @@
|
||||
import os
|
||||
import sys
|
||||
import logging
|
||||
|
||||
from airflow.decorators import dag
|
||||
from airflow.operators.python import PythonOperator
|
||||
from airflow.utils.dates import days_ago
|
||||
from airflow.utils.trigger_rule import TriggerRule
|
||||
from airflow import DAG
|
||||
from airflow.decorators import task
|
||||
from airflow.operators.trigger_dagrun import TriggerDagRunOperator
|
||||
from datetime import datetime, timedelta
|
||||
from airflow.operators.python import BranchPythonOperator
|
||||
from airflow.operators.empty import EmptyOperator
|
||||
|
||||
from mrds.utils import oraconn
|
||||
|
||||
sys.path.append('/opt/airflow/python/connectors/devo')
|
||||
sys.path.append('/opt/airflow/python/mrds_common')
|
||||
|
||||
DAG_NAME = "dev_replicator_scheduler_rar"
TARGET_DAG_ID = "devo_replicator_trigger_rar"


def get_devo_replica_table_options():
    """Return the 'OWNER.TABLE_NAME' strings registered in
    CT_MRDS.a_devo_replica_mgmt_rar, or [] when the lookup fails."""
    oracle_conn = None
    try:
        oracle_conn = oraconn.connect('MRDS_LOADER')
        cur = oracle_conn.cursor()
        cur.execute("SELECT OWNER || '.' || TABLE_NAME FROM CT_MRDS.a_devo_replica_mgmt_rar ORDER BY OWNER, TABLE_NAME")
        names = [row[0] for row in cur.fetchall()]
        cur.close()
        return names
    except Exception as e:
        logging.error(f"Error getting MOPDB table options: {e}")
        return []
    finally:
        # Always release the connection, even on error.
        if oracle_conn:
            oracle_conn.close()
|
||||
|
||||
def check_table_precondition(table_full_name):
    """Decide whether *table_full_name* ('OWNER.TABLE') needs a Devo refresh.

    Returns {"table": name, "trigger": 'Y'|'N'|'ERROR'}: 'Y' when the Oracle
    side finished loading more recently than the last successful Devo run.
    """
    oracle_conn = None
    try:
        oracle_conn = oraconn.connect('MRDS_LOADER')
        cursor = oracle_conn.cursor()
        # Compare the last successful Oracle load against the last FINISHED
        # Devo replication; an unfinished Devo run counts as very old
        # (1999-01-01) so the table is always re-triggered.
        sql = """
            WITH LAST_UPDATE_ORACLE AS (
                SELECT max(process_end) as process_end
                FROM CT_RAR.A_RAR_FOR_DISC_MONITORING
                WHERE upper(owner||'.'||TARGET_TABLE_NAME) = upper(:table_name)
                AND PROCESS_END is not null AND PROCESS_SUCCESSFUL='Y'
            ),
            LAST_UPDATE_DEVO AS (
                SELECT CASE WHEN last_status = 'FINISHED' THEN LAST_END_TIME ELSE TO_DATE('01-JAN-1999', 'DD-MON-YYYY') END as process_end
                FROM CT_MRDS.a_devo_replica_mgmt_rar
                WHERE OWNER || '.' || TABLE_NAME = :table_name
            )
            SELECT CASE WHEN (SELECT process_end FROM LAST_UPDATE_ORACLE) > (SELECT process_end FROM LAST_UPDATE_DEVO)
            THEN 'Y' ELSE 'N' END AS TRIGGER_DEVO_REPLICATOR FROM dual
        """
        cursor.execute(sql, table_name=table_full_name)
        row = cursor.fetchone()
        status = row[0] if row else 'N'
        logging.info(f"Precondition for {table_full_name}: {status}")
        cursor.close()
        return {"table": table_full_name, "trigger": status}
    except Exception as e:
        logging.error(f"Error checking precondition for {table_full_name}: {e}")
        return {"table": table_full_name, "trigger": 'ERROR'}
    finally:
        if oracle_conn:
            oracle_conn.close()
|
||||
|
||||
def get_tables_to_trigger(precondition_results):
    """From precondition results, build TriggerDagRun conf dicts
    ({'owner_table': name}) for every table whose trigger flag is 'Y'."""
    selected = [entry["table"] for entry in precondition_results if entry["trigger"] == "Y"]
    logging.info(f"Tables meeting precondition: {selected}")
    return [{"owner_table": name} for name in selected]
|
||||
|
||||
def branch_on_tables(ti):
    """Branch callable: return the task_id to follow -- trigger the
    replicators when any table's precondition flag is 'Y', otherwise the
    no-op path."""
    results = ti.xcom_pull(task_ids='check_all_tables')
    has_work = any(r["trigger"] == "Y" for r in results)
    return "trigger_devo_replicators" if has_work else "no_table_updated"
|
||||
|
||||
# Defaults applied to every task in the scheduler DAG.
default_args = dict(
    owner='airflow',
    depends_on_past=False,
    start_date=days_ago(1),
    email_on_failure=False,
    email_on_retry=False,
    retries=1,
    retry_delay=timedelta(minutes=2),
)
|
||||
|
||||
with DAG(
    dag_id=DAG_NAME,
    default_args=default_args,
    schedule_interval=None,
    catchup=False,
    tags=['DevoScheduler', 'DevoReplicatorTrigger']
) as dag:

    @task()
    def fetch_tables():
        """TaskFlow wrapper: list candidate OWNER.TABLE names."""
        return get_devo_replica_table_options()

    @task()
    def check_all_tables(table_list):
        """Run the precondition check for every table; log Y/N counts."""
        results = [check_table_precondition(tbl) for tbl in table_list]
        yes_count = sum(1 for r in results if r["trigger"] == "Y")
        no_count = sum(1 for r in results if r["trigger"] == "N")
        logging.info(f"Precondition results: {results}")
        logging.info(f"Tables with trigger = 'Y': {yes_count}")
        logging.info(f"Tables with trigger = 'N': {no_count}")
        return results

    @task()
    def output_tables_to_trigger(precondition_results):
        """TaskFlow wrapper: conf dicts for the mapped trigger operator."""
        return get_tables_to_trigger(precondition_results)

    # Decide between triggering replicators and the no-op path.
    branch_task = BranchPythonOperator(
        task_id="branch_trigger_check",
        python_callable=branch_on_tables,
        provide_context=True,
    )

    no_table_updated = EmptyOperator(task_id="no_table_updated")

    tables = fetch_tables()
    precondition_results = check_all_tables(tables)
    tables_to_trigger = output_tables_to_trigger(precondition_results)

    # One mapped TriggerDagRun per qualifying table.
    trigger_dag = TriggerDagRunOperator.partial(
        task_id="trigger_devo_replicators",
        trigger_dag_id=TARGET_DAG_ID,
        execution_date="{{ ds }}"
    ).expand(conf=tables_to_trigger)

    # Dependencies for branching
    tables >> precondition_results >> tables_to_trigger >> branch_task
    branch_task >> [trigger_dag, no_table_updated]
|
||||
|
||||
|
||||
|
||||
|
||||
"""
|
||||
1. fetch_tables gets the list of tables.
|
||||
2. check_all_tables checks each table’s trigger status and logs counts.
|
||||
3. output_tables_to_trigger prepares the mapped parameter list for triggering downstream DAGs.
|
||||
4. branch_on_tables decides the path:
|
||||
"trigger_devo_replicators" if any table triggers.
|
||||
"no_table_updated" otherwise.
|
||||
5. BranchPythonOperator implements the conditional branching.
|
||||
6. TriggerDagRunOperator dynamically triggers a run of devo_replicator_trigger_rar per qualifying table.
|
||||
7. EmptyOperator represents the "no tables to trigger" branch.
|
||||
|
||||
"""
|
||||
112
airflow/TestDags/archive/devo_replicator_test.py
Normal file
112
airflow/TestDags/archive/devo_replicator_test.py
Normal file
@@ -0,0 +1,112 @@
|
||||
from airflow import DAG
|
||||
from airflow.operators.python import PythonOperator
|
||||
from airflow.providers.oracle.hooks.oracle import OracleHook
|
||||
from airflow.utils.dates import days_ago
|
||||
from datetime import timedelta
|
||||
import logging
|
||||
|
||||
|
||||
# Test fixture parameters for the replicator procedures (placeholder values).
p_run_id = 1234
p_service_name = 'MyService'
p_table_owner = 'MY_SCHEMA'
p_table_name = 'MY_TABLE'
p_objectstore_uri = 's3://bucket/uri'  # subject to change appropriate for RAR/MOPDB
|
||||
|
||||
|
||||
def start_log_table_task(**context):
    """Call MRDS_LOADER.DATA_REPLICATOR.start_log_table on the default
    Oracle connection; logs and re-raises on any failure."""
    proc_call = "BEGIN MRDS_LOADER.DATA_REPLICATOR.start_log_table(:1, :2, :3, :4); END;"
    try:
        hook = OracleHook(oracle_conn_id='oracle_default')
        connection = hook.get_conn()
        cur = connection.cursor()
        cur.execute(proc_call, [p_run_id, p_service_name, p_table_owner, p_table_name])
        connection.commit()
        cur.close()
        connection.close()
        logging.info("start_log_table executed successfully.")
    except Exception as e:
        logging.error("Failed to execute start_log_table: %s", e, exc_info=True)
        raise
|
||||
|
||||
def export_table_task(**context):
    """Call MRDS_LOADER.DATA_REPLICATOR.export_table to push the table to
    the object store; logs and re-raises on any failure."""
    proc_call = "BEGIN MRDS_LOADER.DATA_REPLICATOR.export_table(:1, :2, :3, :4); END;"
    try:
        hook = OracleHook(oracle_conn_id='oracle_default')
        connection = hook.get_conn()
        cur = connection.cursor()
        cur.execute(proc_call, [p_service_name, p_table_owner, p_table_name, p_objectstore_uri])
        connection.commit()
        cur.close()
        connection.close()
        logging.info("export_table executed successfully.")
    except Exception as e:
        logging.error("Failed to execute export_table: %s", e, exc_info=True)
        raise
|
||||
|
||||
def devo_impyla_task(**context):
    """Placeholder for the future Impyla (Devo) step -- only logs for now.

    Example for future:
        from impala.dbapi import connect
        conn = connect(host="...", port=21050)
        cursor = conn.cursor()
        cursor.execute("...")
    """
    logging.info("Impyla (Devo) task placeholder ran. Please implement.")
|
||||
|
||||
def end_log_table_task(**context):
    """Call MRDS_LOADER.DATA_REPLICATOR.end_log_table to close the run's
    log entry; logs and re-raises on any failure."""
    proc_call = "BEGIN MRDS_LOADER.DATA_REPLICATOR.end_log_table(:1, :2, :3); END;"
    try:
        hook = OracleHook(oracle_conn_id='oracle_default')
        connection = hook.get_conn()
        cur = connection.cursor()
        cur.execute(proc_call, [p_service_name, p_table_owner, p_table_name])
        connection.commit()
        cur.close()
        connection.close()
        logging.info("end_log_table executed successfully.")
    except Exception as e:
        logging.error("Failed to execute end_log_table: %s", e, exc_info=True)
        raise
|
||||
|
||||
|
||||
# Task defaults for the replicator test DAG.
default_args = dict(
    owner='airflow',
    depends_on_past=False,
    start_date=days_ago(1),
    email_on_failure=False,
    email_on_retry=False,
    retries=2,
    retry_delay=timedelta(minutes=5),
)


with DAG(
    dag_id='rqsd_devo_replicator_test_old',
    default_args=default_args,
    description='Run Devo replicator workflow',
    schedule_interval=None,
    catchup=False,
    tags=['Devo', 'RQSD', 'Replicator'],
) as dag:

    t1 = PythonOperator(task_id='start_log_table', python_callable=start_log_table_task)
    t2 = PythonOperator(task_id='export_table', python_callable=export_table_task)
    t3 = PythonOperator(task_id='devo_impyla', python_callable=devo_impyla_task)
    t4 = PythonOperator(task_id='end_log_table', python_callable=end_log_table_task)

    # Linear pipeline: open log -> export -> Devo refresh -> close log.
    t1 >> t2 >> t3 >> t4
|
||||
|
||||
132
airflow/TestDags/archive/devo_replicator_withSQLOperator.py
Normal file
132
airflow/TestDags/archive/devo_replicator_withSQLOperator.py
Normal file
@@ -0,0 +1,132 @@
|
||||
# Standard library
import logging
import os
import sys
from datetime import timedelta

# Airflow
from airflow import DAG
#from airflow.providers.oracle.operators.oracle import SQLExecuteQueryOperator
from airflow.providers.common.sql.operators.sql import SQLExecuteQueryOperator
from airflow.operators.python import PythonOperator
from airflow.utils.dates import days_ago

# Importing custom modules (os/sys were used below but never imported)
sys.path.append('/opt/airflow/python/connectors/devo')
sys.path.append('/opt/airflow/python/mrds_common')
sys.path.append('/opt/airflow/src/airflow/dags/ods/rqsd')

# Import your functions
from mrds.utils.manage_runs import init_workflow, finalise_workflow
from devo_replicator.data_replicator.impala_refresher import main as impala_main
|
||||
|
||||
#step 5) Devo replication
|
||||
|
||||
## DEVO REPLICATOR WITH SQLOperator
|
||||
### check the oracle connection, fixed params --> test cnx
|
||||
### pick it from a file,
|
||||
|
||||
|
||||
|
||||
|
||||
# TASK :
|
||||
# - retrive directly from config file the param {0} and {1} based dev/test
|
||||
|
||||
# need to be passed from infromatic (WLA call) to dags
|
||||
# wla to airflow, cnx done
|
||||
|
||||
# Task defaults for the SQL-operator replicator DAG.
default_args = dict(
    owner='airflow',
    depends_on_past=False,
    start_date=days_ago(1),
    email_on_failure=False,
    email_on_retry=False,
    retries=2,
    retry_delay=timedelta(minutes=5),
)
|
||||
|
||||
with DAG(
    dag_id='rqsd_devo_replicator_2',
    default_args=default_args,
    description='Run Devo replicator workflow',
    schedule_interval=None,
    catchup=False,
    tags=['Devo', 'RQSD', 'Replicator'],
) as dag:

    # --- Static replication parameters --------------------------------------
    # FIX: these were previously assigned only inside init_step(), so the
    # SQLExecuteQueryOperator definitions below referenced undefined names at
    # DAG-parse time (NameError on import). They are now DAG-scope constants.
    p_service_name = 'MOPDB'
    p_table_owner = 'MPEC'
    p_table_name = 'T_MPEC'
    # parse the config yml and filter by dev or test and mopdb or rar
    p_objectstore_uri = 'https://devo-crp-ffppyd8q.bucket.vpce-040b28f5818b670c1-owicl3ow.s3.eu-central-1.vpce.amazonaws.com/mopdb/db'  # subject to change as appropriate
    env_config_path = "/opt/airflow/python/devo_replicator/config/env_config.yaml"
    # TODO(review): replace with 'crp_mopdb' (mopdb) or 'crp_rar' (rar).
    corporate_store = 'crp_mopdb'

    def init_step(**context):
        """Log the Airflow run id and publish it over XCom for debugging."""
        p_run_id = str(context['ti'].run_id)
        print(f"=== DEBUG INFO : {p_run_id} ===")
        context['ti'].xcom_push(key='p_run_id', value=p_run_id)

    # FIX: renamed the operator variable so it no longer shadows the
    # init_step callable defined above.
    init_step_op = PythonOperator(
        task_id='init_step',
        python_callable=init_step,
    )

    t1 = SQLExecuteQueryOperator(
        task_id='start_log_table',
        # FIX: SQLExecuteQueryOperator takes conn_id, not oracle_conn_id.
        conn_id='oracle_default',
        sql="BEGIN MRDS_LOADER.DATA_REPLICATOR.start_log_table(:p_run_id, :p_service_name, :p_table_owner, :p_table_name); END;",
        parameters={
            # FIX: the run id is only known at run time; 'parameters' is a
            # templated field, so render it with Jinja instead of referencing
            # a parse-time name that did not exist.
            'p_run_id': "{{ run_id }}",
            'p_service_name': p_service_name,
            'p_table_owner': p_table_owner,
            'p_table_name': p_table_name,
        },
    )

    t2 = SQLExecuteQueryOperator(
        task_id='export_table',
        conn_id='oracle_default',
        sql="BEGIN MRDS_LOADER.DATA_REPLICATOR.export_table(:p_service_name, :p_table_owner, :p_table_name, :p_objectstore_uri); END;",
        parameters={
            'p_service_name': p_service_name,
            'p_table_owner': p_table_owner,
            'p_table_name': p_table_name,
            'p_objectstore_uri': p_objectstore_uri,
        },
    )

    def devo_impyla_task(**context):
        """Refresh the replicated table on the Devo/Impala side.

        FIX: the original body referenced undefined names (env_config_path,
        env, table, corporate_store); they are now derived from the DAG-level
        configuration above. TODO(review): confirm the values before enabling.
        """
        env = os.getenv("MRDS_ENV")
        table = f"{p_table_owner}.{p_table_name}"
        status = impala_main(env_config_path, env, table, corporate_store)
        logging.info("Impyla (Devo) task placeholder ran. Please implement.")

    # get details-data from impala ( its pending )
    t3 = PythonOperator(
        task_id='devo_impyla',
        python_callable=devo_impyla_task,
    )

    # push to s3, we need to call the proc
    t4 = SQLExecuteQueryOperator(
        task_id='end_log_table',
        conn_id='oracle_default',
        sql="BEGIN MRDS_LOADER.DATA_REPLICATOR.end_log_table(:p_service_name, :p_table_owner, :p_table_name); END;",
        parameters={
            'p_service_name': p_service_name,
            'p_table_owner': p_table_owner,
            'p_table_name': p_table_name,
        },
    )

    # TODO(review): t4 should run even when an upstream task fails
    # (trigger_rule='all_done'); a final t5 could then check overall status.
    init_step_op >> t1 >> t2 >> t3 >> t4
|
||||
65
airflow/TestDags/archive/dmarsdb1_data_replicator_test.py
Normal file
65
airflow/TestDags/archive/dmarsdb1_data_replicator_test.py
Normal file
@@ -0,0 +1,65 @@
|
||||
from airflow import DAG
|
||||
from airflow.providers.common.sql.operators.sql import SQLExecuteQueryOperator
|
||||
from airflow.operators.python import PythonOperator
|
||||
from datetime import datetime, timedelta
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)


## OLD ( Package repliction)

def test_oracle_connection(**context):
    """Open the 'marsdb_loader' Oracle connection and run SELECT 1 FROM dual,
    logging the outcome; re-raises on failure so Airflow marks the task
    failed."""
    conn_id = "marsdb_loader"
    from airflow.providers.oracle.hooks.oracle import OracleHook

    try:
        logger.debug("Attempting to connect to Oracle database...")
        hook = OracleHook(oracle_conn_id=conn_id)
        connection = hook.get_conn()
        cur = connection.cursor()
        cur.execute("SELECT 1 FROM dual")
        row = cur.fetchone()
        logger.info(f"Connection test successful. Result: {row}")
        cur.close()
        connection.close()
    except Exception as e:
        logger.error(f"Connection test failed: {str(e)}")
        raise
|
||||
|
||||
# Task-level defaults for the PL/SQL test DAG.
default_args = dict(
    depends_on_past=False,
    start_date=datetime(2025, 6, 25),
    retries=1,
    retry_delay=timedelta(seconds=15),
)
|
||||
|
||||
with DAG(
    'oracle_plsql_test_dag',
    default_args=default_args,
    schedule_interval=None,
    catchup=False,
) as dag:

    # Smoke-test the Oracle connection before invoking the procedure.
    test_connection = PythonOperator(
        task_id='test_oracle_connection',
        python_callable=test_oracle_connection,
    )

    # Invoke the replicator procedure with named parameters.
    run_plsql = SQLExecuteQueryOperator(
        task_id='run_plsql_procedure',
        conn_id="marsdb_loader",
        sql="""
        BEGIN
            DATA_REPLICATOR.export_table(
                p_table_owner => 'c2d',
                p_table_name => 't_all_assets_servicer',
                p_objectstore_uri => 'https://oci-test-sani.bucket.vpce-0b3a5f000733397b0-kxlyoh5z.s3.eu-central-1.vpce.amazonaws.com/',
                p_date_column => 'SNAPSHOT_DATE'
            );
        END;
        """,
    )

    test_connection >> run_plsql
|
||||
171
airflow/TestDags/archive/old_devo_rqsd_mrds_workflow.py
Normal file
171
airflow/TestDags/archive/old_devo_rqsd_mrds_workflow.py
Normal file
@@ -0,0 +1,171 @@
|
||||
import sys
import os
from airflow import DAG
from airflow.operators.python import PythonOperator
from airflow.utils.dates import days_ago
from datetime import datetime, timedelta
import logging

### DEVO CONNECTOR WITH DYNAMIC WORKFLOW CONTEXT & HISTORY KEY

# Importing custom modules: these paths must be appended BEFORE the project
# imports below so that mrds/devo packages resolve at parse time.
sys.path.append('/opt/airflow/python/connectors/devo')
sys.path.append('/opt/airflow/python/mrds_common')
sys.path.append('/opt/airflow/src/airflow/dags/ods/rqsd')

# Import your functions (project-local packages made importable above).
from mrds.utils.manage_runs import init_workflow, finalise_workflow
from devo_connector import main as devo_main
from devo_connector_v2 import run as devo_main2
from mrds.core import main as mrds_main

# Default arguments applied to every task in this DAG.
default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': days_ago(1),
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 3,
    'retry_delay': timedelta(minutes=5),
}

# Dynamic name extraction from basename: the DAG id tracks the file name.
dag_id = os.path.splitext(os.path.basename(__file__))[0]

with DAG(
    dag_id=dag_id,
    default_args=default_args,
    description='Run devo RQSD data ingestion workflow with MRDS processing',
    schedule_interval=None,  # manual trigger only
    catchup=False,
    tags=["Devo", "RQSD", "MRDS", "Connector"],
    # Trigger-time parameters; currently unused by the tasks below, which
    # hard-code their own filenames/config paths — TODO confirm intent.
    params={
        "source_filename": "",
        "config_file": "",
    },
) as dag:
|
||||
|
||||
    def run_devo_connector_rqsd(**context):
        """Run Devo RQSD connector workflow.

        Initialises an MRDS workflow-history record, runs the Devo v2
        connector, and pushes the workflow context and history key to XCom
        for the downstream tasks. On failure, best-effort marks the history
        record as FAILED before re-raising.
        """
        try:
            # Required environment: env selector plus loader DB credentials.
            env = os.getenv("MRDS_ENV")
            username = os.getenv("MRDS_LOADER_DB_USER")
            password = os.getenv("MRDS_LOADER_DB_PASS")
            tnsalias = os.getenv("MRDS_LOADER_DB_TNS")

            if not all([username, password, tnsalias]):
                raise ValueError(
                    "Missing one or more required environment variables: "
                    "MRDS_LOADER_DB_USER, MRDS_LOADER_DB_PASS, MRDS_LOADER_DB_TNS"
                )

            logging.info(f"Starting Devo RQSD workflow from Airflow DAG for env '{env}'")

            # Fixed identifiers for the MRDS run-history record.
            database_name = 'MOPDB'
            workflow_name = 'w_MOPDB_RQSD_PROCESS'
            workflow_run_id = str(context['ti'].run_id)

            # Register the run; the returned key identifies it for finalisation.
            a_workflow_history_key = init_workflow(database_name, workflow_name, workflow_run_id)
            logging.info(f"Initialized workflow with history key: {a_workflow_history_key}")

            workflow_context = {
                "run_id": workflow_run_id,
                "a_workflow_history_key": a_workflow_history_key
            }

            flow_config_path = "/opt/airflow/src/airflow/dags/ods/rqsd/rqsd_process/config/yaml/flow_config_rqsd_observations.yaml"
            env_config_path = "/opt/airflow/python/connectors/devo/config/env_config_rqsd.yaml"

            logging.info("Starting Devo RQSD workflow from Airflow DAG")
            # devo_main2 returns a count — presumably rows/records ingested; verify against connector.
            count = devo_main2(workflow_context, flow_config_path, env_config_path, env)
            print("=================================================================")
            print(f"Devo RQSD workflow completed successfully with count : {count}")
            logging.info(f"Devo RQSD workflow completed successfully with count : {count}")

            # Push the workflow context and history key to XCom for downstream tasks
            context['ti'].xcom_push(key='workflow_history_key', value=a_workflow_history_key)
            context['ti'].xcom_push(key='workflow_context', value=workflow_context)

        except Exception as e:
            logging.error(f"Error running Devo RQSD workflow: {e}", exc_info=True)
            # If init_workflow succeeded but workflow failed, finalize with FAILED status.
            # locals() check: the key only exists if init_workflow returned.
            if 'a_workflow_history_key' in locals():
                try:
                    finalise_workflow(a_workflow_history_key, "FAILED")
                except Exception as finalise_error:
                    logging.error(f"Failed to finalise workflow after error: {finalise_error}")
            raise
|
||||
|
||||
    def run_mrds_task(**context):
        """Run MRDS processing task.

        Pulls the workflow context published by run_devo_connector_rqsd and
        hands a fixed source file + flow config to mrds.core.main. Raises on
        any failure so Airflow marks the task failed.
        """
        try:

            ti = context.get('ti')
            # Context produced by the upstream connector task (via XCom).
            workflow_context = ti.xcom_pull(key='workflow_context', task_ids='run_devo_connector_rqsd')
            if not workflow_context:
                raise ValueError("No workflow_context from Task 1")

            print("=== workflow_context ====:",workflow_context)

            # Hard-coded inputs; the DAG-level params are not consulted here.
            source_filename = "RQSD_OBSERVATIONS.csv"
            config_file = "/opt/airflow/src/airflow/dags/ods/rqsd/rqsd_process/config/yaml/flow_config_devo_process.yaml"

            print("---- run_mrds_task ----")
            print("source_filename :", source_filename)
            print("config_file = ", config_file)
            print("------------------------")

            # NOTE(review): these checks can never fire while the two values
            # above are hard-coded non-empty literals — kept for safety.
            if not source_filename:
                raise ValueError("No source_filename provided in DAG run params.")
            if not config_file:
                raise ValueError("No config_file path provided in DAG run params.")

            logging.info(f"Starting MRDS task with source_filename: {source_filename}, config_file: {config_file}")

            # Run MRDS with the workflow context from the previous task.
            mrds_main(workflow_context, source_filename, config_file, generate_workflow_context=True)

            logging.info("MRDS task completed successfully")

        except Exception as e:
            logging.error(f"Error running MRDS task: {e}", exc_info=True)
            raise
|
||||
|
||||
def finalise_workflow_task(**context):
|
||||
"""Finalize workflow with SUCCESS status"""
|
||||
# Pull the workflow_history_key from XCom pushed by the main task
|
||||
ti = context['ti']
|
||||
a_workflow_history_key = ti.xcom_pull(key='workflow_history_key', task_ids='run_devo_connector_rqsd')
|
||||
|
||||
if a_workflow_history_key is None:
|
||||
raise ValueError("No workflow history key found in XCom; cannot finalise workflow")
|
||||
|
||||
# Call finalise with SUCCESS status
|
||||
finalise_workflow(a_workflow_history_key, "SUCCESS")
|
||||
logging.info(f"Finalised workflow with history key {a_workflow_history_key} as SUCCESS")
|
||||
|
||||
# Task definitions
|
||||
run_devo = PythonOperator(
|
||||
task_id='run_devo_connector_rqsd',
|
||||
python_callable=run_devo_connector_rqsd,
|
||||
provide_context=True,
|
||||
)
|
||||
|
||||
run_mrds = PythonOperator(
|
||||
task_id='run_mrds_task',
|
||||
python_callable=run_mrds_task,
|
||||
provide_context=True,
|
||||
)
|
||||
|
||||
finalize = PythonOperator(
|
||||
task_id='finalise_workflow',
|
||||
python_callable=finalise_workflow_task,
|
||||
provide_context=True,
|
||||
)
|
||||
|
||||
# Task dependencies
|
||||
run_devo >> run_mrds >> finalize
|
||||
320
airflow/TestDags/archive/rqsd_devo_replicator_2.py
Normal file
320
airflow/TestDags/archive/rqsd_devo_replicator_2.py
Normal file
@@ -0,0 +1,320 @@
|
||||
from __future__ import annotations

import os
import sys
import logging
import yaml
from datetime import timedelta

from airflow import DAG
from airflow.utils.dates import days_ago
from airflow.utils.trigger_rule import TriggerRule
from airflow.operators.python import PythonOperator
from airflow.providers.common.sql.operators.sql import SQLExecuteQueryOperator

# AirflowFailException marks a task failed without retrying; older Airflow
# versions only ship AirflowException, so alias it as a fallback.
try:
    from airflow.exceptions import AirflowFailException
except Exception:  # fallback for older Airflow
    from airflow.exceptions import AirflowException as AirflowFailException

# --- Custom module paths (as in snippet) ---
sys.path.append('/opt/airflow/python/connectors/devo')
sys.path.append('/opt/airflow/python/mrds_common')
sys.path.append('/opt/airflow/src/airflow/dags/ods/rqsd')
sys.path.append('/opt/airflow/python/devo_replicator/data_replicator')

# --- custom imports (resolved via the sys.path additions above) ---
from mrds.utils import oraconn
from impala_refresher import main as impala_main

# --- Config path: env/store settings consumed by init_step ---
ENV_CONFIG_PATH = "/opt/airflow/python/devo_replicator/config/env_config.yaml"

default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': days_ago(1),
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 2,
    'retry_delay': timedelta(minutes=5),
}

# Replication pipeline: init -> start log -> export -> Impala refresh ->
# end log -> failure check. Manual trigger only.
with DAG(
    dag_id='rqsd_devo_replicator_2',
    default_args=default_args,
    description='Run Devo replicator workflow',
    schedule=None,
    catchup=False,
    tags=['Devo', 'RQSD', 'Replicator'],
) as dag:
|
||||
|
||||
# -------------------------------
|
||||
# 1) Init: read config + set XCom
|
||||
# -------------------------------
|
||||
    # -------------------------------
    # 1) Init: read config + set XCom
    # -------------------------------
    def init_step(**context):
        """Resolve env/store parameters, load the env config YAML, and
        publish every derived parameter to XCom for the downstream tasks.
        """
        dag_run = context.get("dag_run")
        ti = context["ti"]

        # Trigger-time overrides arrive via dag_run.conf (may be absent).
        conf = (dag_run.conf or {}) if dag_run else {}

        env = conf.get("env") or os.getenv("MRDS_ENV", "dev").lower()
        if env not in {"dev", "tst"}:
            raise ValueError(f"Unsupported env '{env}'. Expected 'dev' or 'tst'.")

        # hardcoded the mopdb
        # ====================================
        store = "mopdb"
        # ====================================

        # Defensive check kept although 'store' is currently hard-coded above.
        if store not in {"mopdb", "rar"}:
            raise ValueError(f"Unsupported store '{store}'. Expected 'mopdb' or 'rar'.")

        p_service_name = "MOPDB" if store == "mopdb" else "RAR"
        p_table_owner = "MPEC"
        p_table_name = "T_MPEC"

        # Env- and store-specific settings live in a single YAML document.
        with open(ENV_CONFIG_PATH, "r") as f:
            cfg = yaml.safe_load(f)

        env_cfg = cfg[env]
        store_cfg = cfg[store]

        # The S3 URI template carries a '{0}' placeholder for the store name.
        p_objectstore_uri = env_cfg["S3_LOCATION_URI"].replace("{0}", store.lower())

        p_run_id = str(ti.run_id)
        logging.info("=== init_step === env=%s store=%s run_id=%s", env, store, p_run_id)
        logging.info("objectstore_uri=%s", p_objectstore_uri)

        xcom = {
            "env": env,
            "store": store,
            "config_path": ENV_CONFIG_PATH,
            "p_run_id": p_run_id,
            "p_service_name": p_service_name,
            "p_table_owner": p_table_owner,
            "p_table_name": p_table_name,
            "p_objectstore_uri": p_objectstore_uri,
            "corporate_store": store_cfg["corporate_store"],  # "crp_mopdb" or "crp_rar"
        }

        print(" ============= DEBUG PARAMS ============= ")
        print(xcom)

        # One XCom entry per parameter so downstream tasks can pull selectively.
        for k, v in xcom.items():
            ti.xcom_push(key=k, value=v)

    init = PythonOperator(
        task_id='init_step',
        python_callable=init_step,
    )
|
||||
|
||||
# ------------------------------------
|
||||
# 2) log table (Oracle procedure)
|
||||
# ------------------------------------
|
||||
    # ------------------------------------
    # 2) log table (Oracle procedure)
    # ------------------------------------
    def start_log_table_task(**context):
        """Record the start of this run via DATA_REPLICATOR.start_log_table.

        Opens a dedicated Oracle connection, calls the procedure with the
        parameters published by init_step, commits, and always closes the
        connection in the finally block.
        """
        ti = context["ti"]

        # Get parameters from XCom (published by init_step).
        p_run_id = ti.xcom_pull(task_ids='init_step', key='p_run_id')
        p_service_name = ti.xcom_pull(task_ids='init_step', key='p_service_name')
        p_table_owner = ti.xcom_pull(task_ids='init_step', key='p_table_owner')
        p_table_name = ti.xcom_pull(task_ids='init_step', key='p_table_name')

        # Create Oracle connection for this task: one connection per task so
        # tasks remain independent across workers/retries.
        oracle_conn = None
        try:
            oracle_conn = oraconn.connect('MRDS_LOADER')
            logging.info("Oracle connection established successfully for start_log_table")

            # Execute Oracle procedure using oraconn.run_proc() directly
            oraconn.run_proc(
                oracle_conn,
                'MRDS_LOADER.DATA_REPLICATOR.start_log_table',
                [p_run_id, p_service_name, p_table_owner, p_table_name]
            )
            oracle_conn.commit()
            logging.info("start_log_table procedure executed successfully")

        except Exception as e:
            logging.error(f"Error in start_log_table: {e}")
            raise
        finally:
            # Close even on failure; closing errors are logged, not raised.
            if oracle_conn:
                try:
                    oracle_conn.close()
                    logging.info("Oracle connection closed for start_log_table")
                except Exception as e:
                    logging.error(f"Error closing connection in start_log_table: {e}")

    t1 = PythonOperator(
        task_id='start_log_table',
        python_callable=start_log_table_task,
    )
|
||||
|
||||
# ---------------------------------------------------------
|
||||
# 3) Export table (Oracle procedure writes to object store)
|
||||
# ---------------------------------------------------------
|
||||
    # ---------------------------------------------------------
    # 3) Export table (Oracle procedure writes to object store)
    # ---------------------------------------------------------
    def export_table_task(**context):
        """Export the table to the object store via DATA_REPLICATOR.export_table.

        Uses the service/owner/table/URI parameters published by init_step;
        connection lifecycle mirrors start_log_table_task.
        """
        ti = context["ti"]

        # Get parameters from XCom (published by init_step).
        p_service_name = ti.xcom_pull(task_ids='init_step', key='p_service_name')
        p_table_owner = ti.xcom_pull(task_ids='init_step', key='p_table_owner')
        p_table_name = ti.xcom_pull(task_ids='init_step', key='p_table_name')
        p_objectstore_uri = ti.xcom_pull(task_ids='init_step', key='p_objectstore_uri')

        # Create Oracle connection for this task.
        oracle_conn = None
        try:
            oracle_conn = oraconn.connect('MRDS_LOADER')
            logging.info("Oracle connection established successfully for export_table")

            # Execute Oracle procedure using oraconn.run_proc() directly
            oraconn.run_proc(
                oracle_conn,
                'MRDS_LOADER.DATA_REPLICATOR.export_table',
                [p_service_name, p_table_owner, p_table_name, p_objectstore_uri]
            )
            oracle_conn.commit()
            logging.info("export_table procedure executed successfully")

        except Exception as e:
            logging.error(f"Error in export_table: {e}")
            raise
        finally:
            # Close even on failure; closing errors are logged, not raised.
            if oracle_conn:
                try:
                    oracle_conn.close()
                    logging.info("Oracle connection closed for export_table")
                except Exception as e:
                    logging.error(f"Error closing connection in export_table: {e}")

    t2 = PythonOperator(
        task_id='export_table',
        python_callable=export_table_task,
        trigger_rule=TriggerRule.ALL_DONE,  # Continue even if t1 failed
    )
|
||||
|
||||
# ---------------------------------------------
|
||||
# 4) Devo / Impyla refresh (Python Package)
|
||||
# ---------------------------------------------
|
||||
    # ---------------------------------------------
    # 4) Devo / Impyla refresh (Python Package)
    # ---------------------------------------------
    def devo_impyla_task(**context):
        """Refresh Impala metadata and stats for the replicated table.

        Delegates to impala_refresher.main with the env/store parameters
        published by init_step. Returns whatever status impala_main reports.
        """
        ti = context["ti"]
        env = ti.xcom_pull(task_ids='init_step', key='env')
        store = ti.xcom_pull(task_ids='init_step', key='store')
        corporate_store = ti.xcom_pull(task_ids='init_step', key='corporate_store')
        config_path = ti.xcom_pull(task_ids='init_step', key='config_path')
        owner = ti.xcom_pull(task_ids='init_step', key='p_table_owner')
        table = ti.xcom_pull(task_ids='init_step', key='p_table_name')

        # For Impala: corporate_store.table_name (e.g., crp_mopdb.T_MPEC)
        # NOT corporate_store.owner.table_name (which would be crp_mopdb.MPEC.T_MPEC - INVALID)
        table_name = table  # Just "T_MPEC"

        logging.info(
            "Starting Impyla refresh with env=%s store=%s corporate_store=%s table=%s",
            env, store, corporate_store, table_name
        )
        logging.info("Will execute: INVALIDATE METADATA %s.%s", corporate_store, table_name)
        logging.info("Will execute: COMPUTE STATS %s.%s", corporate_store, table_name)

        try:
            # This should result in queries like:
            # INVALIDATE METADATA crp_mopdb.T_MPEC
            # COMPUTE STATS crp_mopdb.T_MPEC
            status = impala_main(config_path, env, table_name, corporate_store)
            logging.info("Impyla (Devo) task finished successfully. Status: %s", status)
            return status
        except Exception as e:
            logging.error(f"Error in devo_impyla_task: {e}")
            raise

    t3 = PythonOperator(
        task_id='devo_impyla',
        python_callable=devo_impyla_task,
        trigger_rule=TriggerRule.ALL_DONE,  # Continue even if t2 failed
    )
|
||||
|
||||
# -------------------------------------
|
||||
# 5) End log table (always executes after t1, t2, t3 complete - regardless of success/failure)
|
||||
# -------------------------------------
|
||||
    # -------------------------------------
    # 5) End log table (always executes after t1, t2, t3 complete - regardless of success/failure)
    # -------------------------------------
    def end_log_table_task(**context):
        """Record the end of this run via DATA_REPLICATOR.end_log_table.

        Cleanup task: failures here are logged but deliberately NOT raised,
        so a broken end-log never masks the real pipeline outcome.
        """
        ti = context["ti"]

        # Get parameters from XCom (published by init_step).
        p_service_name = ti.xcom_pull(task_ids='init_step', key='p_service_name')
        p_table_owner = ti.xcom_pull(task_ids='init_step', key='p_table_owner')
        p_table_name = ti.xcom_pull(task_ids='init_step', key='p_table_name')

        # Create Oracle connection for this task.
        oracle_conn = None
        try:
            oracle_conn = oraconn.connect('MRDS_LOADER')
            logging.info("Oracle connection established successfully for end_log_table")

            # Execute Oracle procedure using oraconn.run_proc() directly
            oraconn.run_proc(
                oracle_conn,
                'MRDS_LOADER.DATA_REPLICATOR.end_log_table',
                [p_service_name, p_table_owner, p_table_name]
            )
            oracle_conn.commit()
            logging.info("end_log_table procedure executed successfully")

        except Exception as e:
            logging.error(f"Error in end_log_table: {e}")
            # Don't raise the exception since this is a cleanup task
            logging.info("Continuing despite end_log_table error (cleanup task)")
        finally:
            if oracle_conn:
                try:
                    oracle_conn.close()
                    logging.info("Oracle connection closed for end_log_table")
                except Exception as e:
                    logging.error(f"Error closing connection in end_log_table: {e}")

    t4 = PythonOperator(
        task_id='end_log_table',
        python_callable=end_log_table_task,
        trigger_rule=TriggerRule.ALL_DONE,  # Run after t1, t2, t3 complete (success or failure)
    )
|
||||
|
||||
# -----------------------------------------------------
|
||||
# 6) Check and fail the DAG if any of t1..t3 actually failed
|
||||
# This task always runs after t4, but will fail the DAG if needed
|
||||
# -----------------------------------------------------
|
||||
def fail_if_any_failed(**context):
|
||||
dag_run = context['dag_run']
|
||||
check_tasks = ['start_log_table', 'export_table', 'devo_impyla']
|
||||
failed = []
|
||||
|
||||
for tid in check_tasks:
|
||||
ti_up = dag_run.get_task_instance(tid)
|
||||
if ti_up and ti_up.state == 'failed':
|
||||
failed.append(tid)
|
||||
|
||||
if failed:
|
||||
error_msg = f"Critical task(s) failed: {', '.join(failed)}. DAG execution failed."
|
||||
logging.error(error_msg)
|
||||
raise AirflowFailException(error_msg)
|
||||
|
||||
logging.info("All critical tasks completed successfully: %s", check_tasks)
|
||||
|
||||
t5 = PythonOperator(
|
||||
task_id='fail_if_any_failed',
|
||||
python_callable=fail_if_any_failed,
|
||||
trigger_rule=TriggerRule.ALL_DONE, # Always run after t4
|
||||
)
|
||||
|
||||
# ---------
|
||||
# Task Dependencies - SEQUENTIAL
|
||||
# ---------
|
||||
# Sequential flow: init -> t1 -> t2 -> t3
|
||||
init >> t1 >> t2 >> t3
|
||||
|
||||
# t4 runs after t1, t2, t3 are all done (regardless of success/failure)
|
||||
[t1, t2, t3] >> t4
|
||||
|
||||
# t5 always runs after t4 to check for failures and fail the DAG if needed
|
||||
t4 >> t5
|
||||
18
airflow/TestDags/archive/test_oracle_connection.py
Normal file
18
airflow/TestDags/archive/test_oracle_connection.py
Normal file
@@ -0,0 +1,18 @@
|
||||
from airflow import DAG
from airflow.operators.bash import BashOperator
from datetime import datetime
from airflow.providers.common.sql.operators.sql import SQLExecuteQueryOperator

# FIX: 'from airflow import DAG' was imported twice; the duplicate and a
# commented-out legacy OracleOperator import were removed.

# Minimal connectivity check: runs a trivial query against the default
# Oracle connection to confirm the provider and credentials are wired up.
with DAG(
    'test_oracle_connection',
    start_date=datetime(2025, 6, 13),
    schedule_interval=None  # manual trigger only
) as dag:
    test_query = SQLExecuteQueryOperator(
        task_id='test_oracle_query',
        conn_id='oracle_default',
        sql='SELECT 1 FROM DUAL'
    )
|
||||
|
||||
244
airflow/TestDags/ods_exdi_multi_task_processor.py
Normal file
244
airflow/TestDags/ods_exdi_multi_task_processor.py
Normal file
@@ -0,0 +1,244 @@
|
||||
import sys
import os
from airflow import DAG
from airflow.operators.python import PythonOperator
from airflow.utils.dates import days_ago
from airflow.utils.trigger_rule import TriggerRule
from datetime import datetime, timedelta
import logging

# AirflowFailException marks a task failed without retrying; older Airflow
# versions only ship AirflowException, so alias it as a fallback.
try:
    from airflow.exceptions import AirflowFailException, AirflowSkipException
except Exception:
    from airflow.exceptions import AirflowException as AirflowFailException
    from airflow.exceptions import AirflowSkipException

# Importing custom modules: paths must be appended BEFORE the mrds imports.
sys.path.append('/opt/airflow/python/mrds_common')
sys.path.append('/opt/airflow/src/airflow/dags/ods/exdi')

from mrds.utils.manage_runs import init_workflow as mrds_init_workflow, finalise_workflow as mrds_finalise_workflow
from mrds.core import main as mrds_main


# Configuration Dictionary - First key will be mandatory,
# Workflow: Init → First Task (Sequential) → Parallel Tasks → Finalize
# Input: Only needs PARSE.yaml config file and source filename
# Each entry maps a logical task name to its CSV source and PARSE config.
TASK_CONFIGS = {
    "m_ODS_EXDI_TASK1": {
        "source_filename": "EXDI_TASK1.csv",
        "config_file": "/opt/airflow/src/airflow/dags/ods/exdi/exdi_process/config/yaml/m_ODS_EXDI_TASK1_PARSE.yaml"
    },
    "m_ODS_EXDI_TASK2": {
        "source_filename": "EXDI_TASK2.csv",
        "config_file": "/opt/airflow/src/airflow/dags/ods/exdi/exdi_process/config/yaml/m_ODS_EXDI_TASK2_PARSE.yaml"
    },
    "m_ODS_EXDI_TASK3": {
        "source_filename": "EXDI_TASK3.csv",
        "config_file": "/opt/airflow/src/airflow/dags/ods/exdi/exdi_process/config/yaml/m_ODS_EXDI_TASK3_PARSE.yaml"
    }
}

default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': days_ago(1),
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
}

# DAG id follows the file name, so renaming the file renames the DAG.
dag_id = os.path.splitext(os.path.basename(__file__))[0]

# Identifiers for the MRDS workflow-history record created in init.
WORKFLOW_CONFIG = {
    "database_name": "ODS",
    "workflow_name": dag_id
}

with DAG(
    dag_id=dag_id,
    default_args=default_args,
    description='Run EXDI data processing workflow with MRDS - Multi-task',
    schedule_interval=None,  # manual trigger only
    catchup=False,
    tags=["EXDI", "MRDS", "Multi-Task", "ODS"]
) as dag:
|
||||
|
||||
def init_workflow_task(**context):
|
||||
"""Initialize workflow and set up context"""
|
||||
try:
|
||||
database_name = WORKFLOW_CONFIG["database_name"]
|
||||
workflow_name = WORKFLOW_CONFIG["workflow_name"]
|
||||
|
||||
env = os.getenv("MRDS_ENV", "dev")
|
||||
username = os.getenv("MRDS_LOADER_DB_USER")
|
||||
password = os.getenv("MRDS_LOADER_DB_PASS")
|
||||
tnsalias = os.getenv("MRDS_LOADER_DB_TNS")
|
||||
|
||||
if not all([username, password, tnsalias]):
|
||||
missing_vars = []
|
||||
if not username: missing_vars.append("MRDS_LOADER_DB_USER")
|
||||
if not password: missing_vars.append("MRDS_LOADER_DB_PASS")
|
||||
if not tnsalias: missing_vars.append("MRDS_LOADER_DB_TNS")
|
||||
raise ValueError(f"Missing required environment variables: {', '.join(missing_vars)}")
|
||||
|
||||
workflow_run_id = str(context['ti'].run_id)
|
||||
a_workflow_history_key = mrds_init_workflow(database_name, workflow_name, workflow_run_id)
|
||||
|
||||
workflow_context = {
|
||||
"run_id": workflow_run_id,
|
||||
"a_workflow_history_key": a_workflow_history_key
|
||||
}
|
||||
|
||||
# Push context to XCom for downstream tasks
|
||||
ti = context['ti']
|
||||
ti.xcom_push(key='workflow_history_key', value=a_workflow_history_key)
|
||||
ti.xcom_push(key='workflow_context', value=workflow_context)
|
||||
ti.xcom_push(key='env', value=env)
|
||||
|
||||
logging.info("Workflow initialization completed successfully")
|
||||
|
||||
except Exception as e:
|
||||
logging.error(f"Error initializing workflow: {e}", exc_info=True)
|
||||
raise
|
||||
|
||||
    def run_mrds_task(**context):
        """Run MRDS processing task for EXDI.

        Derives the logical task name from its own task_id ('<name>_PARSE'),
        looks up its source file and config in TASK_CONFIGS, and runs MRDS
        with the workflow context published by init_workflow. Pushes a
        success/failure status to XCom either way, re-raising on failure.
        """
        try:
            ti = context['ti']
            task_id = context['task'].task_id

            # Extract task name from task_id (strip the '_PARSE' suffix).
            task_name = task_id.replace('_PARSE', '') if task_id.endswith('_PARSE') else task_id

            # Get task configuration
            task_config = TASK_CONFIGS.get(task_name)
            if not task_config:
                raise ValueError(f"No configuration found for task: {task_name}")

            source_filename = task_config["source_filename"]
            config_file = task_config["config_file"]

            # Get context from init task
            workflow_context = ti.xcom_pull(key='workflow_context', task_ids='init_workflow')

            if not workflow_context:
                raise ValueError("No workflow_context from init task")

            # Verify config file exists before handing it to MRDS.
            if not os.path.exists(config_file):
                raise FileNotFoundError(f"PARSE config file not found: {config_file}")

            logging.info(f"Processing EXDI MRDS with source_filename: {source_filename}, config_file: {config_file}")

            # Run MRDS - file should already be in bucket from EXDI flow
            mrds_main(workflow_context, source_filename, config_file, generate_workflow_context=False)

            logging.info(f"EXDI MRDS task completed successfully for {task_name}")

            # Push success status to XCom (consumed by finalise_workflow_task).
            ti.xcom_push(key='mrds_success', value=True)
            ti.xcom_push(key='task_status', value='SUCCESS')

            return "SUCCESS"

        except Exception as e:
            logging.error(f"Error running EXDI MRDS task: {e}", exc_info=True)

            # Push failure status to XCom so finalisation can report details.
            ti = context['ti']
            ti.xcom_push(key='mrds_success', value=False)
            ti.xcom_push(key='task_status', value='FAILED')
            ti.xcom_push(key='error_message', value=str(e))

            raise
|
||||
|
||||
def finalise_workflow_task(**context):
|
||||
"""Finalize workflow based on overall execution results"""
|
||||
try:
|
||||
ti = context['ti']
|
||||
dag_run = context['dag_run']
|
||||
|
||||
a_workflow_history_key = ti.xcom_pull(key='workflow_history_key', task_ids='init_workflow')
|
||||
|
||||
if a_workflow_history_key is None:
|
||||
raise ValueError("No workflow history key found in XCom; cannot finalise workflow")
|
||||
|
||||
# Check all task statuses
|
||||
workflow_success = True
|
||||
failure_reasons = []
|
||||
|
||||
for task_name in TASK_CONFIGS.keys():
|
||||
mrds_task_id = f'{task_name}_PARSE'
|
||||
mrds_task = dag_run.get_task_instance(mrds_task_id)
|
||||
|
||||
if mrds_task.state == 'failed':
|
||||
workflow_success = False
|
||||
try:
|
||||
error_msg = ti.xcom_pull(key='error_message', task_ids=mrds_task_id)
|
||||
failure_reasons.append(f"{task_name}: MRDS task failed - {error_msg}")
|
||||
except:
|
||||
failure_reasons.append(f"{task_name}: MRDS task failed")
|
||||
|
||||
# Finalize workflow
|
||||
if workflow_success:
|
||||
mrds_finalise_workflow(a_workflow_history_key, "Y")
|
||||
logging.info(f"Finalised EXDI workflow with history key {a_workflow_history_key} as SUCCESS")
|
||||
else:
|
||||
mrds_finalise_workflow(a_workflow_history_key, "N")
|
||||
logging.error(f"Finalised EXDI workflow with history key {a_workflow_history_key} as FAILED")
|
||||
raise AirflowFailException(f"EXDI Workflow failed: {', '.join(failure_reasons)}")
|
||||
|
||||
except AirflowFailException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logging.error(f"Error finalizing EXDI workflow: {e}", exc_info=True)
|
||||
try:
|
||||
if 'a_workflow_history_key' in locals() and a_workflow_history_key:
|
||||
mrds_finalise_workflow(a_workflow_history_key, "N")
|
||||
except:
|
||||
pass
|
||||
raise AirflowFailException(f"EXDI Workflow finalization failed: {e}")
|
||||
|
||||
# Create tasks
|
||||
init_workflow = PythonOperator(
|
||||
task_id='init_workflow',
|
||||
python_callable=init_workflow_task,
|
||||
provide_context=True,
|
||||
)
|
||||
|
||||
finalize_workflow = PythonOperator(
|
||||
task_id='finalize_workflow',
|
||||
python_callable=finalise_workflow_task,
|
||||
provide_context=True,
|
||||
trigger_rule=TriggerRule.ALL_DONE,
|
||||
)
|
||||
|
||||
# Get task names - first task runs sequentially, others in parallel
|
||||
task_names = list(TASK_CONFIGS.keys())
|
||||
first_task_name = task_names[0]
|
||||
parallel_task_names = task_names[1:]
|
||||
|
||||
# Create first task (sequential)
|
||||
first_mrds_task = PythonOperator(
|
||||
task_id=f'{first_task_name}_PARSE',
|
||||
python_callable=run_mrds_task,
|
||||
provide_context=True,
|
||||
)
|
||||
|
||||
# Set dependencies for first task
|
||||
init_workflow >> first_mrds_task >> finalize_workflow
|
||||
|
||||
# Create parallel tasks
|
||||
for task_name in parallel_task_names:
|
||||
mrds_task = PythonOperator(
|
||||
task_id=f'{task_name}_PARSE',
|
||||
python_callable=run_mrds_task,
|
||||
provide_context=True,
|
||||
)
|
||||
# Parallel tasks start after first task completes
|
||||
first_mrds_task >> mrds_task >> finalize_workflow
|
||||
|
||||
logging.info(f"EXDI DAG created with {len(TASK_CONFIGS)} tasks: {list(TASK_CONFIGS.keys())}")
|
||||
44
airflow/TestDags/pen_test_demo.py
Normal file
44
airflow/TestDags/pen_test_demo.py
Normal file
@@ -0,0 +1,44 @@
|
||||
from datetime import datetime
|
||||
from airflow import DAG
|
||||
from airflow.operators.bash import BashOperator
|
||||
from airflow.operators.python import PythonOperator
|
||||
|
||||
|
||||
# Smoke-test DAG: run it to verify that Airflow works.
|
||||
|
||||
def print_statement():
    """Emit a fixed greeting to stdout (demo task payload)."""
    message = "Hello from the Python function!!!!!"
    print(message)
|
||||
|
||||
def print_message():
    """Emit the closing demo message to stdout."""
    farewell = "Last message from Python!, hope things are going good"
    print(farewell)
|
||||
|
||||
# Four-step demo pipeline alternating Bash and Python tasks.
with DAG(
    'demo_task_workflow',
    start_date=datetime(2025, 6, 13),
    schedule_interval=None,  # manual trigger only
    catchup=False,
) as dag:

    opening_bash = BashOperator(
        task_id='print_with_bash',
        bash_command='echo "Lets begin"',
    )

    greeting_python = PythonOperator(
        task_id='print_with_python',
        python_callable=print_statement,
    )

    progress_bash = BashOperator(
        task_id='another_bash_task',
        bash_command='echo "So far so good!"',
    )

    closing_python = PythonOperator(
        task_id='another_python_task',
        python_callable=print_message,
    )

    # Strictly linear: bash -> python -> bash -> python.
    opening_bash >> greeting_python >> progress_bash >> closing_python
|
||||
|
||||
95
airflow/TestDags/rqsd_casper_connector_test.py
Normal file
95
airflow/TestDags/rqsd_casper_connector_test.py
Normal file
@@ -0,0 +1,95 @@
|
||||
import sys
|
||||
import os
|
||||
from airflow import DAG
|
||||
from airflow.operators.python import PythonOperator
|
||||
from airflow.utils.dates import days_ago
|
||||
from datetime import datetime, timedelta
|
||||
import logging
|
||||
|
||||
from mrds.utils.manage_runs import init_workflow, finalise_workflow
|
||||
|
||||
### CASPER CONNECTOR WITH DYNAMIC WORKFLOW CONTEXT & HISTORY KEY
|
||||
|
||||
|
||||
# Importing custom module#s
|
||||
sys.path.append('/opt/airflow/python/connectors/casper')
|
||||
sys.path.append('/opt/airflow/python/mrds_common')
|
||||
sys.path.append('/opt/airflow/src/airflow/dags/ods/rqsd')
|
||||
|
||||
"""username = os.getenv("MRDS_LOADER_DB_USER")
|
||||
password = os.getenv("MRDS_LOADER_DB_PASS")
|
||||
tnsalias = os.getenv("MRDS_LOADER_DB_TNS") """
|
||||
|
||||
# connstr = f"{username}/{password}@{tnsalias}"
|
||||
|
||||
# Importing the main function from casper script
|
||||
from casper_rqsd import main as casper_main
|
||||
|
||||
# Default DAG arguments
|
||||
default_args = {
|
||||
'owner': 'airflow',
|
||||
'depends_on_past': False,
|
||||
'start_date': days_ago(1),
|
||||
'email_on_failure': False,
|
||||
'email_on_retry': False,
|
||||
'retries': 2,
|
||||
'retry_delay': timedelta(minutes=5),
|
||||
}
|
||||
|
||||
with DAG(
    dag_id='rqsd_casper_connector_test',
    default_args=default_args,
    description='Run Casper RQSD data ingestion workflow',
    schedule_interval=None,  # manual trigger only; set a cron later if needed
    #start_date=datetime(2025, 10, 7),
    catchup=False,
    tags=['Casper', 'RQSD', 'Connector'],
) as dag:

    def run_casper_rqsd(**context):
        """Run the Casper RQSD ingestion workflow with a dynamic workflow context.

        Registers the run via ``init_workflow`` to obtain a history key, builds
        the workflow context, validates DB credentials from the environment, and
        hands off to ``casper_main``. On failure the workflow history record is
        finalised as FAILED (mirroring the Devo DAG) and the exception is
        re-raised so Airflow marks the task failed.
        """
        # Sentinel so the except-block knows whether init_workflow succeeded.
        a_workflow_history_key = None
        try:
            flow_config_path = "/opt/airflow/src/airflow/dags/ods/rqsd/rqsd_process/config/yaml/flow_config_casper.yaml"
            env_config_path = "/opt/airflow/python/connectors/casper/config/env_config.yaml"

            # BUGFIX: database_name / workflow_name were referenced below but
            # never defined, raising NameError on every run. Values follow the
            # sibling Devo DAG's convention.
            # TODO(review): confirm the registered Casper workflow name.
            database_name = 'MOPDB'
            workflow_name = 'w_MOPDB_RQSD_CASPER'

            # Airflow run id makes the workflow run traceable back to the DAG run.
            workflow_run_id = str(context['ti'].run_id)

            a_workflow_history_key = init_workflow(database_name, workflow_name, workflow_run_id)
            logging.info(f"Initialized workflow with history key: {a_workflow_history_key}")

            workflow_context = {
                "run_id": workflow_run_id,
                "a_workflow_history_key": a_workflow_history_key
            }

            # Environment selection and DB credentials come from the container env.
            env = os.getenv("MRDS_ENV")
            username = os.getenv("MRDS_LOADER_DB_USER")
            password = os.getenv("MRDS_LOADER_DB_PASS")
            tnsalias = os.getenv("MRDS_LOADER_DB_TNS")
            if not all([username, password, tnsalias]):
                raise ValueError(
                    "Missing one or more required environment variables: "
                    "MRDS_LOADER_DB_USER, MRDS_LOADER_DB_PASS, MRDS_LOADER_DB_TNS"
                )

            logging.info(
                f"Starting Casper RQSD workflow from Airflow DAG for env '{env}'"
            )

            # Hand off to the connector's main entry point.
            casper_main(workflow_context, flow_config_path, env_config_path, env)

            logging.info("Casper RQSD workflow completed successfully")
        except Exception as e:
            logging.error(f"Error running Casper RQSD workflow: {e}", exc_info=True)
            # If the workflow was registered, close its history record as FAILED
            # (consistent with the Devo DAG's error handling; finalise_workflow
            # was previously imported but never used here).
            if a_workflow_history_key is not None:
                try:
                    finalise_workflow(a_workflow_history_key, "FAILED")
                except Exception as finalise_error:
                    logging.error(f"Failed to finalise workflow after error: {finalise_error}")
            raise

    run_casper = PythonOperator(
        task_id='run_casper_rqsd',
        python_callable=run_casper_rqsd,
        provide_context=True,
    )
|
||||
117
airflow/TestDags/t_MOPDB_RQSD_DEVO_OBSERVATIONS.py
Normal file
117
airflow/TestDags/t_MOPDB_RQSD_DEVO_OBSERVATIONS.py
Normal file
@@ -0,0 +1,117 @@
|
||||
import sys
|
||||
import os
|
||||
from airflow import DAG
|
||||
from airflow.operators.python import PythonOperator
|
||||
from airflow.utils.dates import days_ago
|
||||
from datetime import datetime, timedelta
|
||||
import logging
|
||||
|
||||
### DEVO CONNECTOR WITH DYNAMIC WORKFLOW CONTEXT & HISTORY KEY
|
||||
|
||||
# Importing custom modules: make the Devo connector, shared MRDS utilities,
# and the RQSD DAG helpers importable from this DAG file.
# BUGFIX: the devo connector path was appended twice; duplicate removed.
sys.path.append('/opt/airflow/python/connectors/devo')
sys.path.append('/opt/airflow/python/mrds_common')
sys.path.append('/opt/airflow/src/airflow/dags/ods/rqsd')

# Workflow bookkeeping helpers and the Devo connector entry point
# (resolvable via the sys.path additions above).
from mrds.utils.manage_runs import init_workflow, finalise_workflow
from devo_connector import main as devo_main

# Default arguments applied to every task in this DAG.
default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': days_ago(1),
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 3,
    'retry_delay': timedelta(minutes=5),
}

# Derive the dag_id from this file's basename so renaming the file renames the DAG.
dag_id = os.path.splitext(os.path.basename(__file__))[0]
|
||||
|
||||
with DAG(
    dag_id=dag_id,
    default_args=default_args,
    description='Run devo RQSD data ingestion workflow',
    schedule_interval=None,  # manual trigger only; set a cron later if needed
    catchup=False,
    tags=["Devo", "RQSD", "Connector"],
) as dag:

    def run_devo_connector_rqsd(**context):
        """Run the Devo RQSD ingestion workflow with a dynamic workflow context.

        Validates DB credentials from the environment, registers the run via
        ``init_workflow``, calls ``devo_main``, and pushes the workflow history
        key to XCom for the downstream finalisation task. On failure the history
        record is finalised as FAILED and the exception is re-raised so Airflow
        marks the task failed.
        """
        # Sentinel so the except-block knows whether init_workflow succeeded
        # (replaces the fragile `'a_workflow_history_key' in locals()` check).
        a_workflow_history_key = None
        try:
            # Environment selection and DB credentials come from the container env.
            env = os.getenv("MRDS_ENV")
            username = os.getenv("MRDS_LOADER_DB_USER")
            password = os.getenv("MRDS_LOADER_DB_PASS")
            tnsalias = os.getenv("MRDS_LOADER_DB_TNS")
            if not all([username, password, tnsalias]):
                raise ValueError(
                    "Missing one or more required environment variables: "
                    "MRDS_LOADER_DB_USER, MRDS_LOADER_DB_PASS, MRDS_LOADER_DB_TNS"
                )

            # BUGFIX: this log line previously said "Casper" (copy/paste slip);
            # leftover duplicate debug print statements removed.
            logging.info(f"Starting Devo RQSD workflow from Airflow DAG for env '{env}'")

            database_name = 'MOPDB'
            workflow_name = 'w_MOPDB_RQSD_PROCESS'
            # Airflow run id makes the workflow run traceable back to the DAG run.
            workflow_run_id = str(context['ti'].run_id)

            a_workflow_history_key = init_workflow(database_name, workflow_name, workflow_run_id)
            logging.info(f"Initialized workflow with history key: {a_workflow_history_key}")

            workflow_context = {
                "run_id": workflow_run_id,
                "a_workflow_history_key": a_workflow_history_key
            }

            flow_config_path = "/opt/airflow/src/airflow/dags/ods/rqsd/rqsd_process/config/yaml/flow_config_rqsd_observations.yaml"
            env_config_path = "/opt/airflow/python/connectors/devo/config/env_config_rqsd.yaml"

            logging.info("Starting Devo RQSD workflow from Airflow DAG")
            devo_main(workflow_context, flow_config_path, env_config_path, env)
            logging.info("Devo RQSD workflow completed successfully")

            # Push the workflow history key to XCom for the finalisation task.
            context['ti'].xcom_push(key='workflow_history_key', value=a_workflow_history_key)

        except Exception as e:
            logging.error(f"Error running Devo RQSD workflow: {e}", exc_info=True)
            # If init_workflow succeeded but the run failed, close the history
            # record as FAILED before surfacing the error to Airflow.
            if a_workflow_history_key is not None:
                try:
                    finalise_workflow(a_workflow_history_key, "FAILED")
                except Exception as finalise_error:
                    logging.error(f"Failed to finalise workflow after error: {finalise_error}")
            raise

    def finalise_workflow_task(**context):
        """Mark the workflow history record SUCCESS using the key pushed to XCom."""
        ti = context['ti']
        a_workflow_history_key = ti.xcom_pull(key='workflow_history_key', task_ids='run_devo_connector_rqsd')

        if a_workflow_history_key is None:
            raise ValueError("No workflow history key found in XCom; cannot finalise workflow")

        # Call finalise with SUCCESS status.
        finalise_workflow(a_workflow_history_key, "SUCCESS")
        logging.info(f"Finalised workflow with history key {a_workflow_history_key} as SUCCESS")

    run_devo = PythonOperator(
        task_id='run_devo_connector_rqsd',
        python_callable=run_devo_connector_rqsd,
        provide_context=True,
    )

    finalize = PythonOperator(
        task_id='finalise_workflow',
        python_callable=finalise_workflow_task,
        provide_context=True,
    )

    # Finalisation runs only after the ingestion task succeeds.
    run_devo >> finalize
|
||||
Reference in New Issue
Block a user