This commit is contained in:
Grzegorz Michalski
2026-03-02 09:47:35 +01:00
commit 2c225d68ac
715 changed files with 130067 additions and 0 deletions

View File

@@ -0,0 +1,158 @@
import os
import sys
import logging
from airflow.decorators import dag
from airflow.operators.python import PythonOperator
from airflow.utils.dates import days_ago
from airflow.utils.trigger_rule import TriggerRule
from airflow import DAG
from airflow.decorators import task
from airflow.operators.trigger_dagrun import TriggerDagRunOperator
from datetime import datetime, timedelta
from airflow.operators.python import BranchPythonOperator
from airflow.operators.empty import EmptyOperator
from mrds.utils import oraconn
sys.path.append('/opt/airflow/python/connectors/devo')
sys.path.append('/opt/airflow/python/mrds_common')
DAG_NAME = "dev_replicator_scheduler_rar"
TARGET_DAG_ID = "devo_replicator_trigger_rar"
def get_devo_replica_table_options():
oracle_conn = None
try:
oracle_conn = oraconn.connect('MRDS_LOADER')
cursor = oracle_conn.cursor()
cursor.execute("SELECT OWNER || '.' || TABLE_NAME FROM CT_MRDS.a_devo_replica_mgmt_rar ORDER BY OWNER, TABLE_NAME")
options = [row[0] for row in cursor.fetchall()]
cursor.close()
return options
except Exception as e:
logging.error(f"Error getting MOPDB table options: {e}")
return []
finally:
if oracle_conn:
oracle_conn.close()
def check_table_precondition(table_full_name):
oracle_conn = None
try:
oracle_conn = oraconn.connect('MRDS_LOADER')
cursor = oracle_conn.cursor()
sql = """
WITH LAST_UPDATE_ORACLE AS (
SELECT max(process_end) as process_end
FROM CT_RAR.A_RAR_FOR_DISC_MONITORING
WHERE upper(owner||'.'||TARGET_TABLE_NAME) = upper(:table_name)
AND PROCESS_END is not null AND PROCESS_SUCCESSFUL='Y'
),
LAST_UPDATE_DEVO AS (
SELECT CASE WHEN last_status = 'FINISHED' THEN LAST_END_TIME ELSE TO_DATE('01-JAN-1999', 'DD-MON-YYYY') END as process_end
FROM CT_MRDS.a_devo_replica_mgmt_rar
WHERE OWNER || '.' || TABLE_NAME = :table_name
)
SELECT CASE WHEN (SELECT process_end FROM LAST_UPDATE_ORACLE) > (SELECT process_end FROM LAST_UPDATE_DEVO)
THEN 'Y' ELSE 'N' END AS TRIGGER_DEVO_REPLICATOR FROM dual
"""
cursor.execute(sql, table_name=table_full_name)
result = cursor.fetchone()
status = result[0] if result else 'N'
logging.info(f"Precondition for {table_full_name}: {status}")
cursor.close()
return {"table": table_full_name, "trigger": status}
except Exception as e:
logging.error(f"Error checking precondition for {table_full_name}: {e}")
return {"table": table_full_name, "trigger": 'ERROR'}
finally:
if oracle_conn:
oracle_conn.close()
def get_tables_to_trigger(precondition_results):
triggered_tables = [r["table"] for r in precondition_results if r["trigger"] == "Y"]
logging.info(f"Tables meeting precondition: {triggered_tables}")
return [{"owner_table": table_name} for table_name in triggered_tables]
def branch_on_tables(ti):
precondition_results = ti.xcom_pull(task_ids='check_all_tables')
tables_to_trigger = [r["table"] for r in precondition_results if r["trigger"] == "Y"]
if tables_to_trigger:
return "trigger_devo_replicators"
else:
return "no_table_updated"
default_args = {
'owner': 'airflow',
'depends_on_past': False,
'start_date': days_ago(1),
'email_on_failure': False,
'email_on_retry': False,
'retries': 1,
'retry_delay': timedelta(minutes=2),
}
with DAG(
dag_id=DAG_NAME,
default_args=default_args,
schedule_interval=None,
catchup=False,
tags=['DevoScheduler', 'DevoReplicatorTrigger']
) as dag:
@task()
def fetch_tables():
return get_devo_replica_table_options()
@task()
def check_all_tables(table_list):
results = [check_table_precondition(tbl) for tbl in table_list]
count_y = sum(1 for r in results if r["trigger"] == "Y")
count_n = sum(1 for r in results if r["trigger"] == "N")
logging.info(f"Precondition results: {results}")
logging.info(f"Tables with trigger = 'Y': {count_y}")
logging.info(f"Tables with trigger = 'N': {count_n}")
return results
@task()
def output_tables_to_trigger(precondition_results):
return get_tables_to_trigger(precondition_results)
branch_task = BranchPythonOperator(
task_id="branch_trigger_check",
python_callable=branch_on_tables,
provide_context=True,
)
no_table_updated = EmptyOperator(task_id="no_table_updated")
tables = fetch_tables()
precondition_results = check_all_tables(tables)
tables_to_trigger = output_tables_to_trigger(precondition_results)
trigger_dag = TriggerDagRunOperator.partial(
task_id="trigger_devo_replicators",
trigger_dag_id=TARGET_DAG_ID,
execution_date="{{ ds }}"
).expand(conf=tables_to_trigger)
# Dependencies for branching
tables >> precondition_results >> tables_to_trigger >> branch_task
branch_task >> [trigger_dag, no_table_updated]
"""
1. fetch_tables gets the list of tables.
2. check_all_tables checks each tables trigger status and logs counts.
3. output_tables_to_trigger prepares the mapped parameter list for triggering downstream DAGs.
4. branch_on_tables decides the path:
"trigger_devo_replicators" if any table triggers.
"no_table_updated" otherwise.
5. BranchPythonOperator implements the conditional branching.
6. TriggerDagRunOperator dynamically triggers a run of devo_replicator_trigger_rar per qualifying table.
7. EmptyOperator represents the "no tables to trigger" branch.
"""