init
This commit is contained in:
0
airflow/devo_replicator/.gitkeep
Normal file
0
airflow/devo_replicator/.gitkeep
Normal file
346
airflow/devo_replicator/devo_replicator_core.py
Normal file
346
airflow/devo_replicator/devo_replicator_core.py
Normal file
@@ -0,0 +1,346 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import sys
|
||||
import logging
|
||||
import yaml
|
||||
from datetime import timedelta
|
||||
|
||||
from airflow import DAG
|
||||
from airflow.utils.dates import days_ago
|
||||
from airflow.utils.trigger_rule import TriggerRule
|
||||
from airflow.operators.python import PythonOperator
|
||||
|
||||
try:
|
||||
from airflow.exceptions import AirflowFailException
|
||||
except Exception:
|
||||
from airflow.exceptions import AirflowException as AirflowFailException
|
||||
|
||||
sys.path.append('/opt/airflow/python/mrds_common')
|
||||
sys.path.append('/opt/airflow/python/devo_replicator/data_replicator')
|
||||
|
||||
from mrds.utils import oraconn
|
||||
from impala_refresher import main as impala_main
|
||||
|
||||
from mrds.utils.security_utils import get_verified_run_id, verify_run_id
|
||||
|
||||
ENV_CONFIG_PATH = "/opt/airflow/python/devo_replicator/config/env_config.yaml"
|
||||
|
||||
# Task-level defaults shared by every task in this DAG.
default_args = dict(
    owner='airflow',
    depends_on_past=False,
    start_date=days_ago(1),
    email_on_failure=False,
    email_on_retry=False,
    retries=1,
    retry_delay=timedelta(minutes=1),
)
|
||||
|
||||
with DAG(
|
||||
dag_id='devo_replicator_core',
|
||||
default_args=default_args,
|
||||
description='Core Devo replicator workflow for single table',
|
||||
schedule=None,
|
||||
catchup=False,
|
||||
tags=['DevoReplicator'],
|
||||
max_active_runs=10,
|
||||
max_active_tasks=16,
|
||||
) as dag:
|
||||
|
||||
# Init - read config from context
def init_step(**context):
    """Validate the trigger conf and environment, then publish every derived
    parameter to XCom for the downstream tasks.

    Required inputs:
      * env var MRDS_ENV            -- 'dev' | 'tst' | 'acc' | 'prd'
      * dag_run.conf['store']       -- 'mopdb' | 'rar' | 'rqsd'
      * dag_run.conf['owner_table'] -- 'OWNER.TABLE_NAME'

    Raises:
        ValueError: on any missing or unsupported input value.
    """
    dag_run = context.get("dag_run")
    ti = context["ti"]
    # A manual trigger may arrive without conf; treat that as an empty dict.
    conf = (dag_run.conf or {}) if dag_run else {}

    env = os.getenv("MRDS_ENV")
    if not env:
        raise ValueError("MRDS_ENV environment variable is required")
    env = env.lower()

    store = conf.get("store")
    if not store:
        raise ValueError("store parameter is required")
    store = store.lower()

    owner_table = conf.get("owner_table")
    if not owner_table or '.' not in owner_table:
        raise ValueError("owner_table must be in format 'OWNER.TABLE_NAME'")

    # Split on the first dot only, so table names containing dots survive.
    table_owner, table_name = owner_table.split('.', 1)

    if env not in {"dev", "tst", "acc", "prd"}:
        raise ValueError(f"Unsupported env '{env}'. Expected 'dev', 'tst', 'acc' or 'prd'.")
    if store not in {"mopdb", "rar", "rqsd"}:
        raise ValueError(f"Unsupported store '{store}'. Expected 'mopdb', 'rar', 'rqsd'.")

    # Service name passed to the PL/SQL procedures (upper-cased store).
    if store == "mopdb":
        p_service_name = "MOPDB"
    elif store == "rar":
        p_service_name = "RAR"
    elif store == "rqsd":
        p_service_name = "RQSD"

    with open(ENV_CONFIG_PATH, "r") as f:
        cfg = yaml.safe_load(f)

    # NOTE(review): assumes the YAML has one top-level key per env and one per
    # store, and that env entries carry S3_LOCATION_URI with a '{0}' slot for
    # the store name -- confirm against config/env_config.yaml.
    env_cfg = cfg[env]
    store_cfg = cfg[store]
    p_objectstore_uri = env_cfg["S3_LOCATION_URI"].replace("{0}", store.lower())

    # Get verified run_id using security utilities
    p_run_id = get_verified_run_id(context)

    logging.info("=== init_step === env=%s store=%s table=%s.%s run_id=%s",
                 env, store, table_owner, table_name, p_run_id)

    # Everything the downstream tasks pull later, one XCom key per value.
    xcom = {
        "env": env,
        "store": store,
        "config_path": ENV_CONFIG_PATH,
        "p_run_id": p_run_id,
        "p_service_name": p_service_name,
        "p_table_owner": table_owner,
        "p_table_name": table_name,
        "p_objectstore_uri": p_objectstore_uri,
        "corporate_store": store_cfg["corporate_store"],
        "owner_table": owner_table,
    }

    for k, v in xcom.items():
        ti.xcom_push(key=k, value=v)

init = PythonOperator(
    task_id='init_step',
    python_callable=init_step,
)
|
||||
|
||||
# Start log table
def start_log_table_task(**context):
    """Record the start of this replication run in the Oracle log table.

    Pulls run/table identifiers from init_step's XCom and calls the
    MRDS_LOADER.DATA_REPLICATOR.start_log_table stored procedure.
    """
    ti = context["ti"]

    def pull(key):
        return ti.xcom_pull(task_ids='init_step', key=key)

    p_run_id = pull('p_run_id')
    p_service_name = pull('p_service_name')
    p_table_owner = pull('p_table_owner')
    p_table_name = pull('p_table_name')

    oracle_conn = None
    try:
        oracle_conn = oraconn.connect('MRDS_LOADER')
        proc_args = [p_run_id, p_service_name, p_table_owner, p_table_name]
        oraconn.run_proc(
            oracle_conn,
            'MRDS_LOADER.DATA_REPLICATOR.start_log_table',
            proc_args
        )
        oracle_conn.commit()
        logging.info("start_log_table procedure executed successfully")
    except Exception as e:
        logging.error(f"Error in start_log_table: {e}")
        raise
    finally:
        if oracle_conn:
            oracle_conn.close()

t1 = PythonOperator(
    task_id='start_log_table',
    python_callable=start_log_table_task,
)
|
||||
|
||||
# Export table
def export_table_task(**context):
    """Export the source table to the object store.

    Delegates the actual data movement to the
    MRDS_LOADER.DATA_REPLICATOR.export_table stored procedure, using the
    parameters resolved by init_step.
    """
    ti = context["ti"]

    def pull(key):
        return ti.xcom_pull(task_ids='init_step', key=key)

    p_service_name = pull('p_service_name')
    p_table_owner = pull('p_table_owner')
    p_table_name = pull('p_table_name')
    p_objectstore_uri = pull('p_objectstore_uri')

    oracle_conn = None
    try:
        oracle_conn = oraconn.connect('MRDS_LOADER')
        proc_args = [p_service_name, p_table_owner, p_table_name, p_objectstore_uri]
        oraconn.run_proc(
            oracle_conn,
            'MRDS_LOADER.DATA_REPLICATOR.export_table',
            proc_args
        )
        oracle_conn.commit()
        logging.info("export_table procedure executed successfully")
    except Exception as e:
        logging.error(f"Error in export_table: {e}")
        raise
    finally:
        if oracle_conn:
            oracle_conn.close()

t2 = PythonOperator(
    task_id='export_table',
    python_callable=export_table_task,
    trigger_rule=TriggerRule.ALL_DONE,
)
|
||||
|
||||
# Check if previous tasks succeeded before triggering child DAG
def check_previous_tasks_success(**context):
    """Gate task: raise unless both upstream tasks finished in 'success'.

    Returns True when start_log_table and export_table both succeeded;
    otherwise raises AirflowFailException listing each offending task
    together with its state.
    """
    ti = context["ti"]
    dag_run = context['dag_run']
    store = ti.xcom_pull(task_ids='init_step', key='store')

    not_ok = []
    for upstream_id in ('start_log_table', 'export_table'):
        upstream_ti = dag_run.get_task_instance(upstream_id)
        if upstream_ti and upstream_ti.state != 'success':
            not_ok.append(f"{upstream_id}:{upstream_ti.state}")

    if not_ok:
        error_msg = f"Cannot proceed with {store} table generator. Previous tasks not successful: {', '.join(not_ok)}"
        logging.error(error_msg)
        raise AirflowFailException(error_msg)

    logging.info(f"All previous tasks succeeded. Ready to trigger {store} table generator.")
    return True
|
||||
|
||||
t3_check = PythonOperator(
    task_id='check_previous_tasks_success',
    python_callable=check_previous_tasks_success,
    # ALL_DONE so the gate always runs and turns upstream failures into an
    # explicit AirflowFailException instead of being skipped.
    trigger_rule=TriggerRule.ALL_DONE,
)
|
||||
|
||||
def drop_table(**context):
    """Clear the S3 staging area for the copy table.

    Calls MRDS_LOADER.DATA_REPLICATOR.clear_s3_bucket for
    '<p_table_name>_COPY' under the object-store URI resolved by init_step.
    """
    ti = context["ti"]
    p_service_name = ti.xcom_pull(task_ids='init_step', key='p_service_name')
    p_table_owner = ti.xcom_pull(task_ids='init_step', key='p_table_owner')
    p_table_name = ti.xcom_pull(task_ids='init_step', key='p_table_name')
    p_objectstore_uri = ti.xcom_pull(task_ids='init_step', key='p_objectstore_uri')

    # The procedure targets the _COPY table, not the base table.
    copy_table_name = p_table_name + '_COPY'

    oracle_conn = None
    try:
        oracle_conn = oraconn.connect('MRDS_LOADER')
        oraconn.run_proc(
            oracle_conn,
            'MRDS_LOADER.DATA_REPLICATOR.clear_s3_bucket',
            [p_service_name, p_table_owner, copy_table_name, p_objectstore_uri]
        )
        oracle_conn.commit()
        # BUG FIX: previously logged p_table_name[:-5], which chopped five
        # characters off the *base* table name (the '_COPY' suffix was added
        # to the procedure argument, not to p_table_name), so the log showed
        # a mangled name. Log the actual procedure target instead.
        logging.info("clear_s3_bucket for table {0} procedure executed successfully".format(copy_table_name.lower()))
    except Exception as e:
        logging.error(f"Error in clear_s3_bucket: {e}")
        raise
    finally:
        if oracle_conn:
            oracle_conn.close()

t3_drop = PythonOperator(
    task_id='drop_table',
    python_callable=drop_table,
    trigger_rule=TriggerRule.ALL_DONE,
)
|
||||
|
||||
# Trigger table generator DAG based on store
def trigger_table_generator(**context):
    """Fire the store-specific table-generator DAG for <owner>.<table>_COPY.

    Pushes the triggered run id and dag id to XCom and returns the run id.
    Raises ValueError for an unknown store.
    """
    from airflow.api.common.trigger_dag import trigger_dag

    ti = context["ti"]
    store = ti.xcom_pull(task_ids='init_step', key='store')
    table_owner = ti.xcom_pull(task_ids='init_step', key='p_table_owner')
    table_name = ti.xcom_pull(task_ids='init_step', key='p_table_name')

    # Determine target DAG based on store
    generator_dag_by_store = {
        "mopdb": 'devo_table_generator_trigger_mopdb',
        "rar": 'devo_table_generator_trigger_rar',
        "rqsd": 'devo_table_generator_trigger_rqsd',
    }
    if store not in generator_dag_by_store:
        raise ValueError(f"Unsupported store: {store}")
    target_dag_id = generator_dag_by_store[store]

    # Add _COPY suffix to table name for the target table
    trigger_conf = {"owner_table": f"{table_owner}.{table_name}_COPY"}

    logging.info(f"Triggering {target_dag_id} with conf: {trigger_conf}")

    try:
        dag_run = trigger_dag(
            dag_id=target_dag_id,
            conf=trigger_conf,
            execution_date=None,
            replace_microseconds=False
        )

        logging.info(f"Successfully triggered {target_dag_id}, run_id: {dag_run.run_id}")
        ti.xcom_push(key='triggered_dag_run_id', value=dag_run.run_id)
        ti.xcom_push(key='triggered_dag_id', value=target_dag_id)
        return dag_run.run_id
    except Exception as e:
        logging.error(f"Error triggering {target_dag_id}: {e}")
        raise

t3_trigger = PythonOperator(
    task_id='trigger_table_generator',
    python_callable=trigger_table_generator,
    trigger_rule=TriggerRule.ALL_DONE,
)
|
||||
|
||||
# End log table
def end_log_table_task(**context):
    """Close the replication run's row in the Oracle log table.

    Best-effort cleanup: errors from the end_log_table procedure are logged
    but deliberately NOT re-raised, so a logging hiccup never fails the DAG.
    """
    ti = context["ti"]

    def pull(key):
        return ti.xcom_pull(task_ids='init_step', key=key)

    p_service_name = pull('p_service_name')
    p_table_owner = pull('p_table_owner')
    p_table_name = pull('p_table_name')

    oracle_conn = None
    try:
        oracle_conn = oraconn.connect('MRDS_LOADER')
        proc_args = [p_service_name, p_table_owner, p_table_name]
        oraconn.run_proc(
            oracle_conn,
            'MRDS_LOADER.DATA_REPLICATOR.end_log_table',
            proc_args
        )
        oracle_conn.commit()
        logging.info("end_log_table procedure executed successfully")
    except Exception as e:
        logging.error(f"Error in end_log_table: {e}")
        logging.info("Continuing despite end_log_table error (cleanup task)")
    finally:
        if oracle_conn:
            oracle_conn.close()

t4 = PythonOperator(
    task_id='end_log_table',
    python_callable=end_log_table_task,
    trigger_rule=TriggerRule.ALL_DONE,
)
|
||||
|
||||
# Check status and fail if needed
def fail_if_any_failed(**context):
    """Final verdict: raise AirflowFailException if any critical task failed.

    Because several upstream tasks run with trigger_rule=ALL_DONE, failures
    can otherwise slip through silently; this task surfaces them so the DAG
    run itself is marked failed.
    """
    dag_run = context['dag_run']
    check_tasks = ['start_log_table', 'export_table', 'check_previous_tasks_success', 'trigger_table_generator']

    failed = [tid for tid in check_tasks
              if (inst := dag_run.get_task_instance(tid)) and inst.state == 'failed']

    if failed:
        error_msg = f"Critical task(s) failed: {', '.join(failed)}. DAG execution failed."
        logging.error(error_msg)
        raise AirflowFailException(error_msg)

    logging.info("All critical tasks completed successfully: %s", check_tasks)
|
||||
|
||||
t5 = PythonOperator(
    task_id='fail_if_any_failed',
    python_callable=fail_if_any_failed,
    # ALL_DONE: the verdict task must always run, even after failures.
    trigger_rule=TriggerRule.ALL_DONE,
)

# Dependencies
# Main chain drives the replication; end_log_table (t4) also depends on
# t1/t2/t3_trigger directly so the log row is closed even when the middle of
# the chain fails; t5 then decides the overall run outcome.
init >> t1 >> t2 >> t3_check >> t3_drop >> t3_trigger
[t1, t2, t3_trigger] >> t4
t4 >> t5
|
||||
239
airflow/devo_replicator/devo_replicator_core_pandas.py
Normal file
239
airflow/devo_replicator/devo_replicator_core_pandas.py
Normal file
@@ -0,0 +1,239 @@
|
||||
from airflow import DAG
|
||||
from airflow.operators.python_operator import PythonOperator
|
||||
from airflow.hooks.S3_hook import S3Hook
|
||||
from datetime import datetime, timedelta
|
||||
import pandas as pd
|
||||
import pyarrow as pa
|
||||
import pyarrow.parquet as pq
|
||||
import io
|
||||
import os
|
||||
import logging
|
||||
import sys
|
||||
|
||||
sys.path.append('/opt/airflow/python/mrds_common')
|
||||
|
||||
from mrds.utils import oraconn
|
||||
|
||||
|
||||
SERVICE_NAME = "SERVICE_NAME"
|
||||
OWNER = "C2D"
|
||||
TABLE_NAME = "T_CEPH"
|
||||
METADATA_OWNER = "CT_MOPDB"
|
||||
METADATA_TABLE = "mopdb_metadata_inventory"
|
||||
|
||||
USE_LOCAL_STORAGE = True
|
||||
LOCAL_OUTPUT_DIR = "/tmp/devo_replicator_output"
|
||||
|
||||
S3_BUCKET = "bucket-name"
|
||||
S3_PREFIX = "devo/replicator/C2D/T_CEPH/"
|
||||
AWS_CONN_ID = "aws_default"
|
||||
|
||||
DEFAULT_ARGS = {
|
||||
"owner": "airflow",
|
||||
"depends_on_past": False,
|
||||
"email_on_failure": False,
|
||||
"email_on_retry": False,
|
||||
"retries": 1,
|
||||
"retry_delay": timedelta(minutes=5),
|
||||
}
|
||||
DAG_ID = "devo_replicator_pandas"
|
||||
SCHEDULE_INTERVAL = None
|
||||
CHUNK_SIZE = 100000
|
||||
|
||||
|
||||
def query_oracle_template(owner, table_name):
    """Build the casting SELECT for owner.table_name from the metadata
    inventory and derive a per-column pandas dtype map.

    Args:
        owner: Oracle schema owner of the source table.
        table_name: source table name.

    Returns:
        tuple(str, dict): (SELECT statement with per-column CASTs,
        {column_name: pandas dtype string in {'string','Int32','Int64','float64'}}).

    Raises:
        ValueError: when the metadata inventory has no rows for the table.
    """
    # BUG FIX: initialise before the try block -- previously, if connect()
    # raised, `conn` was unbound and the finally clause itself raised
    # UnboundLocalError, masking the original error.
    conn = None
    try:
        input_query = """SELECT
        COLUMN_NAME,
        DATA_TYPE,
        CHAR_LENGTH,
        DATA_PRECISION,
        DATA_SCALE,
        COLUMN_ID,
        CASE
            WHEN DATA_TYPE = 'DATE' OR DATA_TYPE LIKE '%TIMESTAMP%' THEN
                'CAST(' || COLUMN_NAME || ' AS VARCHAR2(100)) AS ' || COLUMN_NAME
            WHEN DATA_TYPE = 'VARCHAR2' OR DATA_TYPE LIKE '%CHAR%' THEN
                'CAST(' || COLUMN_NAME || ' AS VARCHAR2(' || CAST(CHAR_LENGTH AS INT) || ')) AS ' || COLUMN_NAME
            WHEN DATA_TYPE IN ('NUMBER', 'DECIMAL') AND DATA_PRECISION IS NOT NULL AND DATA_SCALE IS NOT NULL THEN
                'CAST(' || COLUMN_NAME || ' AS ' || DATA_TYPE || '(' || CAST(DATA_PRECISION AS INT) || ',' || CAST(DATA_SCALE AS INT) || ')) AS ' || COLUMN_NAME
            WHEN DATA_TYPE IN ('NUMBER', 'DECIMAL') AND DATA_PRECISION IS NOT NULL THEN
                'CAST(' || COLUMN_NAME || ' AS ' || DATA_TYPE || '(' || CAST(DATA_PRECISION AS INT) || ')) AS ' || COLUMN_NAME
            WHEN DATA_TYPE = 'CLOB' THEN
                'TO_CHAR(SUBSTR(' || COLUMN_NAME || ', 1, 32767)) AS ' || COLUMN_NAME
            ELSE
                COLUMN_NAME
        END AS casting
        FROM {0}.{1}
        WHERE OWNER = '{2}' AND TABLE_NAME = '{3}' AND A_VALID_TO > SYSDATE
        ORDER BY COLUMN_ID""".format(METADATA_OWNER, METADATA_TABLE, owner, table_name)

        # BUG FIX: was the bare name `connect(...)`, which is undefined in
        # this module (only `oraconn` is imported) -> NameError at runtime.
        conn = oraconn.connect('MRDS_LOADER')
        df = pd.read_sql(input_query, conn)

        if df.empty:
            raise ValueError(f"No metadata found for {owner}.{table_name}")

        output_query = 'SELECT ' + ', \n'.join(df['casting'].tolist()) + ' FROM {0}.{1}'.format(owner, table_name)

        # Map Oracle types to pandas dtypes: small-precision NUMBERs become
        # nullable Int32/Int64, scaled/large numbers float64, all else string.
        column_metadata = {}
        for _, row in df.iterrows():
            col_name = row['COLUMN_NAME']
            data_type = row['DATA_TYPE']

            if data_type in ('NUMBER', 'DECIMAL', 'FLOAT', 'BINARY_FLOAT', 'BINARY_DOUBLE'):
                if pd.notna(row['DATA_SCALE']) and row['DATA_SCALE'] > 0:
                    column_metadata[col_name] = 'float64'
                elif pd.notna(row['DATA_PRECISION']) and row['DATA_PRECISION'] <= 9:
                    column_metadata[col_name] = 'Int32'
                elif pd.notna(row['DATA_PRECISION']) and row['DATA_PRECISION'] <= 18:
                    column_metadata[col_name] = 'Int64'
                else:
                    column_metadata[col_name] = 'float64'
            elif data_type == 'DATE' or 'TIMESTAMP' in data_type:
                # Dates are CAST to VARCHAR2 in the query, so string here.
                column_metadata[col_name] = 'string'
            else:
                column_metadata[col_name] = 'string'

        logging.info(f"Generated query template with {len(df)} columns")
        return output_query, column_metadata

    except Exception as e:
        logging.error(f"Error in query_oracle_template: {e}")
        raise
    finally:
        if conn:
            conn.close()
|
||||
|
||||
|
||||
def query_oracle_and_generate_parquet(partition_num, partitions, sql, column_metadata, use_local):
    """Stream one ORA_HASH partition of `sql` into a snappy Parquet file.

    Reads in CHUNK_SIZE chunks via pandas, coerces columns to the dtypes in
    `column_metadata`, and writes either to LOCAL_OUTPUT_DIR (use_local=True)
    or to s3://S3_BUCKET/S3_PREFIX... (use_local=False).

    Args:
        partition_num: 0-based partition index (ORA_HASH bucket).
        partitions: total number of partitions / parallel degree hint.
        sql: inner SELECT produced by query_oracle_template.
        column_metadata: {column: dtype} map ('string'/'Int*'/'float64').
        use_local: write to local disk instead of S3.
    """
    logging.info(f"[Pandas-Partition {partition_num}] Starting processing (Mode: {'LOCAL' if use_local else 'S3'})")

    # ORA_HASH over ROWID gives a stable, disjoint row split across tasks.
    partition_sql = f"""SELECT /*+ PARALLEL(t, {partitions}) */ *
    FROM (
        {sql}
    ) t
    WHERE ORA_HASH(ROWID, {partitions - 1}) = {partition_num}"""

    # BUG FIX: was the bare name `connect(...)`, which is undefined in this
    # module (only `oraconn` is imported) -> NameError at runtime.
    conn = oraconn.connect("MRDS_LOADER")

    if use_local:
        os.makedirs(LOCAL_OUTPUT_DIR, exist_ok=True)
        output_path = os.path.join(LOCAL_OUTPUT_DIR, f"partition_{partition_num:04d}.parquet")
        file_handle = open(output_path, 'wb')
        write_target = file_handle
    else:
        s3_hook = S3Hook(aws_conn_id=AWS_CONN_ID)
        s3_client = s3_hook.get_conn()
        s3_key = f"{S3_PREFIX}partition_{partition_num:04d}.parquet"
        buffer = io.BytesIO()
        write_target = buffer

    try:
        chunk_iterator = pd.read_sql(partition_sql, conn, chunksize=CHUNK_SIZE)

        pqwriter = None  # created lazily from the first chunk's schema
        total_rows = 0
        chunk_count = 0

        for chunk in chunk_iterator:
            # Best-effort dtype coercion; a failed column is logged and left
            # as pandas inferred it rather than failing the partition.
            for col, dtype in column_metadata.items():
                if col in chunk.columns:
                    try:
                        if dtype == 'string':
                            chunk[col] = chunk[col].astype('string')
                        elif dtype.startswith('Int'):
                            chunk[col] = pd.to_numeric(chunk[col], errors='coerce').astype(dtype)
                        elif dtype == 'float64':
                            chunk[col] = pd.to_numeric(chunk[col], errors='coerce')
                    except Exception as e:
                        logging.warning(f"[Pandas-Partition {partition_num}] Could not optimize column {col}: {e}")

            table = pa.Table.from_pandas(chunk, preserve_index=False)

            if pqwriter is None:
                pqwriter = pq.ParquetWriter(
                    write_target,
                    table.schema,
                    compression='snappy',
                    use_dictionary=True,
                    write_statistics=True,
                    version='2.6',
                    data_page_size=1024*1024,
                )

            pqwriter.write_table(table)
            total_rows += len(chunk)
            chunk_count += 1

            # Periodic progress log every 10 chunks.
            if chunk_count % 10 == 0:
                if use_local:
                    file_size_mb = os.path.getsize(output_path) / 1024 / 1024
                else:
                    file_size_mb = write_target.tell() / 1024 / 1024
                logging.info(f"[Pandas-Partition {partition_num}] Processed {total_rows:,} rows, Size: {file_size_mb:.2f} MB")

        if pqwriter:
            pqwriter.close()

        if use_local:
            file_size_mb = os.path.getsize(output_path) / 1024 / 1024
            logging.info(f"[Pandas-Partition {partition_num}] Completed - {total_rows:,} rows, {file_size_mb:.2f} MB saved to {output_path}")
        else:
            write_target.seek(0)
            buffer_size_mb = write_target.getbuffer().nbytes / 1024 / 1024

            logging.info(f"[Pandas-Partition {partition_num}] Uploading {buffer_size_mb:.2f} MB to s3://{S3_BUCKET}/{s3_key}")

            s3_client.upload_fileobj(write_target, S3_BUCKET, s3_key)

            logging.info(f"[Pandas-Partition {partition_num}] Completed - {total_rows:,} rows, {buffer_size_mb:.2f} MB uploaded to S3")

    except Exception as e:
        logging.error(f"[Pandas-Partition {partition_num}] Error: {e}")
        raise
    finally:
        conn.close()
        if use_local:
            file_handle.close()
        else:
            write_target.close()
|
||||
|
||||
|
||||
def generate_tasks(dag, partitions, sql, column_metadata, use_local):
    """Create one PythonOperator per ORA_HASH partition of the export query.

    Returns the operators in partition order; each one runs
    query_oracle_and_generate_parquet for its partition number.
    """
    return [
        PythonOperator(
            task_id=f"generate_parquet_partition_{partition_num}",
            python_callable=query_oracle_and_generate_parquet,
            op_kwargs=dict(
                partition_num=partition_num,
                partitions=partitions,
                sql=sql,
                column_metadata=column_metadata,
                use_local=use_local,
            ),
            provide_context=True,
            dag=dag,
        )
        for partition_num in range(partitions)
    ]
|
||||
|
||||
|
||||
with DAG(
    dag_id='devo_replicator_pandas',
    default_args=DEFAULT_ARGS,
    description='Devo replicator using Pandas with dtype optimization',
    schedule_interval=SCHEDULE_INTERVAL,
    start_date=datetime(2024, 1, 1),
    catchup=False,
    tags=['DevoReplicator', 'Pandas'],
    max_active_runs=1,
    max_active_tasks=30,
) as dag:

    # NOTE(review): this runs an Oracle metadata query at DAG-*parse* time,
    # i.e. on every scheduler parse loop -- confirm that is intended.
    query, column_metadata = query_oracle_template(OWNER, TABLE_NAME)

    # Number of ORA_HASH partitions == number of parallel export tasks.
    PARTITIONS = 16

    partition_tasks = generate_tasks(dag, PARTITIONS, query, column_metadata, USE_LOCAL_STORAGE)
|
||||
244
airflow/devo_replicator/devo_replicator_core_pyarrow.py
Normal file
244
airflow/devo_replicator/devo_replicator_core_pyarrow.py
Normal file
@@ -0,0 +1,244 @@
|
||||
from airflow import DAG
|
||||
from airflow.operators.python_operator import PythonOperator
|
||||
from airflow.hooks.S3_hook import S3Hook
|
||||
from datetime import datetime, timedelta
|
||||
import pandas as pd
|
||||
import pyarrow as pa
|
||||
import pyarrow.parquet as pq
|
||||
import io
|
||||
import logging
|
||||
import sys
|
||||
|
||||
|
||||
sys.path.append('/opt/airflow/python/mrds_common')
|
||||
|
||||
from mrds.utils import oraconn
|
||||
|
||||
SERVICE_NAME = "SERVICE_NAME"
|
||||
OWNER = "C2D"
|
||||
TABLE_NAME = "T_CEPH"
|
||||
METADATA_OWNER = "CT_MOPDB"
|
||||
METADATA_TABLE = "mopdb_metadata_inventory"
|
||||
|
||||
## need to be changed
|
||||
S3_BUCKET = "bucket-name"
|
||||
S3_PREFIX = "devo/replicator/C2D/T_CEPH/"
|
||||
AWS_CONN_ID = "aws_default"
|
||||
|
||||
DEFAULT_ARGS = {
|
||||
"owner": "airflow",
|
||||
"depends_on_past": False,
|
||||
"email_on_failure": False,
|
||||
"email_on_retry": False,
|
||||
"retries": 1,
|
||||
"retry_delay": timedelta(minutes=5),
|
||||
}
|
||||
DAG_ID = "devo_replicator_pyarrow"
|
||||
SCHEDULE_INTERVAL = None
|
||||
BATCH_SIZE = 100000
|
||||
|
||||
|
||||
def query_oracle_template(owner, table_name):
    """Build the casting SELECT for owner.table_name from the metadata
    inventory (PyArrow variant: no dtype map, schema is inferred later).

    Returns:
        str: SELECT statement with one CAST expression per column.

    Raises:
        ValueError: when the metadata inventory has no rows for the table.
    """
    # BUG FIX: initialise before the try block -- previously, if connect()
    # raised, `conn` was unbound and the finally clause itself raised
    # UnboundLocalError, masking the original error.
    conn = None
    try:
        input_query = """SELECT
        CASE
            WHEN DATA_TYPE = 'DATE' OR DATA_TYPE LIKE '%TIMESTAMP%' THEN
                'CAST(' || COLUMN_NAME || ' AS VARCHAR2(100)) AS ' || COLUMN_NAME
            WHEN DATA_TYPE = 'VARCHAR2' OR DATA_TYPE LIKE '%CHAR%' THEN
                'CAST(' || COLUMN_NAME || ' AS VARCHAR2(' || CAST(CHAR_LENGTH AS INT) || ')) AS ' || COLUMN_NAME
            WHEN DATA_TYPE IN ('NUMBER', 'DECIMAL') AND DATA_PRECISION IS NOT NULL AND DATA_SCALE IS NOT NULL THEN
                'CAST(' || COLUMN_NAME || ' AS ' || DATA_TYPE || '(' || CAST(DATA_PRECISION AS INT) || ',' || CAST(DATA_SCALE AS INT) || ')) AS ' || COLUMN_NAME
            WHEN DATA_TYPE IN ('NUMBER', 'DECIMAL') AND DATA_PRECISION IS NOT NULL THEN
                'CAST(' || COLUMN_NAME || ' AS ' || DATA_TYPE || '(' || CAST(DATA_PRECISION AS INT) || ')) AS ' || COLUMN_NAME
            WHEN DATA_TYPE = 'CLOB' THEN
                'TO_CHAR(SUBSTR(' || COLUMN_NAME || ', 1, 32767)) AS ' || COLUMN_NAME
            ELSE
                COLUMN_NAME
        END AS casting
        FROM {0}.{1}
        WHERE OWNER = '{2}' AND TABLE_NAME = '{3}' AND A_VALID_TO > SYSDATE
        ORDER BY COLUMN_ID""".format(METADATA_OWNER, METADATA_TABLE, owner, table_name)

        # BUG FIX: was the bare name `connect(...)`, which is undefined in
        # this module (only `oraconn` is imported) -> NameError at runtime.
        conn = oraconn.connect('MRDS_LOADER')
        df = pd.read_sql(input_query, conn)

        if df.empty:
            raise ValueError(f"No metadata found for {owner}.{table_name}")

        output_query = 'SELECT ' + ', \n'.join(df['casting'].tolist()) + ' FROM {0}.{1}'.format(owner, table_name)

        logging.info(f"Generated query template with {len(df)} columns")
        return output_query

    except Exception as e:
        logging.error(f"Error in query_oracle_template: {e}")
        raise
    finally:
        if conn:
            conn.close()
|
||||
|
||||
|
||||
def query_oracle_and_generate_parquet(partition_num, partitions, sql):
    """Stream one ORA_HASH partition of `sql` to S3 as snappy Parquet,
    batching rows straight from the cursor into PyArrow record batches.

    The Arrow schema is inferred from the first fetched batch (first
    non-NULL value per column); columns that later fail conversion fall
    back to string. Empty partitions log a warning and write nothing.

    Args:
        partition_num: 0-based partition index (ORA_HASH bucket).
        partitions: total number of partitions / parallel degree hint.
        sql: inner SELECT produced by query_oracle_template.
    """
    logging.info(f"[PyArrow-Partition {partition_num}] Starting processing")

    # ORA_HASH over ROWID gives a stable, disjoint row split across tasks.
    partition_sql = f"""SELECT /*+ PARALLEL(t, {partitions}) */ *
    FROM (
        {sql}
    ) t
    WHERE ORA_HASH(ROWID, {partitions - 1}) = {partition_num}"""

    # BUG FIX: was the bare name `connect(...)`, which is undefined in this
    # module (only `oraconn` is imported) -> NameError at runtime.
    conn = oraconn.connect("MRDS_LOADER")
    cursor = conn.cursor()
    cursor.arraysize = BATCH_SIZE
    cursor.prefetchrows = BATCH_SIZE

    s3_hook = S3Hook(aws_conn_id=AWS_CONN_ID)
    s3_client = s3_hook.get_conn()
    s3_key = f"{S3_PREFIX}partition_{partition_num:04d}.parquet"

    buffer = io.BytesIO()

    try:
        cursor.execute(partition_sql)

        column_names = [desc[0] for desc in cursor.description]

        first_batch_rows = cursor.fetchmany(BATCH_SIZE)
        if not first_batch_rows:
            logging.warning(f"[PyArrow-Partition {partition_num}] No data found")
            return

        # Infer an Arrow type per column from the first non-NULL sample;
        # an all-NULL column in this batch defaults to string.
        arrow_fields = []
        sample_row = first_batch_rows[0]

        for i, col_name in enumerate(column_names):
            sample_val = sample_row[i]

            if sample_val is None:
                for row in first_batch_rows[1:]:
                    if row[i] is not None:
                        sample_val = row[i]
                        break

            if isinstance(sample_val, str):
                arrow_type = pa.string()
            elif isinstance(sample_val, int):
                arrow_type = pa.int64()
            elif isinstance(sample_val, float):
                arrow_type = pa.float64()
            elif isinstance(sample_val, (datetime, pd.Timestamp)):
                arrow_type = pa.timestamp('ns')
            elif isinstance(sample_val, bytes):
                arrow_type = pa.binary()
            else:
                arrow_type = pa.string()

            arrow_fields.append(pa.field(col_name, arrow_type))

        schema = pa.schema(arrow_fields)

        writer = pq.ParquetWriter(
            buffer,
            schema,
            compression='snappy',
            use_dictionary=True,
            write_statistics=True,
            data_page_size=2*1024*1024,
            version='2.6',
        )

        def process_batch(rows):
            # Transpose row tuples to columns and build a RecordBatch;
            # any column that rejects the inferred type is stringified.
            if not rows:
                return None

            columns_data = list(zip(*rows))
            arrays = []

            for i, col_data in enumerate(columns_data):
                try:
                    arrays.append(pa.array(col_data, type=schema.field(i).type))
                except Exception as e:
                    logging.warning(f"[PyArrow-Partition {partition_num}] Column {column_names[i]} conversion failed: {e}")
                    converted = [str(val) if val is not None else None for val in col_data]
                    arrays.append(pa.array(converted, type=pa.string()))

            return pa.RecordBatch.from_arrays(arrays, schema=schema)

        batch = process_batch(first_batch_rows)
        if batch:
            writer.write_batch(batch)

        total_rows = len(first_batch_rows)
        batch_count = 1

        while True:
            rows = cursor.fetchmany(BATCH_SIZE)
            if not rows:
                break

            batch = process_batch(rows)
            if batch:
                writer.write_batch(batch)

            total_rows += len(rows)
            batch_count += 1

            # Periodic progress log every 10 batches.
            if batch_count % 10 == 0:
                buffer_size_mb = buffer.tell() / 1024 / 1024
                logging.info(f"[PyArrow-Partition {partition_num}] Processed {total_rows:,} rows, Buffer size: {buffer_size_mb:.2f} MB")

        writer.close()

        buffer.seek(0)
        buffer_size_mb = buffer.getbuffer().nbytes / 1024 / 1024

        logging.info(f"[PyArrow-Partition {partition_num}] Uploading {buffer_size_mb:.2f} MB to s3://{S3_BUCKET}/{s3_key}")

        s3_client.upload_fileobj(buffer, S3_BUCKET, s3_key)

        logging.info(f"[PyArrow-Partition {partition_num}] Completed - {total_rows:,} rows, {buffer_size_mb:.2f} MB uploaded to S3")

    except Exception as e:
        logging.error(f"[PyArrow-Partition {partition_num}] Error: {e}")
        raise
    finally:
        cursor.close()
        conn.close()
        buffer.close()
|
||||
|
||||
|
||||
def generate_tasks(dag, partitions, sql):
    """Create one PythonOperator per ORA_HASH partition of the export query.

    Returns the operators in partition order; each one runs
    query_oracle_and_generate_parquet for its partition number.
    """
    return [
        PythonOperator(
            task_id=f"generate_parquet_partition_{partition_num}",
            python_callable=query_oracle_and_generate_parquet,
            op_kwargs=dict(
                partition_num=partition_num,
                partitions=partitions,
                sql=sql,
            ),
            provide_context=True,
            dag=dag,
        )
        for partition_num in range(partitions)
    ]
|
||||
|
||||
|
||||
with DAG(
    dag_id='devo_replicator_pyarrow',
    default_args=DEFAULT_ARGS,
    description='Devo replicator using PyArrow native processing',
    schedule_interval=SCHEDULE_INTERVAL,
    start_date=datetime(2024, 1, 1),
    catchup=False,
    tags=['DevoReplicator', 'PyArrow'],
    max_active_runs=1,
    max_active_tasks=16,
) as dag:

    # NOTE(review): this runs an Oracle metadata query at DAG-*parse* time,
    # i.e. on every scheduler parse loop -- confirm that is intended.
    query = query_oracle_template(OWNER, TABLE_NAME)

    # Number of ORA_HASH partitions == number of parallel export tasks.
    PARTITIONS = 16

    partition_tasks = generate_tasks(dag, PARTITIONS, query)
|
||||
181
airflow/devo_replicator/devo_replicator_scheduler_rar.py
Normal file
181
airflow/devo_replicator/devo_replicator_scheduler_rar.py
Normal file
@@ -0,0 +1,181 @@
|
||||
# dags/dev_replicator_scheduler_rar.py
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
import logging
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
from airflow import DAG
|
||||
from airflow.decorators import task
|
||||
from airflow.operators.python import BranchPythonOperator
|
||||
from airflow.operators.empty import EmptyOperator
|
||||
from airflow.operators.trigger_dagrun import TriggerDagRunOperator
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
from mrds.utils import oraconn # your Oracle connection helper
|
||||
|
||||
sys.path.append('/opt/airflow/python/connectors/devo')
|
||||
sys.path.append('/opt/airflow/python/mrds_common')
|
||||
|
||||
DAG_NAME = "devo_replicator_scheduler_rar"
|
||||
TARGET_DAG_ID = "devo_replicator_trigger"
|
||||
ORACLE_CONN_NAME = "MRDS_LOADER"
|
||||
|
||||
PRECONDITION_SQL = """
|
||||
WITH LAST_UPDATE_ORACLE AS (
|
||||
SELECT MAX(process_end) AS process_end
|
||||
FROM CT_RAR.A_RAR_FOR_DISC_MONITORING
|
||||
WHERE UPPER(owner || '.' || target_table_name) = UPPER(:table_name)
|
||||
AND process_end IS NOT NULL
|
||||
AND process_successful = 'Y'
|
||||
),
|
||||
LAST_UPDATE_DEVO AS (
|
||||
SELECT CASE
|
||||
WHEN last_status = 'FINISHED' THEN last_end_time
|
||||
ELSE TO_DATE('01-JAN-1999', 'DD-MON-YYYY')
|
||||
END AS process_end
|
||||
FROM CT_MRDS.a_devo_replica_mgmt_rar
|
||||
WHERE owner || '.' || table_name = :table_name
|
||||
)
|
||||
SELECT CASE
|
||||
WHEN (SELECT process_end FROM LAST_UPDATE_ORACLE) >
|
||||
(SELECT process_end FROM LAST_UPDATE_DEVO)
|
||||
THEN 'Y' ELSE 'N'
|
||||
END AS trigger_devo_replicator
|
||||
FROM dual
|
||||
"""
|
||||
|
||||
def _get_conn():
    """Open a new Oracle connection through the shared oraconn helper."""
    return oraconn.connect(ORACLE_CONN_NAME)
|
||||
|
||||
def get_devo_replica_table_options() -> list[str]:
    """List every 'OWNER.TABLE_NAME' registered for RAR replication.

    Fail-soft: any database error is logged and an empty list is returned,
    so the scheduler DAG keeps running with nothing to check.
    """
    conn = None
    cur = None
    try:
        conn = _get_conn()
        cur = conn.cursor()
        cur.execute("""
            SELECT OWNER || '.' || TABLE_NAME
            FROM CT_MRDS.a_devo_replica_mgmt_rar
            ORDER BY OWNER, TABLE_NAME
        """)
        tables = [record[0] for record in (cur.fetchall() or [])]
        logging.info("Fetched %d table(s) from replica mgmt.", len(tables))
        return tables
    except Exception:
        logging.exception("Error getting DEVO replica table options")
        return []
    finally:
        # Cursor close failures are swallowed so the connection still closes.
        try:
            if cur:
                cur.close()
        except Exception:
            pass
        if conn:
            conn.close()
|
||||
|
||||
def check_table_precondition(table_full_name: str) -> dict:
    """Returns {"table": <OWNER.TABLE>, "trigger": "Y"|"N"}."""
    connection = None
    cursor = None
    try:
        connection = _get_conn()
        cursor = connection.cursor()
        cursor.execute(PRECONDITION_SQL, {"table_name": table_full_name})
        first_row = cursor.fetchone()
        # Missing row or NULL column both collapse to 'N'.
        verdict = (first_row[0] if first_row else 'N') or 'N'
        logging.info("Precondition for %s: %s", table_full_name, verdict)
        return {"table": table_full_name, "trigger": verdict}
    except Exception:
        logging.exception("Error checking precondition for %s", table_full_name)
        return {"table": table_full_name, "trigger": "N"}  # fail closed
    finally:
        try:
            if cursor:
                cursor.close()
        except Exception:
            pass
        if connection:
            connection.close()
|
||||
|
||||
# Task-level defaults applied to every task in this DAG.
default_args = dict(
    owner='devo',
    depends_on_past=False,
    email_on_failure=False,
    email_on_retry=False,
    retries=1,
    retry_delay=timedelta(minutes=1),
)
|
||||
|
||||
with DAG(
    dag_id=DAG_NAME,
    description="Checks DEVO replica preconditions; triggers devo_replicator_trigger_rar once if any table is stale",
    default_args=default_args,
    # Static start_date: the original used datetime.now() - timedelta(days=2),
    # which changes on every scheduler parse and can stop runs from ever being
    # scheduled.
    start_date=datetime(2024, 1, 1),
    # BUGFIX: the original passed BOTH schedule=None and
    # schedule_interval='*/10 * * * *'; Airflow >= 2.4 rejects the combination
    # at import time. Keep a single modern `schedule` argument.
    schedule='*/10 * * * *',  # every 10 minutes
    catchup=False,
    max_active_runs=1,
    tags=["DevoScheduler", "DevoReplicatorTrigger"],
) as dag:

    @task
    def fetch_tables() -> list[str]:
        """Enumerate all managed OWNER.TABLE names (may be empty)."""
        tables = get_devo_replica_table_options()
        if not tables:
            logging.warning("No tables returned from enumeration.")
        return tables

    @task
    def check_one(table_name: str) -> dict:
        """Evaluate the staleness precondition for a single table."""
        return check_table_precondition(table_name)

    @task
    def summarize(results: list[dict]) -> dict:
        """Aggregate per-table verdicts into {'any_true': bool, 'y_tables': [...]}."""
        y_tables = [r["table"] for r in results if r and r.get("trigger") == "Y"]
        n_tables = [r["table"] for r in results if r and r.get("trigger") == "N"]

        logging.info("Precondition summary -> Y: %d, N: %d", len(y_tables), len(n_tables))
        if y_tables:
            logging.info("Tables needing replication: %s", ", ".join(y_tables))
        else:
            logging.info("No tables are updated/stale; nothing to trigger.")

        return {"any_true": bool(y_tables), "y_tables": y_tables}

    def decide_branch(summary: dict) -> str:
        """Return the EXACT downstream task_id to follow."""
        return "prepare_trigger_conf" if summary.get("any_true") else "no_updates"

    @task
    def prepare_trigger_conf(summary: dict) -> dict:
        """Single conf payload for the downstream DAG."""
        return {"tables_to_replicate": summary.get("y_tables", [])}

    no_updates = EmptyOperator(task_id="no_updates")

    # Graph
    tables = fetch_tables()
    results = check_one.expand(table_name=tables)  # dynamic mapping across tables
    summary = summarize(results)

    branch = BranchPythonOperator(
        task_id="branch_on_any",
        python_callable=decide_branch,
        op_args=[summary],  # XComArg from summarize
    )

    conf_payload = prepare_trigger_conf(summary)

    trigger_devo = TriggerDagRunOperator(
        task_id="trigger_devo_replicator_rar",
        trigger_dag_id=TARGET_DAG_ID,
        wait_for_completion=True,
        reset_dag_run=True,
        conf=conf_payload,
    )

    # Wire branching — only ONE instance of prepare_trigger_conf is referenced
    summary >> branch
    branch >> no_updates
    branch >> conf_payload >> trigger_devo
|
||||
255
airflow/devo_replicator/devo_replicator_trigger.py
Normal file
255
airflow/devo_replicator/devo_replicator_trigger.py
Normal file
@@ -0,0 +1,255 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import sys
|
||||
import logging
|
||||
import time
|
||||
from datetime import timedelta
|
||||
|
||||
from airflow import DAG
|
||||
from airflow.utils.dates import days_ago
|
||||
from airflow.operators.python import PythonOperator
|
||||
from airflow.models import Param
|
||||
from airflow.decorators import task
|
||||
from airflow.providers.oracle.hooks.oracle import OracleHook
|
||||
|
||||
from mrds.utils import oraconn
|
||||
|
||||
sys.path.append('/opt/airflow/python/connectors/devo')
|
||||
sys.path.append('/opt/airflow/python/mrds_common')
|
||||
|
||||
# Logical Oracle connection name used throughout this DAG file.
ORACLE_CONN_ID = "MRDS_LOADER"
# TARGET_DAG_ID = "devo_replicator_trigger_rar"
|
||||
|
||||
def get_rar_table_options():
    """Return every 'OWNER.TABLE_NAME' registered for RAR replication.

    Falls back to an empty list (after logging the error) when the database
    cannot be queried, so DAG parsing never hard-fails here.
    """
    oracle_conn = None
    try:
        oracle_conn = oraconn.connect('MRDS_LOADER')
        cursor = oracle_conn.cursor()
        cursor.execute("""
            SELECT OWNER || '.' || TABLE_NAME
            FROM CT_MRDS.a_devo_replica_mgmt_rar
            ORDER BY OWNER, TABLE_NAME
        """)
        names = [entry[0] for entry in cursor.fetchall()]
        cursor.close()
        return names
    except Exception as e:
        logging.error(f"Error getting RAR table options: {e}")
        return []
    finally:
        if oracle_conn:
            oracle_conn.close()
|
||||
|
||||
# Task-level defaults shared by every task in this DAG.
default_args = dict(
    owner='devo',
    depends_on_past=False,
    start_date=days_ago(1),
    email_on_failure=False,
    email_on_retry=False,
    retries=1,
    retry_delay=timedelta(minutes=1),
)
|
||||
|
||||
with DAG(
    dag_id='devo_replicator_trigger',
    default_args=default_args,
    description='External trigger DAG for RAR tables',
    schedule=None,
    catchup=False,
    tags=['DevoReplicator', 'DevoReplicatorTrigger'],
    max_active_runs=1,
    params={
        # still allow manual runs from the UI
        "owner_table": Param(
            default=None,
            type=["string", "null"],
            description="Select table in format OWNER.TABLE_NAME",
            # enum=get_rar_table_options()
        )
    }
) as dag:

    # --- Init: read conf ---
    def init_step(**context):
        """Validate MRDS_ENV and copy dag_run.conf inputs into XCom.

        Accepts either a tables_to_replicate list (pushed by the scheduler
        DAG) or a single owner_table string; both may be absent on a manual
        run, in which case get_table_list falls back to the full DB list.
        """
        dag_run = context.get("dag_run")
        ti = context["ti"]
        conf = (dag_run.conf or {}) if dag_run else {}

        env = os.getenv("MRDS_ENV")
        if not env:
            raise ValueError("MRDS_ENV environment variable is required")
        env = env.lower()

        store = "rar"
        owner_table = conf.get("owner_table")  # optional single table
        tables_to_replicate = conf.get("tables_to_replicate")  # optional list of OWNER.TABLE

        # Log which of the three input modes applies.
        if tables_to_replicate:
            logging.info("Received tables_to_replicate from upstream: %d table(s).", len(tables_to_replicate))
        elif owner_table:
            logging.info("Received single owner_table from conf: %s", owner_table)
        else:
            logging.info("No conf provided; manual UI param may be used or fallback to full list in get_table_list.")

        if env not in {"dev", "tst", "acc", "prd"}:
            raise ValueError(f"Unsupported env '{env}'. Expected 'dev', 'tst', 'acc' or 'prd'.")

        xcom = {
            "env": env,
            "store": store,
            "owner_table": owner_table,                   # may be None
            "tables_to_replicate": tables_to_replicate,   # may be None/list
        }
        for k, v in xcom.items():
            ti.xcom_push(key=k, value=v)

    init = PythonOperator(
        task_id='init_step',
        python_callable=init_step,
    )

    # --- Build the processing list ---
    def get_table_list(**context):
        """Resolve the (owner, table) pairs to process, in priority order:
        1) tables_to_replicate list from conf, 2) single owner_table,
        3) the full list from the replica-mgmt table.
        """
        ti = context["ti"]
        store = ti.xcom_pull(task_ids='init_step', key='store')
        owner_table = ti.xcom_pull(task_ids='init_step', key='owner_table')
        tables_to_replicate = ti.xcom_pull(task_ids='init_step', key='tables_to_replicate')

        # 1) If upstream provided a list, use it
        if tables_to_replicate:
            logging.info("Using tables_to_replicate list from conf: %d items", len(tables_to_replicate))
            tables = []
            for ot in tables_to_replicate:
                if '.' not in ot:
                    logging.warning("Skipping malformed owner_table (no dot): %s", ot)
                    continue
                table_owner, table_name = ot.split('.', 1)
                tables.append((table_owner, table_name))
            ti.xcom_push(key='tables_to_process', value=tables)
            return tables

        # 2) Else if a single owner_table provided (manual/programmatic)
        if owner_table:
            table_owner, table_name = owner_table.split('.', 1)
            tables = [(table_owner, table_name)]
            logging.info("Processing single table from conf/params: %s", owner_table)
            ti.xcom_push(key='tables_to_process', value=tables)
            return tables

        # 3) Else fallback to full list in DB (manual run without conf)
        oracle_conn = None
        try:
            oracle_conn = oraconn.connect('MRDS_LOADER')
            cursor = oracle_conn.cursor()
            cursor.execute("""
                SELECT OWNER, TABLE_NAME
                FROM CT_MRDS.a_devo_replica_mgmt_rar
                ORDER BY OWNER, TABLE_NAME
            """)
            tables = cursor.fetchall()
            cursor.close()
            logging.info("Fallback: Found %d tables for RAR", len(tables))
            ti.xcom_push(key='tables_to_process', value=tables)
            return tables
        except Exception as e:
            logging.error(f"Error in get_table_list: {e}")
            raise
        finally:
            if oracle_conn:
                oracle_conn.close()

    t1 = PythonOperator(
        task_id='get_table_list',
        python_callable=get_table_list,
    )

    # --- Throttled triggering of the core DAG, one run per table ---
    def check_and_trigger(**context):
        """For each table: wait until the global running-thread count drops
        below the threshold, skip tables already running, then trigger the
        core DAG once per table.
        """
        ti = context["ti"]
        env = ti.xcom_pull(task_ids='init_step', key='env')
        store = ti.xcom_pull(task_ids='init_step', key='store')
        threshold = 30  # max concurrent replication threads across all stores
        tables = ti.xcom_pull(task_ids='get_table_list', key='tables_to_process')

        oracle_conn = None
        triggered_count = 0

        try:
            oracle_conn = oraconn.connect('MRDS_LOADER')

            # The store selects the mgmt table; it is interpolated into a SQL
            # identifier below, so restrict it to the known whitelist.
            service_name = store.upper()
            if service_name not in {"MOPDB", "RAR", "RQSD"}:
                raise ValueError(f"Unknown store '{store}'")

            # SECURITY FIX: owner/table values originate from dag_run.conf
            # (user-supplied), so they are passed as bind variables instead of
            # being f-string-interpolated into the statement.
            sql_query = f"""
                SELECT
                    (SELECT NVL(SUM(MAX_THREADS),0) FROM CT_MRDS.A_DEVO_REPLICA_MGMT_MOPDB WHERE LAST_STATUS = 'RUNNING') +
                    (SELECT NVL(SUM(MAX_THREADS),0) FROM CT_MRDS.A_DEVO_REPLICA_MGMT_RAR WHERE LAST_STATUS = 'RUNNING')
                        AS TOTAL_RUNNING_THREADS_NOW,
                    (SELECT COUNT(*)
                       FROM CT_MRDS.A_DEVO_REPLICA_MGMT_{service_name}
                      WHERE OWNER = :table_owner AND TABLE_NAME = :table_name AND LAST_STATUS = 'RUNNING') AS TABLE_IS_ALREADY_RUNNING
                FROM DUAL
            """

            for table_owner, table_name in tables:
                logging.info("Processing table: %s.%s", table_owner, table_name)

                while True:
                    cursor = oracle_conn.cursor()
                    try:
                        cursor.execute(sql_query, {"table_owner": table_owner, "table_name": table_name})
                        total_running_val, table_running_val = cursor.fetchone()
                    finally:
                        cursor.close()  # close even if execute/fetch raises

                    logging.info(
                        "Total running: %d, threshold: %d, table running: %d",
                        total_running_val or 0, threshold, table_running_val or 0
                    )

                    if (total_running_val or 0) > threshold:
                        logging.info("Threshold exceeded. Waiting 5 minutes...")
                        time.sleep(300)
                        continue

                    if (table_running_val or 0) >= 1:
                        logging.info("Table %s.%s already running. Skipping.", table_owner, table_name)
                        break

                    # Trigger the core DAG for this specific table
                    from airflow.api.common.trigger_dag import trigger_dag
                    conf = {"store": store, "owner_table": f"{table_owner}.{table_name}"}
                    trigger_dag(
                        dag_id='devo_replicator_core',
                        conf=conf,
                        execution_date=None,
                        replace_microseconds=False
                    )
                    triggered_count += 1
                    logging.info("Triggered core DAG for table %s.%s", table_owner, table_name)
                    break

            logging.info("Total core DAGs triggered: %d", triggered_count)
            ti.xcom_push(key='triggered_count', value=triggered_count)

        except Exception as e:
            logging.error(f"Error in check_and_trigger: {e}")
            raise
        finally:
            if oracle_conn:
                oracle_conn.close()

    t2 = PythonOperator(
        task_id='check_and_trigger',
        python_callable=check_and_trigger,
    )

    init >> t1 >> t2
|
||||
|
||||
|
||||
"""
|
||||
Reading tables_to_replicate from dag_run.conf in init_step.
|
||||
Pushing it to XCom (so get_table_list can use it).
|
||||
Tell get_table_list to prioritize the provided list.
|
||||
init_step reads tables_to_replicate from dag_run.conf and puts it into XCom.
|
||||
get_table_list prioritizes that list; falls back to owner_table or full table list only if needed.
|
||||
check_and_trigger loops over those tables and triggers your core DAG (devo_replicator_core) per table, respecting your concurrency threshold.
|
||||
"""
|
||||
257
airflow/devo_replicator/devo_replicator_trigger_mopdb.py
Normal file
257
airflow/devo_replicator/devo_replicator_trigger_mopdb.py
Normal file
@@ -0,0 +1,257 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import sys
|
||||
import logging
|
||||
import time
|
||||
from datetime import timedelta
|
||||
|
||||
from airflow import DAG
|
||||
from airflow.utils.dates import days_ago
|
||||
from airflow.operators.python import PythonOperator
|
||||
from airflow.models import Param
|
||||
|
||||
sys.path.append('/opt/airflow/python/connectors/devo')
|
||||
sys.path.append('/opt/airflow/python/mrds_common')
|
||||
|
||||
from mrds.utils import oraconn
|
||||
|
||||
# Get MOPDB table options for dropdown
|
||||
def get_mopdb_table_options():
    """Return every 'OWNER.TABLE_NAME' registered for MOPDB replication.

    Falls back to an empty list (after logging the error) when the database
    cannot be queried, so DAG parsing never hard-fails here.
    """
    oracle_conn = None
    try:
        oracle_conn = oraconn.connect('MRDS_LOADER')
        cursor = oracle_conn.cursor()
        cursor.execute("SELECT OWNER || '.' || TABLE_NAME FROM CT_MRDS.a_devo_replica_mgmt_mopdb ORDER BY OWNER, TABLE_NAME")
        names = [entry[0] for entry in cursor.fetchall()]
        cursor.close()
        return names
    except Exception as e:
        logging.error(f"Error getting MOPDB table options: {e}")
        return []
    finally:
        if oracle_conn:
            oracle_conn.close()
|
||||
|
||||
# Task-level defaults shared by every task in this DAG.
default_args = dict(
    owner='devo',
    depends_on_past=False,
    start_date=days_ago(1),
    email_on_failure=False,
    email_on_retry=False,
    retries=1,
    retry_delay=timedelta(minutes=1),
)
|
||||
|
||||
with DAG(
    dag_id='devo_replicator_trigger_mopdb',
    default_args=default_args,
    description='External trigger DAG for MOPDB tables',
    schedule=None,
    catchup=False,
    tags=['DevoReplicator', 'DevoReplicatorTrigger'],
    params={
        "owner_table": Param(
            # BUGFIX: default=None with type="string" fails Param schema
            # validation (DAG import error on Airflow >= 2.6); allow null like
            # the sibling devo_replicator_trigger DAG, and include None in the
            # enum so the default stays enum-valid.
            # NOTE(review): enum queries the DB on every scheduler parse —
            # consider caching or removing.
            default=None,
            type=["string", "null"],
            description="Select table in format OWNER.TABLE_NAME",
            enum=get_mopdb_table_options() + [None],
        )
    }
) as dag:

    # Init: validate env + conf and publish run parameters to XCom.
    def init_step(**context):
        """Validate MRDS_ENV and the mandatory owner_table conf value, then
        push env/store/table identifiers and the thread threshold to XCom."""
        dag_run = context.get("dag_run")
        ti = context["ti"]
        conf = (dag_run.conf or {}) if dag_run else {}

        env = os.getenv("MRDS_ENV")
        if not env:
            raise ValueError("MRDS_ENV environment variable is required")
        env = env.lower()

        store = "mopdb"
        owner_table = conf.get("owner_table")

        if not owner_table:
            raise ValueError("owner_table parameter is required")
        if '.' not in owner_table:
            raise ValueError("owner_table must be in format 'OWNER.TABLE_NAME'")

        table_owner, table_name = owner_table.split('.', 1)

        if env not in {"dev", "tst", "acc", "prd"}:
            raise ValueError(f"Unsupported env '{env}'. Expected 'dev', 'tst', 'acc' or 'prd'.")

        logging.info("=== init_step === env=%s store=%s owner_table=%s",
                     env, store, owner_table)

        xcom = {
            "env": env,
            "store": store,
            "table_owner": table_owner,
            "table_name": table_name,
            "owner_table": owner_table,
            "threshold": 30,  # max concurrent replication threads
        }
        for k, v in xcom.items():
            ti.xcom_push(key=k, value=v)

    init = PythonOperator(
        task_id='init_step',
        python_callable=init_step,
    )

    # Resolve the list of (owner, table) pairs to process.
    def get_table_list(**context):
        """Use the single owner_table from XCom when present, otherwise fall
        back to all tables registered in the MOPDB replica-mgmt table."""
        ti = context["ti"]
        store = ti.xcom_pull(task_ids='init_step', key='store')
        owner_table = ti.xcom_pull(task_ids='init_step', key='owner_table')

        oracle_conn = None
        try:
            oracle_conn = oraconn.connect('MRDS_LOADER')

            if owner_table:
                table_owner, table_name = owner_table.split('.', 1)
                tables = [(table_owner, table_name)]
                logging.info("Processing specific table: %s", owner_table)
            else:
                cursor = oracle_conn.cursor()
                cursor.execute("SELECT OWNER, TABLE_NAME FROM CT_MRDS.a_devo_replica_mgmt_mopdb ORDER BY OWNER, TABLE_NAME")
                tables = cursor.fetchall()
                cursor.close()
                logging.info("Found %d tables for MOPDB", len(tables))

            ti.xcom_push(key='tables_to_process', value=tables)
            return tables
        except Exception as e:
            logging.error(f"Error in get_table_list: {e}")
            raise
        finally:
            if oracle_conn:
                oracle_conn.close()

    t1 = PythonOperator(
        task_id='get_table_list',
        python_callable=get_table_list,
    )

    # Throttled triggering of the core DAG, one run per table.
    def check_and_trigger(**context):
        """For each table: wait until the global running-thread count drops
        below the threshold, skip tables already running, then trigger the
        core DAG once per table."""
        ti = context["ti"]
        env = ti.xcom_pull(task_ids='init_step', key='env')
        store = ti.xcom_pull(task_ids='init_step', key='store')
        threshold = ti.xcom_pull(task_ids='init_step', key='threshold')
        tables = ti.xcom_pull(task_ids='get_table_list', key='tables_to_process')

        oracle_conn = None
        triggered_count = 0

        try:
            oracle_conn = oraconn.connect('MRDS_LOADER')

            # The store selects the mgmt table; it is interpolated into a SQL
            # identifier below, so restrict it to the known whitelist.
            service_name = store.upper()
            if service_name not in {"MOPDB", "RAR", "RQSD"}:
                raise ValueError(f"Unknown store '{store}'")

            # SECURITY FIX: owner/table come from dag_run.conf (user-supplied);
            # pass them as bind variables, not f-string interpolation.
            sql_query = f"""
                SELECT (SELECT CASE WHEN SUM(MAX_THREADS) IS NULL THEN 0 ELSE SUM(MAX_THREADS) END AS RUNNING_THREADS
                        FROM CT_MRDS.A_DEVO_REPLICA_MGMT_MOPDB
                        WHERE LAST_STATUS = 'RUNNING') +
                       (SELECT CASE WHEN SUM(MAX_THREADS) IS NULL THEN 0 ELSE SUM(MAX_THREADS) END AS RUNNING_THREADS
                        FROM CT_MRDS.A_DEVO_REPLICA_MGMT_RAR
                        WHERE LAST_STATUS = 'RUNNING')
                           AS TOTAL_RUNNING_THREADS_NOW,
                       (SELECT COUNT(*) FROM CT_MRDS.A_DEVO_REPLICA_MGMT_{service_name}
                        WHERE OWNER = :table_owner AND TABLE_NAME = :table_name AND LAST_STATUS = 'RUNNING') AS TABLE_IS_ALREADY_RUNNING
                FROM DUAL
            """

            for table_owner, table_name in tables:
                logging.info("Processing table: %s.%s", table_owner, table_name)

                while True:
                    cursor = oracle_conn.cursor()
                    try:
                        cursor.execute(sql_query, {"table_owner": table_owner, "table_name": table_name})
                        result = cursor.fetchone()
                        total_running_val = result[0] or 0
                        table_running_val = result[1] or 0
                    finally:
                        cursor.close()  # close even if execute/fetch raises

                    logging.info("Total running: %d, threshold: %d, table running: %d",
                                 total_running_val, threshold, table_running_val)

                    if total_running_val > threshold:
                        logging.info("Threshold exceeded. Waiting 5 minutes...")
                        time.sleep(300)
                        continue

                    if table_running_val >= 1:
                        logging.info("Table %s.%s is already running. Skipping.", table_owner, table_name)
                        break

                    # Trigger core DAG
                    from airflow.api.common.trigger_dag import trigger_dag

                    conf = {
                        "store": store,
                        "owner_table": f"{table_owner}.{table_name}"
                    }

                    trigger_dag(
                        dag_id='devo_replicator_core',
                        conf=conf,
                        execution_date=None,
                        replace_microseconds=False
                    )

                    triggered_count += 1
                    logging.info("Triggered core DAG for table %s.%s", table_owner, table_name)
                    break

            logging.info("Total DAGs triggered: %d", triggered_count)
            ti.xcom_push(key='triggered_count', value=triggered_count)

        except Exception as e:
            logging.error(f"Error in check_and_trigger: {e}")
            raise
        finally:
            if oracle_conn:
                oracle_conn.close()

    t2 = PythonOperator(
        task_id='check_and_trigger',
        python_callable=check_and_trigger,
    )

    # Dependencies
    init >> t1 >> t2
|
||||
|
||||
"""
|
||||
MOPDB Trigger DAG
|
||||
1) init_step
|
||||
- Gets environment from MRDS_ENV environment variable
|
||||
- Reads owner_table parameter from DAG configuration
|
||||
- Validates owner_table format (must be OWNER.TABLE_NAME)
|
||||
- Sets store to "mopdb" (fixed for this DAG)
|
||||
- Sets threshold to 30 (max concurrent running threads)
|
||||
- Pushes parameters to XCom
|
||||
2) get_table_list
|
||||
- Connects to Oracle database (MRDS_LOADER)
|
||||
- If specific owner_table provided: creates single table list
|
||||
- If no owner_table: queries all tables from CT_MRDS.a_devo_replica_mgmt_mopdb
|
||||
- Returns list of (owner, table_name) tuples to process
|
||||
- Pushes table list to XCom
|
||||
3) check_and_trigger
|
||||
- Loops through each table from the table list
|
||||
- For each table, enters monitoring loop:
|
||||
- Executes SQL query to check total running threads across MOPDB+RAR
|
||||
- Checks if current table is already running
|
||||
- If total threads > threshold (30): waits 5 minutes and rechecks
|
||||
- If table already running: skips to next table
|
||||
- If conditions met: triggers core DAG with table parameters
|
||||
- Counts and logs total number of DAGs triggered
|
||||
- Ensures system doesn't exceed concurrent processing limits
|
||||
"""
|
||||
257
airflow/devo_replicator/devo_replicator_trigger_rar.py
Normal file
257
airflow/devo_replicator/devo_replicator_trigger_rar.py
Normal file
@@ -0,0 +1,257 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import sys
|
||||
import logging
|
||||
import time
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
from airflow import DAG
|
||||
from airflow.utils.dates import days_ago
|
||||
from airflow.operators.python import PythonOperator
|
||||
from airflow.models import Param
|
||||
|
||||
sys.path.append('/opt/airflow/python/connectors/devo')
|
||||
sys.path.append('/opt/airflow/python/mrds_common')
|
||||
|
||||
from mrds.utils import oraconn
|
||||
|
||||
# Get MOPDB table options for dropdown
|
||||
def get_rar_table_options():
    """Return every 'OWNER.TABLE_NAME' registered for RAR replication.

    Falls back to an empty list (after logging the error) when the database
    cannot be queried, so DAG parsing never hard-fails here.
    """
    oracle_conn = None
    try:
        oracle_conn = oraconn.connect('MRDS_LOADER')
        cursor = oracle_conn.cursor()
        cursor.execute("SELECT OWNER || '.' || TABLE_NAME FROM CT_MRDS.a_devo_replica_mgmt_rar ORDER BY OWNER, TABLE_NAME")
        names = [entry[0] for entry in cursor.fetchall()]
        cursor.close()
        return names
    except Exception as e:
        logging.error(f"Error getting RAR table options: {e}")
        return []
    finally:
        if oracle_conn:
            oracle_conn.close()
|
||||
|
||||
# Task-level defaults shared by every task in this DAG.
default_args = dict(
    owner='devo',
    depends_on_past=False,
    start_date=days_ago(1),
    email_on_failure=False,
    email_on_retry=False,
    retries=1,
    retry_delay=timedelta(minutes=1),
)
|
||||
|
||||
with DAG(
    dag_id='devo_replicator_trigger_rar',
    default_args=default_args,
    description='External trigger DAG for RAR tables',
    schedule=None,
    catchup=False,
    tags=['DevoReplicator', 'DevoReplicatorTrigger'],
    params={
        "owner_table": Param(
            # BUGFIX: default=None with type="string" fails Param schema
            # validation (DAG import error on Airflow >= 2.6); allow null like
            # the sibling devo_replicator_trigger DAG, and include None in the
            # enum so the default stays enum-valid.
            # NOTE(review): enum queries the DB on every scheduler parse —
            # consider caching or removing.
            default=None,
            type=["string", "null"],
            description="Select table in format OWNER.TABLE_NAME",
            enum=get_rar_table_options() + [None],
        )
    }
) as dag:

    # Init: validate env + conf and publish run parameters to XCom.
    def init_step(**context):
        """Validate MRDS_ENV and the mandatory owner_table conf value, then
        push env/store/table identifiers and the thread threshold to XCom."""
        dag_run = context.get("dag_run")
        ti = context["ti"]
        conf = (dag_run.conf or {}) if dag_run else {}

        env = os.getenv("MRDS_ENV")
        if not env:
            raise ValueError("MRDS_ENV environment variable is required")
        env = env.lower()

        store = "rar"
        owner_table = conf.get("owner_table")

        if not owner_table:
            raise ValueError("owner_table parameter is required")
        if '.' not in owner_table:
            raise ValueError("owner_table must be in format 'OWNER.TABLE_NAME'")

        table_owner, table_name = owner_table.split('.', 1)

        if env not in {"dev", "tst", "acc", "prd"}:
            raise ValueError(f"Unsupported env '{env}'. Expected 'dev', 'tst', 'acc' or 'prd'.")

        logging.info("=== init_step === env=%s store=%s owner_table=%s",
                     env, store, owner_table)

        xcom = {
            "env": env,
            "store": store,
            "table_owner": table_owner,
            "table_name": table_name,
            "owner_table": owner_table,
            "threshold": 30,  # max concurrent replication threads
        }
        for k, v in xcom.items():
            ti.xcom_push(key=k, value=v)

    init = PythonOperator(
        task_id='init_step',
        python_callable=init_step,
    )

    # Resolve the list of (owner, table) pairs to process.
    def get_table_list(**context):
        """Use the single owner_table from XCom when present, otherwise fall
        back to all tables registered in the RAR replica-mgmt table."""
        ti = context["ti"]
        store = ti.xcom_pull(task_ids='init_step', key='store')
        owner_table = ti.xcom_pull(task_ids='init_step', key='owner_table')

        oracle_conn = None
        try:
            oracle_conn = oraconn.connect('MRDS_LOADER')

            if owner_table:
                table_owner, table_name = owner_table.split('.', 1)
                tables = [(table_owner, table_name)]
                logging.info("Processing specific table: %s", owner_table)
            else:
                cursor = oracle_conn.cursor()
                cursor.execute("SELECT OWNER, TABLE_NAME FROM CT_MRDS.a_devo_replica_mgmt_rar ORDER BY OWNER, TABLE_NAME")
                tables = cursor.fetchall()
                cursor.close()
                logging.info("Found %d tables for RAR", len(tables))

            ti.xcom_push(key='tables_to_process', value=tables)
            return tables
        except Exception as e:
            logging.error(f"Error in get_table_list: {e}")
            raise
        finally:
            if oracle_conn:
                oracle_conn.close()

    t1 = PythonOperator(
        task_id='get_table_list',
        python_callable=get_table_list,
    )

    # Throttled triggering of the core DAG, one run per table.
    def check_and_trigger(**context):
        """For each table: wait until the global running-thread count drops
        below the threshold, skip tables already running, then trigger the
        core DAG once per table."""
        ti = context["ti"]
        env = ti.xcom_pull(task_ids='init_step', key='env')
        store = ti.xcom_pull(task_ids='init_step', key='store')
        threshold = ti.xcom_pull(task_ids='init_step', key='threshold')
        tables = ti.xcom_pull(task_ids='get_table_list', key='tables_to_process')

        oracle_conn = None
        triggered_count = 0

        try:
            oracle_conn = oraconn.connect('MRDS_LOADER')

            # The store selects the mgmt table; it is interpolated into a SQL
            # identifier below, so restrict it to the known whitelist.
            service_name = store.upper()
            if service_name not in {"MOPDB", "RAR", "RQSD"}:
                raise ValueError(f"Unknown store '{store}'")

            # SECURITY FIX: owner/table come from dag_run.conf (user-supplied);
            # pass them as bind variables, not f-string interpolation.
            sql_query = f"""
                SELECT (SELECT CASE WHEN SUM(MAX_THREADS) IS NULL THEN 0 ELSE SUM(MAX_THREADS) END AS RUNNING_THREADS
                        FROM CT_MRDS.A_DEVO_REPLICA_MGMT_MOPDB
                        WHERE LAST_STATUS = 'RUNNING') +
                       (SELECT CASE WHEN SUM(MAX_THREADS) IS NULL THEN 0 ELSE SUM(MAX_THREADS) END AS RUNNING_THREADS
                        FROM CT_MRDS.A_DEVO_REPLICA_MGMT_RAR
                        WHERE LAST_STATUS = 'RUNNING')
                           AS TOTAL_RUNNING_THREADS_NOW,
                       (SELECT COUNT(*) FROM CT_MRDS.A_DEVO_REPLICA_MGMT_{service_name}
                        WHERE OWNER = :table_owner AND TABLE_NAME = :table_name AND LAST_STATUS = 'RUNNING') AS TABLE_IS_ALREADY_RUNNING
                FROM DUAL
            """

            for table_owner, table_name in tables:
                logging.info("Processing table: %s.%s", table_owner, table_name)

                while True:
                    cursor = oracle_conn.cursor()
                    try:
                        cursor.execute(sql_query, {"table_owner": table_owner, "table_name": table_name})
                        result = cursor.fetchone()
                        total_running_val = result[0] or 0
                        table_running_val = result[1] or 0
                    finally:
                        cursor.close()  # close even if execute/fetch raises

                    logging.info("Total running: %d, threshold: %d, table running: %d",
                                 total_running_val, threshold, table_running_val)

                    if total_running_val > threshold:
                        logging.info("Threshold exceeded. Waiting 5 minutes...")
                        time.sleep(300)
                        continue

                    if table_running_val >= 1:
                        logging.info("Table %s.%s is already running. Skipping.", table_owner, table_name)
                        break

                    # Trigger core DAG
                    from airflow.api.common.trigger_dag import trigger_dag

                    conf = {
                        "store": store,
                        "owner_table": f"{table_owner}.{table_name}"
                    }

                    trigger_dag(
                        dag_id='devo_replicator_core',
                        conf=conf,
                        execution_date=None,
                        replace_microseconds=False
                    )

                    triggered_count += 1
                    logging.info("Triggered core DAG for table %s.%s", table_owner, table_name)
                    break

            logging.info("Total DAGs triggered: %d", triggered_count)
            ti.xcom_push(key='triggered_count', value=triggered_count)

        except Exception as e:
            logging.error(f"Error in check_and_trigger: {e}")
            raise
        finally:
            if oracle_conn:
                oracle_conn.close()

    t2 = PythonOperator(
        task_id='check_and_trigger',
        python_callable=check_and_trigger,
    )

    # Dependencies
    init >> t1 >> t2
|
||||
|
||||
"""
|
||||
RAR Trigger DAG
|
||||
1) init_step
|
||||
- Gets environment from MRDS_ENV environment variable
|
||||
- Reads owner_table parameter from DAG configuration
|
||||
- Validates owner_table format (must be OWNER.TABLE_NAME)
|
||||
- Sets store to "rar" (fixed for this DAG)
|
||||
- Sets threshold to 30 (max concurrent running threads)
|
||||
- Pushes parameters to XCom
|
||||
2) get_table_list
|
||||
- Connects to Oracle database (MRDS_LOADER)
|
||||
- If specific owner_table provided: creates single table list
|
||||
- If no owner_table: queries all tables from CT_MRDS.a_devo_replica_mgmt_rar
|
||||
- Returns list of (owner, table_name) tuples to process
|
||||
- Pushes table list to XCom
|
||||
3) check_and_trigger
|
||||
- Loops through each table from the table list
|
||||
- For each table, enters monitoring loop:
|
||||
- Executes SQL query to check total running threads across MOPDB+RAR
|
||||
- Checks if current table is already running
|
||||
- If total threads > threshold (30): waits 5 minutes and rechecks
|
||||
- If table already running: skips to next table
|
||||
- If conditions met: triggers core DAG with table parameters
|
||||
- Counts and logs total number of DAGs triggered
|
||||
- Ensures system doesn't exceed concurrent processing limits
|
||||
"""
|
||||
257
airflow/devo_replicator/devo_replicator_trigger_rqsd.py
Normal file
257
airflow/devo_replicator/devo_replicator_trigger_rqsd.py
Normal file
@@ -0,0 +1,257 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import sys
|
||||
import logging
|
||||
import time
|
||||
from datetime import timedelta
|
||||
|
||||
from airflow import DAG
|
||||
from airflow.utils.dates import days_ago
|
||||
from airflow.operators.python import PythonOperator
|
||||
from airflow.models import Param
|
||||
|
||||
sys.path.append('/opt/airflow/python/connectors/devo')
|
||||
sys.path.append('/opt/airflow/python/mrds_common')
|
||||
|
||||
from mrds.utils import oraconn
|
||||
|
||||
# Get RQSD table options for dropdown
|
||||
def get_rqsd_table_options():
    """Return 'OWNER.TABLE_NAME' choices for this DAG's dropdown.

    Reads CT_MRDS.a_devo_replica_mgmt_rqsd; any failure is logged and an
    empty list is returned so that DAG parsing never breaks on a DB error.
    """
    connection = None
    try:
        connection = oraconn.connect('MRDS_LOADER')
        cur = connection.cursor()
        cur.execute(
            "SELECT OWNER || '.' || TABLE_NAME "
            "FROM CT_MRDS.a_devo_replica_mgmt_rqsd "
            "ORDER BY OWNER, TABLE_NAME"
        )
        rows = cur.fetchall()
        cur.close()
        return [name for (name,) in rows]
    except Exception as e:
        logging.error(f"Error getting RQSD table options: {e}")
        return []
    finally:
        if connection:
            connection.close()
|
||||
|
||||
default_args = {
|
||||
'owner': 'devo',
|
||||
'depends_on_past': False,
|
||||
'start_date': days_ago(1),
|
||||
'email_on_failure': False,
|
||||
'email_on_retry': False,
|
||||
'retries': 1,
|
||||
'retry_delay': timedelta(minutes=1),
|
||||
}
|
||||
|
||||
with DAG(
|
||||
dag_id='devo_replicator_trigger_rqsd',
|
||||
default_args=default_args,
|
||||
description='External trigger DAG for RQSD tables',
|
||||
schedule=None,
|
||||
catchup=False,
|
||||
tags=['DevoReplicator', 'DevoReplicatorTrigger'],
|
||||
params={
|
||||
"owner_table": Param(
|
||||
default=None,
|
||||
type="string",
|
||||
description="Select table in format OWNER.TABLE_NAME",
|
||||
enum=get_rqsd_table_options()
|
||||
)
|
||||
}
|
||||
) as dag:
|
||||
|
||||
# Init
|
||||
def init_step(**context):
    """Validate run parameters and publish them to XCom.

    Requires the MRDS_ENV environment variable ('dev'/'tst'/'acc'/'prd')
    and an 'owner_table' DAG-run conf entry of the form OWNER.TABLE_NAME.
    The store is fixed to 'rqsd' and the thread threshold to 30.
    """
    dag_run = context.get("dag_run")
    ti = context["ti"]
    conf = (dag_run.conf or {}) if dag_run else {}

    env = os.getenv("MRDS_ENV")
    if not env:
        raise ValueError("MRDS_ENV environment variable is required")
    env = env.lower()

    store = "rqsd"
    owner_table = conf.get("owner_table")

    if not owner_table:
        raise ValueError("owner_table parameter is required")
    if '.' not in owner_table:
        raise ValueError("owner_table must be in format 'OWNER.TABLE_NAME'")

    owner_part, name_part = owner_table.split('.', 1)

    if env not in {"dev", "tst", "acc", "prd"}:
        raise ValueError(f"Unsupported env '{env}'. Expected 'dev', 'tst', 'acc' or 'prd'.")

    logging.info("=== init_step === env=%s store=%s owner_table=%s",
                 env, store, owner_table)

    for key, value in (
        ("env", env),
        ("store", store),
        ("table_owner", owner_part),
        ("table_name", name_part),
        ("owner_table", owner_table),
        ("threshold", 30),
    ):
        ti.xcom_push(key=key, value=value)
|
||||
|
||||
init = PythonOperator(
|
||||
task_id='init_step',
|
||||
python_callable=init_step,
|
||||
)
|
||||
|
||||
# Get table list
|
||||
def get_table_list(**context):
    """Resolve the list of (owner, table_name) pairs to replicate.

    When an explicit owner_table was pushed by init_step the list holds
    just that table; otherwise every row of
    CT_MRDS.a_devo_replica_mgmt_rqsd is used. The list is pushed to XCom
    under 'tables_to_process' and also returned.
    """
    ti = context["ti"]
    store = ti.xcom_pull(task_ids='init_step', key='store')  # kept for parity with sibling DAGs
    owner_table = ti.xcom_pull(task_ids='init_step', key='owner_table')

    oracle_conn = None
    try:
        oracle_conn = oraconn.connect('MRDS_LOADER')

        if owner_table:
            owner_part, name_part = owner_table.split('.', 1)
            tables = [(owner_part, name_part)]
            logging.info("Processing specific table: %s", owner_table)
        else:
            cur = oracle_conn.cursor()
            cur.execute("SELECT OWNER, TABLE_NAME FROM CT_MRDS.a_devo_replica_mgmt_rqsd ORDER BY OWNER, TABLE_NAME")
            tables = cur.fetchall()
            cur.close()
            logging.info("Found %d tables for RQSD", len(tables))

        ti.xcom_push(key='tables_to_process', value=tables)
        return tables
    except Exception as e:
        logging.error(f"Error in get_table_list: {e}")
        raise
    finally:
        if oracle_conn:
            oracle_conn.close()


t1 = PythonOperator(
    task_id='get_table_list',
    python_callable=get_table_list,
)
|
||||
|
||||
# Check and trigger core DAG
|
||||
def check_and_trigger(**context):
    """For each candidate table, wait for free capacity then trigger the core DAG.

    Polls Oracle for the total MAX_THREADS of RUNNING loads across the
    MOPDB and RAR management tables; while that total exceeds the
    threshold it sleeps 5 minutes and rechecks. A table whose own status
    is already RUNNING is skipped. Otherwise devo_replicator_core is
    triggered with the table's parameters. Pushes 'triggered_count' to XCom.
    """
    # Hoisted out of the polling loop; a local import keeps DAG-parse time low.
    from airflow.api.common.trigger_dag import trigger_dag

    ti = context["ti"]
    store = ti.xcom_pull(task_ids='init_step', key='store')
    threshold = ti.xcom_pull(task_ids='init_step', key='threshold')
    tables = ti.xcom_pull(task_ids='get_table_list', key='tables_to_process')

    oracle_conn = None
    triggered_count = 0

    try:
        oracle_conn = oraconn.connect('MRDS_LOADER')

        # The management-table suffix comes from the fixed 'store' value,
        # never from user input; the row filters use bind variables so the
        # owner/table name cannot be used for SQL injection.
        service_name = store.upper()
        sql_query = f"""
            SELECT (SELECT CASE WHEN SUM(MAX_THREADS) IS NULL THEN 0 ELSE SUM(MAX_THREADS) END
                      FROM CT_MRDS.A_DEVO_REPLICA_MGMT_MOPDB
                     WHERE LAST_STATUS = 'RUNNING') +
                   (SELECT CASE WHEN SUM(MAX_THREADS) IS NULL THEN 0 ELSE SUM(MAX_THREADS) END
                      FROM CT_MRDS.A_DEVO_REPLICA_MGMT_RAR
                     WHERE LAST_STATUS = 'RUNNING')
                   AS TOTAL_RUNNING_THREADS_NOW,
                   (SELECT COUNT(*)
                      FROM CT_MRDS.A_DEVO_REPLICA_MGMT_{service_name}
                     WHERE OWNER = :table_owner
                       AND TABLE_NAME = :table_name
                       AND LAST_STATUS = 'RUNNING') AS TABLE_IS_ALREADY_RUNNING
              FROM DUAL
        """

        for table_owner, table_name in tables:
            logging.info("Processing table: %s.%s", table_owner, table_name)

            while True:
                cursor = oracle_conn.cursor()
                cursor.execute(sql_query,
                               table_owner=table_owner,
                               table_name=table_name)
                result = cursor.fetchone()
                total_running_val = result[0] or 0
                table_running_val = result[1] or 0
                cursor.close()

                logging.info("Total running: %d, threshold: %d, table running: %d",
                             total_running_val, threshold, table_running_val)

                if total_running_val > threshold:
                    logging.info("Threshold exceeded. Waiting 5 minutes...")
                    time.sleep(300)
                    continue

                if table_running_val >= 1:
                    logging.info("Table %s.%s is already running. Skipping.", table_owner, table_name)
                    break

                trigger_dag(
                    dag_id='devo_replicator_core',
                    conf={
                        "store": store,
                        "owner_table": f"{table_owner}.{table_name}",
                    },
                    execution_date=None,
                    replace_microseconds=False,
                )
                triggered_count += 1
                logging.info("Triggered core DAG for table %s.%s", table_owner, table_name)
                break

        logging.info("Total DAGs triggered: %d", triggered_count)
        ti.xcom_push(key='triggered_count', value=triggered_count)

    except Exception as e:
        logging.error(f"Error in check_and_trigger: {e}")
        raise
    finally:
        if oracle_conn:
            oracle_conn.close()


t2 = PythonOperator(
    task_id='check_and_trigger',
    python_callable=check_and_trigger,
)
|
||||
|
||||
# Dependencies
|
||||
init >> t1 >> t2
|
||||
|
||||
"""
|
||||
RQSD Trigger DAG
|
||||
1) init_step
|
||||
- Gets environment from MRDS_ENV environment variable
|
||||
- Reads owner_table parameter from DAG configuration
|
||||
- Validates owner_table format (must be OWNER.TABLE_NAME)
|
||||
- Sets store to "rqsd" (fixed for this DAG)
|
||||
- Sets threshold to 30 (max concurrent running threads)
|
||||
- Pushes parameters to XCom
|
||||
2) get_table_list
|
||||
- Connects to Oracle database (MRDS_LOADER)
|
||||
- If specific owner_table provided: creates single table list
|
||||
- If no owner_table: queries all tables from CT_MRDS.a_devo_replica_mgmt_rqsd
|
||||
- Returns list of (owner, table_name) tuples to process
|
||||
- Pushes table list to XCom
|
||||
3) check_and_trigger
|
||||
- Loops through each table from the table list
|
||||
- For each table, enters monitoring loop:
|
||||
- Executes SQL query to check total running threads across MOPDB+RAR
|
||||
- Checks if current table is already running
|
||||
- If total threads > threshold (30): waits 5 minutes and rechecks
|
||||
- If table already running: skips to next table
|
||||
- If conditions met: triggers core DAG with table parameters
|
||||
- Counts and logs total number of DAGs triggered
|
||||
- Ensures system doesn't exceed concurrent processing limits
|
||||
"""
|
||||
1080
airflow/devo_replicator/devo_table_generator.py
Normal file
1080
airflow/devo_replicator/devo_table_generator.py
Normal file
File diff suppressed because it is too large
Load Diff
255
airflow/devo_replicator/devo_table_generator_trigger_mopdb.py
Normal file
255
airflow/devo_replicator/devo_table_generator_trigger_mopdb.py
Normal file
@@ -0,0 +1,255 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import sys
|
||||
import logging
|
||||
from datetime import timedelta
|
||||
|
||||
from airflow import DAG
|
||||
from airflow.utils.dates import days_ago
|
||||
from airflow.operators.python import PythonOperator
|
||||
from airflow.models import Param
|
||||
|
||||
sys.path.append('/opt/airflow/python/connectors/devo')
|
||||
sys.path.append('/opt/airflow/python/mrds_common')
|
||||
|
||||
from mrds.utils import oraconn
|
||||
|
||||
# Get MOPDB table options for dropdown
|
||||
def get_mopdb_table_options():
    """Return dropdown choices for the MOPDB trigger DAG.

    The run-all marker '__ALL_EXCLUDE_COPY__' is always first, followed by
    'OWNER.TABLE_NAME' entries from CT_MRDS.a_devo_replica_mgmt_mopdb.
    Errors are logged and only the marker is returned, so DAG parsing
    never fails on a DB error.
    """
    connection = None
    try:
        connection = oraconn.connect('MRDS_LOADER')
        cur = connection.cursor()
        cur.execute(
            "SELECT OWNER || '.' || TABLE_NAME "
            "FROM CT_MRDS.a_devo_replica_mgmt_mopdb "
            "ORDER BY OWNER, TABLE_NAME"
        )
        rows = cur.fetchall()
        cur.close()
        return ["__ALL_EXCLUDE_COPY__"] + [name for (name,) in rows]
    except Exception as e:
        logging.error(f"Error getting MOPDB table options: {e}")
        return ["__ALL_EXCLUDE_COPY__"]
    finally:
        if connection:
            connection.close()
|
||||
|
||||
default_args = {
|
||||
'owner': 'devo',
|
||||
'depends_on_past': False,
|
||||
'start_date': days_ago(1),
|
||||
'email_on_failure': False,
|
||||
'email_on_retry': False,
|
||||
}
|
||||
|
||||
with DAG(
|
||||
dag_id='devo_table_generator_trigger_mopdb',
|
||||
default_args=default_args,
|
||||
description='External trigger DAG for MOPDB tables',
|
||||
schedule=None,
|
||||
catchup=False,
|
||||
tags=['DevoTableGenerator', 'DevoTableGeneratorTrigger'],
|
||||
params={
|
||||
"owner_table": Param(
|
||||
default="__ALL_EXCLUDE_COPY__",
|
||||
type="string",
|
||||
description="Select '__ALL_EXCLUDE_COPY__' to run all tables without _COPY, or select specific table in format OWNER.TABLE_NAME",
|
||||
enum=get_mopdb_table_options()
|
||||
)
|
||||
}
|
||||
) as dag:
|
||||
|
||||
# Init
|
||||
def init_step(**context):
    """Validate run parameters and publish them to XCom.

    Requires MRDS_ENV ('dev'/'tst'/'acc'/'prd') and an 'owner_table' conf
    entry that is either OWNER.TABLE_NAME or the run-all marker
    '__ALL_EXCLUDE_COPY__'. The store is fixed to 'mopdb'.
    """
    dag_run = context.get("dag_run")
    ti = context["ti"]
    conf = (dag_run.conf or {}) if dag_run else {}

    env = os.getenv("MRDS_ENV")
    if not env:
        raise ValueError("MRDS_ENV environment variable is required")
    env = env.lower()

    store = "mopdb"
    owner_table = conf.get("owner_table")

    if not owner_table:
        raise ValueError("owner_table parameter is required")

    # The sentinel selects every table whose name lacks '_COPY'.
    run_all_exclude_copy = owner_table == "__ALL_EXCLUDE_COPY__"

    if not run_all_exclude_copy and '.' not in owner_table:
        raise ValueError("owner_table must be in format 'OWNER.TABLE_NAME' or '__ALL_EXCLUDE_COPY__'")

    if env not in {"dev", "tst", "acc", "prd"}:
        raise ValueError(f"Unsupported env '{env}'. Expected 'dev', 'tst', 'acc' or 'prd'.")

    logging.info("=== init_step === env=%s store=%s owner_table=%s run_all_exclude_copy=%s",
                 env, store, owner_table, run_all_exclude_copy)

    for key, value in (
        ("env", env),
        ("store", store),
        ("owner_table", owner_table),
        ("run_all_exclude_copy", run_all_exclude_copy),
    ):
        ti.xcom_push(key=key, value=value)
|
||||
|
||||
init = PythonOperator(
|
||||
task_id='init_step',
|
||||
python_callable=init_step,
|
||||
)
|
||||
|
||||
# Get table list
|
||||
def get_table_list(**context):
    """Resolve the list of (owner, table_name) pairs to process.

    Priority: the run-all marker selects every MOPDB table whose name does
    not contain the literal substring '_COPY'; a specific owner_table
    yields a single-entry list; otherwise all MOPDB tables are used. The
    list is pushed to XCom under 'tables_to_process' and returned.
    """
    ti = context["ti"]
    store = ti.xcom_pull(task_ids='init_step', key='store')
    owner_table = ti.xcom_pull(task_ids='init_step', key='owner_table')
    run_all_exclude_copy = ti.xcom_pull(task_ids='init_step', key='run_all_exclude_copy')

    oracle_conn = None
    try:
        oracle_conn = oraconn.connect('MRDS_LOADER')

        if run_all_exclude_copy:
            cursor = oracle_conn.cursor()
            # '_' is a single-character wildcard in LIKE, so it must be
            # escaped: a bare '%_COPY%' would also exclude names such as
            # 'XCOPY_TABLE'. ESCAPE makes '_COPY' match literally.
            cursor.execute(r"""
                SELECT OWNER, TABLE_NAME
                FROM CT_MRDS.a_devo_replica_mgmt_mopdb
                WHERE TABLE_NAME NOT LIKE '%\_COPY%' ESCAPE '\'
                ORDER BY OWNER, TABLE_NAME
            """)
            tables = cursor.fetchall()
            cursor.close()
            logging.info("Processing ALL tables excluding _COPY: %d tables found", len(tables))
        elif owner_table:
            # Process the single requested table.
            table_owner, table_name = owner_table.split('.', 1)
            tables = [(table_owner, table_name)]
            logging.info("Processing specific table: %s", owner_table)
        else:
            # Fallback: every MOPDB table.
            cursor = oracle_conn.cursor()
            cursor.execute("SELECT OWNER, TABLE_NAME FROM CT_MRDS.a_devo_replica_mgmt_mopdb ORDER BY OWNER, TABLE_NAME")
            tables = cursor.fetchall()
            cursor.close()
            logging.info("Found %d tables for MOPDB", len(tables))

        ti.xcom_push(key='tables_to_process', value=tables)
        return tables
    except Exception as e:
        logging.error(f"Error in get_table_list: {e}")
        raise
    finally:
        if oracle_conn:
            oracle_conn.close()


t1 = PythonOperator(
    task_id='get_table_list',
    python_callable=get_table_list,
)
|
||||
|
||||
# Trigger core DAG for each table
|
||||
def trigger_tables(**context):
    """Trigger the table-generator core DAG for each table not already running.

    For every (owner, table_name) pair from XCom, checks the MOPDB
    management table for a RUNNING entry; running tables are skipped,
    all others get a devo_table_generator_core run. Pushes
    'triggered_count' and 'skipped_count' to XCom. No threshold/wait logic.
    """
    # Hoisted out of the loop; a local import keeps DAG-parse time low.
    from airflow.api.common.trigger_dag import trigger_dag

    ti = context["ti"]
    store = ti.xcom_pull(task_ids='init_step', key='store')
    tables = ti.xcom_pull(task_ids='get_table_list', key='tables_to_process')

    oracle_conn = None
    triggered_count = 0
    skipped_count = 0

    try:
        oracle_conn = oraconn.connect('MRDS_LOADER')

        # The management-table suffix comes from the fixed 'store' value,
        # never from user input; row filters use bind variables so the
        # owner/table name cannot be used for SQL injection.
        service_name = store.upper()
        sql_query = (
            f"SELECT COUNT(*) FROM CT_MRDS.A_DEVO_REPLICA_MGMT_{service_name} "
            "WHERE OWNER = :table_owner "
            "AND TABLE_NAME = :table_name "
            "AND LAST_STATUS = 'RUNNING'"
        )

        for table_owner, table_name in tables:
            logging.info("Processing table: %s.%s", table_owner, table_name)

            cursor = oracle_conn.cursor()
            cursor.execute(sql_query,
                           table_owner=table_owner,
                           table_name=table_name)
            result = cursor.fetchone()
            table_running_val = result[0] or 0
            cursor.close()

            if table_running_val >= 1:
                logging.info("Table %s.%s is already running. Skipping.", table_owner, table_name)
                skipped_count += 1
                continue

            trigger_dag(
                dag_id='devo_table_generator_core',
                conf={
                    "store": store,
                    "owner_table": f"{table_owner}.{table_name}",
                },
                execution_date=None,
                replace_microseconds=False,
            )
            triggered_count += 1
            logging.info("Triggered core DAG for table %s.%s", table_owner, table_name)

        logging.info("Summary: Total DAGs triggered: %d, Skipped (already running): %d",
                     triggered_count, skipped_count)
        ti.xcom_push(key='triggered_count', value=triggered_count)
        ti.xcom_push(key='skipped_count', value=skipped_count)

    except Exception as e:
        logging.error(f"Error in trigger_tables: {e}")
        raise
    finally:
        if oracle_conn:
            oracle_conn.close()


t2 = PythonOperator(
    task_id='trigger_tables',
    python_callable=trigger_tables,
)
|
||||
|
||||
# Dependencies
|
||||
init >> t1 >> t2
|
||||
|
||||
"""
|
||||
MOPDB Trigger DAG
|
||||
1) init_step
|
||||
- Gets environment from MRDS_ENV environment variable
|
||||
- Reads owner_table parameter from DAG configuration
|
||||
- Validates owner_table format (must be OWNER.TABLE_NAME or __ALL_EXCLUDE_COPY__)
|
||||
- Sets store to "mopdb" (fixed for this DAG)
|
||||
- Determines if running all tables excluding _COPY
|
||||
- Pushes parameters to XCom
|
||||
2) get_table_list
|
||||
- Connects to Oracle database (MRDS_LOADER)
|
||||
- If __ALL_EXCLUDE_COPY__: queries all tables from CT_MRDS.a_devo_replica_mgmt_mopdb excluding _COPY tables
|
||||
- If specific owner_table provided: creates single table list
|
||||
- If no owner_table: queries all tables from CT_MRDS.a_devo_replica_mgmt_mopdb
|
||||
- Returns list of (owner, table_name) tuples to process
|
||||
- Pushes table list to XCom
|
||||
3) trigger_tables
|
||||
- Loops through each table from the table list
|
||||
- For each table:
|
||||
- Checks if table is already running
|
||||
- If table already running: skips to next table
|
||||
- If not running: triggers core DAG with table parameters
|
||||
- Counts and logs total number of DAGs triggered and skipped
|
||||
- No threshold checking or waiting logic
|
||||
"""
|
||||
257
airflow/devo_replicator/devo_table_generator_trigger_rar.py
Normal file
257
airflow/devo_replicator/devo_table_generator_trigger_rar.py
Normal file
@@ -0,0 +1,257 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import sys
|
||||
import logging
|
||||
from datetime import timedelta
|
||||
|
||||
from airflow import DAG
|
||||
from airflow.utils.dates import days_ago
|
||||
from airflow.operators.python import PythonOperator
|
||||
from airflow.models import Param
|
||||
|
||||
sys.path.append('/opt/airflow/python/connectors/devo')
|
||||
sys.path.append('/opt/airflow/python/mrds_common')
|
||||
|
||||
from mrds.utils import oraconn
|
||||
|
||||
# Get RAR table options for dropdown
|
||||
def get_rar_table_options():
    """Return dropdown choices for the RAR trigger DAG.

    The run-all marker '__ALL_EXCLUDE_COPY__' is always first, followed by
    'OWNER.TABLE_NAME' entries from CT_MRDS.a_devo_replica_mgmt_rar.
    Errors are logged and only the marker is returned, so DAG parsing
    never fails on a DB error.
    """
    connection = None
    try:
        connection = oraconn.connect('MRDS_LOADER')
        cur = connection.cursor()
        cur.execute(
            "SELECT OWNER || '.' || TABLE_NAME "
            "FROM CT_MRDS.a_devo_replica_mgmt_rar "
            "ORDER BY OWNER, TABLE_NAME"
        )
        rows = cur.fetchall()
        cur.close()
        return ["__ALL_EXCLUDE_COPY__"] + [name for (name,) in rows]
    except Exception as e:
        logging.error(f"Error getting RAR table options: {e}")
        return ["__ALL_EXCLUDE_COPY__"]
    finally:
        if connection:
            connection.close()
|
||||
|
||||
default_args = {
|
||||
'owner': 'devo',
|
||||
'depends_on_past': False,
|
||||
'start_date': days_ago(1),
|
||||
'email_on_failure': False,
|
||||
'email_on_retry': False,
|
||||
'retries': 1,
|
||||
'retry_delay': timedelta(minutes=1),
|
||||
}
|
||||
|
||||
with DAG(
|
||||
dag_id='devo_table_generator_trigger_rar',
|
||||
default_args=default_args,
|
||||
description='External trigger DAG for RAR tables',
|
||||
schedule=None,
|
||||
catchup=False,
|
||||
tags=['DevoTableGenerator', 'DevoTableGeneratorTrigger'],
|
||||
params={
|
||||
"owner_table": Param(
|
||||
default="__ALL_EXCLUDE_COPY__",
|
||||
type="string",
|
||||
description="Select '__ALL_EXCLUDE_COPY__' to run all tables without _COPY, or select specific table in format OWNER.TABLE_NAME",
|
||||
enum=get_rar_table_options()
|
||||
)
|
||||
}
|
||||
) as dag:
|
||||
|
||||
# Init
|
||||
def init_step(**context):
    """Validate run parameters and publish them to XCom.

    Requires MRDS_ENV ('dev'/'tst'/'acc'/'prd') and an 'owner_table' conf
    entry that is either OWNER.TABLE_NAME or the run-all marker
    '__ALL_EXCLUDE_COPY__'. The store is fixed to 'rar'.
    """
    dag_run = context.get("dag_run")
    ti = context["ti"]
    conf = (dag_run.conf or {}) if dag_run else {}

    env = os.getenv("MRDS_ENV")
    if not env:
        raise ValueError("MRDS_ENV environment variable is required")
    env = env.lower()

    store = "rar"
    owner_table = conf.get("owner_table")

    if not owner_table:
        raise ValueError("owner_table parameter is required")

    # The sentinel selects every table whose name lacks '_COPY'.
    run_all_exclude_copy = owner_table == "__ALL_EXCLUDE_COPY__"

    if not run_all_exclude_copy and '.' not in owner_table:
        raise ValueError("owner_table must be in format 'OWNER.TABLE_NAME' or '__ALL_EXCLUDE_COPY__'")

    if env not in {"dev", "tst", "acc", "prd"}:
        raise ValueError(f"Unsupported env '{env}'. Expected 'dev', 'tst', 'acc' or 'prd'.")

    logging.info("=== init_step === env=%s store=%s owner_table=%s run_all_exclude_copy=%s",
                 env, store, owner_table, run_all_exclude_copy)

    for key, value in (
        ("env", env),
        ("store", store),
        ("owner_table", owner_table),
        ("run_all_exclude_copy", run_all_exclude_copy),
    ):
        ti.xcom_push(key=key, value=value)
|
||||
|
||||
init = PythonOperator(
|
||||
task_id='init_step',
|
||||
python_callable=init_step,
|
||||
)
|
||||
|
||||
# Get table list
|
||||
def get_table_list(**context):
    """Resolve the list of (owner, table_name) pairs to process.

    Priority: the run-all marker selects every RAR table whose name does
    not contain the literal substring '_COPY'; a specific owner_table
    yields a single-entry list; otherwise all RAR tables are used. The
    list is pushed to XCom under 'tables_to_process' and returned.
    """
    ti = context["ti"]
    store = ti.xcom_pull(task_ids='init_step', key='store')
    owner_table = ti.xcom_pull(task_ids='init_step', key='owner_table')
    run_all_exclude_copy = ti.xcom_pull(task_ids='init_step', key='run_all_exclude_copy')

    oracle_conn = None
    try:
        oracle_conn = oraconn.connect('MRDS_LOADER')

        if run_all_exclude_copy:
            cursor = oracle_conn.cursor()
            # '_' is a single-character wildcard in LIKE, so it must be
            # escaped: a bare '%_COPY%' would also exclude names such as
            # 'XCOPY_TABLE'. ESCAPE makes '_COPY' match literally.
            cursor.execute(r"""
                SELECT OWNER, TABLE_NAME
                FROM CT_MRDS.a_devo_replica_mgmt_rar
                WHERE TABLE_NAME NOT LIKE '%\_COPY%' ESCAPE '\'
                ORDER BY OWNER, TABLE_NAME
            """)
            tables = cursor.fetchall()
            cursor.close()
            logging.info("Processing ALL tables excluding _COPY: %d tables found", len(tables))
        elif owner_table:
            # Process the single requested table.
            table_owner, table_name = owner_table.split('.', 1)
            tables = [(table_owner, table_name)]
            logging.info("Processing specific table: %s", owner_table)
        else:
            # Fallback: every RAR table.
            cursor = oracle_conn.cursor()
            cursor.execute("SELECT OWNER, TABLE_NAME FROM CT_MRDS.a_devo_replica_mgmt_rar ORDER BY OWNER, TABLE_NAME")
            tables = cursor.fetchall()
            cursor.close()
            logging.info("Found %d tables for RAR", len(tables))

        ti.xcom_push(key='tables_to_process', value=tables)
        return tables
    except Exception as e:
        logging.error(f"Error in get_table_list: {e}")
        raise
    finally:
        if oracle_conn:
            oracle_conn.close()


t1 = PythonOperator(
    task_id='get_table_list',
    python_callable=get_table_list,
)
|
||||
|
||||
# Trigger core DAG for each table
|
||||
def trigger_tables(**context):
    """Trigger the table-generator core DAG for each table not already running.

    For every (owner, table_name) pair from XCom, checks the RAR
    management table for a RUNNING entry; running tables are skipped,
    all others get a devo_table_generator_core run. Pushes
    'triggered_count' and 'skipped_count' to XCom. No threshold/wait logic.
    """
    # Hoisted out of the loop; a local import keeps DAG-parse time low.
    from airflow.api.common.trigger_dag import trigger_dag

    ti = context["ti"]
    store = ti.xcom_pull(task_ids='init_step', key='store')
    tables = ti.xcom_pull(task_ids='get_table_list', key='tables_to_process')

    oracle_conn = None
    triggered_count = 0
    skipped_count = 0

    try:
        oracle_conn = oraconn.connect('MRDS_LOADER')

        # The management-table suffix comes from the fixed 'store' value,
        # never from user input; row filters use bind variables so the
        # owner/table name cannot be used for SQL injection.
        service_name = store.upper()
        sql_query = (
            f"SELECT COUNT(*) FROM CT_MRDS.A_DEVO_REPLICA_MGMT_{service_name} "
            "WHERE OWNER = :table_owner "
            "AND TABLE_NAME = :table_name "
            "AND LAST_STATUS = 'RUNNING'"
        )

        for table_owner, table_name in tables:
            logging.info("Processing table: %s.%s", table_owner, table_name)

            cursor = oracle_conn.cursor()
            cursor.execute(sql_query,
                           table_owner=table_owner,
                           table_name=table_name)
            result = cursor.fetchone()
            table_running_val = result[0] or 0
            cursor.close()

            if table_running_val >= 1:
                logging.info("Table %s.%s is already running. Skipping.", table_owner, table_name)
                skipped_count += 1
                continue

            trigger_dag(
                dag_id='devo_table_generator_core',
                conf={
                    "store": store,
                    "owner_table": f"{table_owner}.{table_name}",
                },
                execution_date=None,
                replace_microseconds=False,
            )
            triggered_count += 1
            logging.info("Triggered core DAG for table %s.%s", table_owner, table_name)

        logging.info("Summary: Total DAGs triggered: %d, Skipped (already running): %d",
                     triggered_count, skipped_count)
        ti.xcom_push(key='triggered_count', value=triggered_count)
        ti.xcom_push(key='skipped_count', value=skipped_count)

    except Exception as e:
        logging.error(f"Error in trigger_tables: {e}")
        raise
    finally:
        if oracle_conn:
            oracle_conn.close()


t2 = PythonOperator(
    task_id='trigger_tables',
    python_callable=trigger_tables,
)
|
||||
|
||||
# Dependencies
|
||||
init >> t1 >> t2
|
||||
|
||||
"""
|
||||
RAR Trigger DAG
|
||||
1) init_step
|
||||
- Gets environment from MRDS_ENV environment variable
|
||||
- Reads owner_table parameter from DAG configuration
|
||||
- Validates owner_table format (must be OWNER.TABLE_NAME or __ALL_EXCLUDE_COPY__)
|
||||
- Sets store to "rar" (fixed for this DAG)
|
||||
- Determines if running all tables excluding _COPY
|
||||
- Pushes parameters to XCom
|
||||
2) get_table_list
|
||||
- Connects to Oracle database (MRDS_LOADER)
|
||||
- If __ALL_EXCLUDE_COPY__: queries all tables from CT_MRDS.a_devo_replica_mgmt_rar excluding _COPY tables
|
||||
- If specific owner_table provided: creates single table list
|
||||
- If no owner_table: queries all tables from CT_MRDS.a_devo_replica_mgmt_rar
|
||||
- Returns list of (owner, table_name) tuples to process
|
||||
- Pushes table list to XCom
|
||||
3) trigger_tables
|
||||
- Loops through each table from the table list
|
||||
- For each table:
|
||||
- Checks if table is already running
|
||||
- If table already running: skips to next table
|
||||
- If not running: triggers core DAG with table parameters
|
||||
- Counts and logs total number of DAGs triggered and skipped
|
||||
- No threshold checking or waiting logic
|
||||
"""
|
||||
239
airflow/devo_replicator/devo_table_generator_trigger_rqsd.py
Normal file
239
airflow/devo_replicator/devo_table_generator_trigger_rqsd.py
Normal file
@@ -0,0 +1,239 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import sys
|
||||
import logging
|
||||
from datetime import timedelta
|
||||
|
||||
from airflow import DAG
|
||||
from airflow.utils.dates import days_ago
|
||||
from airflow.operators.python import PythonOperator
|
||||
from airflow.models import Param
|
||||
|
||||
sys.path.append('/opt/airflow/python/connectors/devo')
|
||||
sys.path.append('/opt/airflow/python/mrds_common')
|
||||
|
||||
from mrds.utils import oraconn
|
||||
|
||||
# Get RQSD table options for dropdown
|
||||
def get_rqsd_table_options():
    """Return 'OWNER.TABLE_NAME' choices for this DAG's dropdown.

    Reads CT_MRDS.a_devo_replica_mgmt_rqsd; any failure is logged and an
    empty list is returned so that DAG parsing never breaks on a DB error.
    """
    connection = None
    try:
        connection = oraconn.connect('MRDS_LOADER')
        cur = connection.cursor()
        cur.execute(
            "SELECT OWNER || '.' || TABLE_NAME "
            "FROM CT_MRDS.a_devo_replica_mgmt_rqsd "
            "ORDER BY OWNER, TABLE_NAME"
        )
        rows = cur.fetchall()
        cur.close()
        return [name for (name,) in rows]
    except Exception as e:
        logging.error(f"Error getting RQSD table options: {e}")
        return []
    finally:
        if connection:
            connection.close()
|
||||
|
||||
default_args = {
|
||||
'owner': 'devo',
|
||||
'depends_on_past': False,
|
||||
'start_date': days_ago(1),
|
||||
'email_on_failure': False,
|
||||
'email_on_retry': False,
|
||||
'retries': 1,
|
||||
'retry_delay': timedelta(minutes=1),
|
||||
}
|
||||
|
||||
with DAG(
|
||||
dag_id='devo_table_generator_trigger_rqsd',
|
||||
default_args=default_args,
|
||||
description='External trigger DAG for RQSD tables',
|
||||
schedule=None,
|
||||
catchup=False,
|
||||
tags=['DevoTableGenerator', 'DevoTableGeneratorTrigger'],
|
||||
params={
|
||||
"owner_table": Param(
|
||||
default=None,
|
||||
type="string",
|
||||
description="Select table in format OWNER.TABLE_NAME",
|
||||
enum=get_rqsd_table_options()
|
||||
)
|
||||
}
|
||||
) as dag:
|
||||
|
||||
# Init
|
||||
def init_step(**context):
    """Validate run parameters and publish them to XCom.

    Requires MRDS_ENV ('dev'/'tst'/'acc'/'prd') and an 'owner_table' conf
    entry of the form OWNER.TABLE_NAME. The store is fixed to 'rqsd';
    unlike the replicator variant, no thread threshold is published.
    """
    dag_run = context.get("dag_run")
    ti = context["ti"]
    conf = (dag_run.conf or {}) if dag_run else {}

    env = os.getenv("MRDS_ENV")
    if not env:
        raise ValueError("MRDS_ENV environment variable is required")
    env = env.lower()

    store = "rqsd"
    owner_table = conf.get("owner_table")

    if not owner_table:
        raise ValueError("owner_table parameter is required")
    if '.' not in owner_table:
        raise ValueError("owner_table must be in format 'OWNER.TABLE_NAME'")

    owner_part, name_part = owner_table.split('.', 1)

    if env not in {"dev", "tst", "acc", "prd"}:
        raise ValueError(f"Unsupported env '{env}'. Expected 'dev', 'tst', 'acc' or 'prd'.")

    logging.info("=== init_step === env=%s store=%s owner_table=%s",
                 env, store, owner_table)

    for key, value in (
        ("env", env),
        ("store", store),
        ("table_owner", owner_part),
        ("table_name", name_part),
        ("owner_table", owner_table),
    ):
        ti.xcom_push(key=key, value=value)
|
||||
|
||||
init = PythonOperator(
|
||||
task_id='init_step',
|
||||
python_callable=init_step,
|
||||
)
|
||||
|
||||
# Get table list
|
||||
def get_table_list(**context):
    """Resolve the list of (owner, table_name) pairs to generate for.

    When an explicit owner_table was pushed by init_step the list holds
    just that table; otherwise every row of
    CT_MRDS.a_devo_replica_mgmt_rqsd is used. The list is pushed to XCom
    under 'tables_to_process' and also returned.
    """
    ti = context["ti"]
    store = ti.xcom_pull(task_ids='init_step', key='store')  # kept for parity with sibling DAGs
    owner_table = ti.xcom_pull(task_ids='init_step', key='owner_table')

    oracle_conn = None
    try:
        oracle_conn = oraconn.connect('MRDS_LOADER')

        if owner_table:
            owner_part, name_part = owner_table.split('.', 1)
            tables = [(owner_part, name_part)]
            logging.info("Processing specific table: %s", owner_table)
        else:
            cur = oracle_conn.cursor()
            cur.execute("SELECT OWNER, TABLE_NAME FROM CT_MRDS.a_devo_replica_mgmt_rqsd ORDER BY OWNER, TABLE_NAME")
            tables = cur.fetchall()
            cur.close()
            logging.info("Found %d tables for RQSD", len(tables))

        ti.xcom_push(key='tables_to_process', value=tables)
        return tables
    except Exception as e:
        logging.error(f"Error in get_table_list: {e}")
        raise
    finally:
        if oracle_conn:
            oracle_conn.close()


t1 = PythonOperator(
    task_id='get_table_list',
    python_callable=get_table_list,
)
|
||||
|
||||
# Trigger core DAG for each table
|
||||
def trigger_tables(**context):
    """Trigger the table-generator core DAG for each table not already running.

    For every (owner, table_name) pair from XCom, checks the RQSD
    management table for a RUNNING entry; running tables are skipped,
    all others get a devo_table_generator_core run. Pushes
    'triggered_count' and 'skipped_count' to XCom. No threshold/wait logic.
    """
    # Hoisted out of the loop; a local import keeps DAG-parse time low.
    from airflow.api.common.trigger_dag import trigger_dag

    ti = context["ti"]
    store = ti.xcom_pull(task_ids='init_step', key='store')
    tables = ti.xcom_pull(task_ids='get_table_list', key='tables_to_process')

    oracle_conn = None
    triggered_count = 0
    skipped_count = 0

    try:
        oracle_conn = oraconn.connect('MRDS_LOADER')

        # The management-table suffix comes from the fixed 'store' value,
        # never from user input; row filters use bind variables so the
        # owner/table name cannot be used for SQL injection.
        service_name = store.upper()
        sql_query = (
            f"SELECT COUNT(*) FROM CT_MRDS.A_DEVO_REPLICA_MGMT_{service_name} "
            "WHERE OWNER = :table_owner "
            "AND TABLE_NAME = :table_name "
            "AND LAST_STATUS = 'RUNNING'"
        )

        for table_owner, table_name in tables:
            logging.info("Processing table: %s.%s", table_owner, table_name)

            cursor = oracle_conn.cursor()
            cursor.execute(sql_query,
                           table_owner=table_owner,
                           table_name=table_name)
            result = cursor.fetchone()
            table_running_val = result[0] or 0
            cursor.close()

            if table_running_val >= 1:
                logging.info("Table %s.%s is already running. Skipping.", table_owner, table_name)
                skipped_count += 1
                continue

            trigger_dag(
                dag_id='devo_table_generator_core',
                conf={
                    "store": store,
                    "owner_table": f"{table_owner}.{table_name}",
                },
                execution_date=None,
                replace_microseconds=False,
            )
            triggered_count += 1
            logging.info("Triggered core DAG for table %s.%s", table_owner, table_name)

        logging.info("Summary: Total DAGs triggered: %d, Skipped (already running): %d",
                     triggered_count, skipped_count)
        ti.xcom_push(key='triggered_count', value=triggered_count)
        ti.xcom_push(key='skipped_count', value=skipped_count)

    except Exception as e:
        logging.error(f"Error in trigger_tables: {e}")
        raise
    finally:
        if oracle_conn:
            oracle_conn.close()


t2 = PythonOperator(
    task_id='trigger_tables',
    python_callable=trigger_tables,
)
|
||||
|
||||
# Dependencies
|
||||
init >> t1 >> t2
|
||||
|
||||
"""
|
||||
RQSD Trigger DAG
|
||||
1) init_step
|
||||
- Gets environment from MRDS_ENV environment variable
|
||||
- Reads owner_table parameter from DAG configuration
|
||||
- Validates owner_table format (must be OWNER.TABLE_NAME)
|
||||
- Sets store to "rqsd" (fixed for this DAG)
|
||||
- Pushes parameters to XCom
|
||||
2) get_table_list
|
||||
- Connects to Oracle database (MRDS_LOADER)
|
||||
- If specific owner_table provided: creates single table list
|
||||
- If no owner_table: queries all tables from CT_MRDS.a_devo_replica_mgmt_rqsd
|
||||
- Returns list of (owner, table_name) tuples to process
|
||||
- Pushes table list to XCom
|
||||
3) trigger_tables
|
||||
- Loops through each table from the table list
|
||||
- For each table:
|
||||
- Checks if table is already running
|
||||
- If table already running: skips to next table
|
||||
- If not running: triggers core DAG with table parameters
|
||||
- Counts and logs total number of DAGs triggered and skipped
|
||||
- No threshold checking or waiting logic
|
||||
"""
|
||||
Reference in New Issue
Block a user