This commit is contained in:
Grzegorz Michalski
2026-03-02 09:47:35 +01:00
commit 2c225d68ac
715 changed files with 130067 additions and 0 deletions

0
.ci/.gitkeep Normal file
View File

74
.ci/mr_dev_to_latest.sh Normal file
View File

@@ -0,0 +1,74 @@
#!/bin/bash
# Create a GitLab merge request from DEV_BRANCH to TARGET_BRANCH that contains
# only the paths listed in DIRECTORIES.
# Requires in the environment: GL_TOKEN (clone + API token), CI_PROJECT_ID.

# Fail fast if the token is missing -- it is embedded in the clone URL below,
# so checking it only at MR-creation time (as before) was too late.
if [ -z "$GL_TOKEN" ]; then
    echo "GL_TOKEN environment variable not set. Cannot clone or create merge request."
    exit 1
fi

REPO_URL="https://oauth2:${GL_TOKEN}@gitlab.sofa.dev/mrds/mrds_elt.git"
DEV_BRANCH="dev"
TARGET_BRANCH="latest"
WORK_DIR="mrds_elt_merge"
MERGE_BRANCH="dev_to_latest_merge"
# Paths (directories and single files) promoted from dev to latest.
DIRECTORIES=(
    "airflow/devo_replicator"
    "airflow/ods/rqsd"
    "airflow/mopdb/RQSD"
    "dbt"
    "python/connectors/devo"
    "python/devo_replicator"
    "python/mrds_common"
    ".ci"
    ".gitlab-ci.yml"
    ".ci/mr_dev_to_latest.sh"
)

echo "Setting up working directory..."
# Drop leftovers from a previous run (stale branch in the outer checkout,
# stale working tree on disk).
git branch -D "$MERGE_BRANCH" 2>/dev/null || true
rm -rf "$WORK_DIR"
mkdir -p "$WORK_DIR"
# Guard the cd: running the clone/checkout/push below in the wrong
# directory would operate on the outer repository.
cd "$WORK_DIR" || exit 1

echo "Cloning repository..."
git clone "$REPO_URL" .
git config --global user.email "cicd@sofa.dev"
git config --global user.name "CICD Pipeline"

echo "Creating merge branch from $TARGET_BRANCH..."
git checkout "$TARGET_BRANCH"
git checkout -b "$MERGE_BRANCH"

echo "Fetching $DEV_BRANCH branch..."
git fetch origin "$DEV_BRANCH"

echo "Selecting changes from $DEV_BRANCH for specific directories..."
CHANGES_FOUND=false
for dir in "${DIRECTORIES[@]}"; do
    echo "Copying $dir from $DEV_BRANCH to $MERGE_BRANCH"
    git checkout "origin/$DEV_BRANCH" -- "$dir"
done

# Commit only when the selected paths actually differ from TARGET_BRANCH.
# (The original script ran this identical diff check twice; once is enough.)
if ! git diff --quiet "$TARGET_BRANCH"; then
    git commit -am "Update selected directories from $DEV_BRANCH"
    CHANGES_FOUND=true
fi

if [ "$CHANGES_FOUND" = true ]; then
    echo "Pushing merge branch..."
    git push -f -u "$REPO_URL" "$MERGE_BRANCH"
    echo "Creating merge request..."
    curl -X POST \
        -H "PRIVATE-TOKEN: $GL_TOKEN" \
        "https://gitlab.sofa.dev/api/v4/projects/${CI_PROJECT_ID}/merge_requests" \
        -d "source_branch=${MERGE_BRANCH}&target_branch=${TARGET_BRANCH}&title=Requested merge of selected directories from ${DEV_BRANCH} to ${TARGET_BRANCH}&description=This MR contains selected directories from ${DEV_BRANCH} branch."
else
    echo "No changes found between $DEV_BRANCH and $TARGET_BRANCH for the specified directories."
    echo "Skipping merge request creation."
    exit 0
fi

5
.gitignore vendored Normal file
View File

@@ -0,0 +1,5 @@
__pycache__/
*.log
.venv
.tox
*.egg-info/

96
.gitlab-ci.yml Normal file
View File

@@ -0,0 +1,96 @@
---
# CI pipeline: SonarQube analysis on "latest", downstream Airflow build
# triggers for dev/latest, and a manual job that opens a dev -> latest MR.
# NOTE(review): nesting below was reconstructed from a flattened scrape --
# verify against the original file.
stages:
  - analyze
  - build
  - merge

variables:
  # URL-encoded project path of the downstream Airflow project.
  TARGET_PROJECT: "mrds%2Foci-terraform%2Fmars%2Fmars-airflow"
  GITLAB_API_URL: "https://gitlab.sofa.dev/api/v4/projects"

# Static analysis of the "latest" branch sources.
sonarqube_check_latest_sources:
  stage: analyze
  image: artifactory.sofa.dev/docker-remote/sonarsource/sonar-scanner-cli:latest
  script:
    - >-
      sonar-scanner
      -Dsonar.projectKey=$SONAR_PROJECT_KEY
      -Dsonar.projectBaseDir=.
      -Dsonar.sources=.
      -Dsonar.host.url=$SONAR_HOST_URL
      -Dsonar.login=$SONAR_TOKEN
  rules:
    - if: '$CI_COMMIT_BRANCH == "latest"'

# Manually triggered build of the Airflow project from the dev branch.
build_airflow_from_dev_branch:
  stage: build
  when: manual
  script:
    # NOTE(review): $TARGET_BRANCH is not defined anywhere in this file.
    - echo "Triggering pipeline in $TARGET_PROJECT on branch $TARGET_BRANCH..."
    - apt-get update && apt-get install curl -y
    - echo "#############################################################"
    - echo ""
    - echo -e "\e[31mContinue to https://gitlab.sofa.dev/mrds/oci-terraform/mars/mars-airflow/-/pipelines to see the build process progress, its result and to trigger deployment to DEV\e[0m"
    - echo ""
    - echo "#############################################################"
    - |
      set -e
      RESPONSE=$(curl --silent --show-error --fail --request POST \
        --form token="$TRIGGER_TOKEN" \
        --form ref="dev" \
        --form "variables[COMMIT_MESSAGE]=$CI_COMMIT_MESSAGE" \
        --form "variables[COMMIT_SHA]=$CI_COMMIT_SHA" \
        --form "variables[MR_AUTHOR]=$GITLAB_USER_NAME" \
        "$GITLAB_API_URL/$TARGET_PROJECT/trigger/pipeline") || {
        echo "ERROR: Failed to trigger remote pipeline!"
        echo "$RESPONSE"
        exit 1
      }
      echo "$RESPONSE"
  only:
    - dev

# Automatic build of the Airflow project when "latest" changes.
build_airflow_from_latest_branch:
  stage: build
  script:
    # NOTE(review): $TARGET_BRANCH is not defined anywhere in this file.
    - echo "Triggering pipeline in $TARGET_PROJECT on branch $TARGET_BRANCH..."
    - apt-get update && apt-get install curl -y
    - echo "#############################################################"
    - echo ""
    - echo -e "\e[31mContinue to https://gitlab.sofa.dev/mrds/oci-terraform/mars/mars-airflow/-/pipelines to see the build process and its result\e[0m"
    - echo ""
    - echo "#############################################################"
    - |
      set -e
      RESPONSE=$(curl --silent --show-error --fail --request POST \
        --form token="$TRIGGER_TOKEN" \
        --form ref="main" \
        --form "variables[COMMIT_MESSAGE]=$CI_COMMIT_MESSAGE" \
        --form "variables[COMMIT_SHA]=$CI_COMMIT_SHA" \
        --form "variables[MR_AUTHOR]=$GITLAB_USER_NAME" \
        "$GITLAB_API_URL/$TARGET_PROJECT/trigger/pipeline") || {
        echo "ERROR: Failed to trigger remote pipeline!"
        echo "$RESPONSE"
        exit 1
      }
      echo "$RESPONSE"
  only:
    - latest

# Manual job: run the helper script that opens a dev -> latest merge request.
create_mr_dev_to_latest:
  stage: merge
  script:
    - apt-get update && apt-get install curl git sed -y
    - |
      if [ "$CI_COMMIT_BRANCH" != "dev" ]; then
        echo -e "\e[31mERROR: Merge requests to latest branch are only allowed from dev branch.\e[0m"
        exit 1
      fi
    - bash .ci/mr_dev_to_latest.sh | tee mr_dev_to_latest.log
  when: manual
  allow_failure: false
  only:
    - dev
  artifacts:
    paths:
      - mr_dev_to_latest.log
    expire_in: 7 days

5
README.md Normal file
View File

@@ -0,0 +1,5 @@
# MRDS_ELT
## Building and deployment rules and procedures
https://europeancentralbank.atlassian.net/wiki/spaces/MRDS/pages/588221438/Gitlab+and+CI+CD+setup
#
#

View File

View File

@@ -0,0 +1,120 @@
import json
import sys
import time
import re
import requests
from datetime import datetime, timedelta
from airflow import DAG
from airflow.models import Variable
from airflow.operators.python_operator import PythonOperator
# from infromatic team, : connect to infromatica (akash)
# Utility to make task_id Airflow-safe
def sanitize_task_id(task_id: str) -> str:
    """Return an Airflow-safe task id: runs of illegal characters are
    collapsed to '_' and a 'task_' prefix is added when the result does
    not start with an alphanumeric character."""
    safe = re.sub(r'[^a-zA-Z0-9_]+', '_', task_id)
    return safe if re.match(r'^[a-zA-Z0-9]', safe) else 'task_' + safe
# Fetch parameters from Airflow Variables
iics_username = Variable.get("iics_username")  # IICS login user
iics_password = Variable.get("iics_password")  # IICS login password
task_type = Variable.get("task_type", default_var="MTT")  # IICS task type code
# Login endpoint; empty default means the Variable must be set in this env.
base_url = Variable.get("iics_base_url", default_var="")
# Task name
CDI_task_name = "CDI_task"
# Default DAG args
default_args = {
    'owner': 'infa',
    'depends_on_past': False,
    'email': ['airflow@example.com'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=1),
    # NOTE(review): a dynamic start_date is re-evaluated at every DAG parse;
    # confirm a fixed date is not required here.
    'start_date': datetime.now() - timedelta(seconds=10),
}
# API logic (same as before)
def get_session_id(un, pw):
    """Log in to IICS and return ``(icSessionId, serverUrl)``.

    POSTs the credentials to the module-level ``base_url`` login endpoint.
    On any non-200 response the details are printed and the process exits
    with status 1.
    """
    data = {'@type': 'login', 'username': un, 'password': pw}
    headers = {'Content-Type': 'application/json', 'Accept': 'application/json'}
    r = requests.post(base_url, data=json.dumps(data), headers=headers)
    if r.status_code == 200:
        return r.json()["icSessionId"], r.json()["serverUrl"]
    else:
        print('API call failed:', r.status_code)
        print(r.text)
        # NOTE(review): sys.exit inside an Airflow task kills the worker
        # process -- consider raising instead; confirm desired behavior.
        sys.exit(1)
def start_job(session_id, server_url, taskname, taskType):
    """Start the IICS job ``taskname`` of type ``taskType``.

    Returns ``(taskId, runId)`` from the job-start response; prints the
    error and exits the process with status 1 when the job cannot be
    started.
    """
    job_start_url = server_url + "/api/v2/job"
    headers = {'Content-Type': 'application/json', 'icSessionId': session_id, 'Accept': 'application/json'}
    data = {'@type': 'job', 'taskName': taskname, 'taskType': taskType}
    r = requests.post(job_start_url, data=json.dumps(data), headers=headers)
    if r.status_code == 200:
        response_content = r.json()
        print(f"Job {taskname} started successfully")
        return response_content['taskId'], response_content['runId']
    else:
        print('Job failed to start:', r.status_code)
        print(r.text)
        sys.exit(1)
def get_status(server_url, session_id, task_id, run_id):
    """Return the ``executionState`` of the (task_id, run_id) pair from the
    IICS activity monitor.

    Implicitly returns None when the monitor no longer lists the run (the
    caller treats None as a terminal state). Exits the process on a
    non-200 response.
    """
    job_activity_url = server_url + "/api/v2/activity/activityMonitor"
    headers = {'Content-Type': 'application/json', 'icSessionId': session_id, 'Accept': 'application/json'}
    r = requests.get(job_activity_url, headers=headers)
    if r.status_code == 200:
        for obj in r.json():
            if obj['taskId'] == task_id and obj['runId'] == run_id:
                return obj['executionState']
    else:
        print('Failed to get status:', r.status_code)
        print(r.text)
        sys.exit(1)
def execute_task(task_name):
    """Run one IICS task end-to-end: login, start the job, poll every 15 s
    until it leaves an active state, then print the session logs."""
    session_id, server_url = get_session_id(iics_username, iics_password)
    task_id, run_id = start_job(session_id, server_url, task_name, task_type)
    log_url = f"{server_url}/api/v2/activity/activityLog/"
    headers = {'Content-Type': 'application/json', 'icSessionId': session_id, 'Accept': 'application/json'}
    while True:
        time.sleep(15)  # poll interval
        status = get_status(server_url, session_id, task_id, run_id)
        print(f"Task status: {status}")
        # Any state outside these four is treated as terminal (including
        # None, returned when the monitor no longer lists the run).
        if status not in {"RUNNING", "INITIALIZED", "STOPPING", "QUEUED"}:
            # Fetch logs on completion
            url = f"{log_url}?taskId={task_id}&runId={run_id}"
            r = requests.get(url, headers=headers)
            logs = r.json()
            for obj in logs:
                log_id = obj['id']
                log_detail = requests.get(f"{log_url}{log_id}/sessionLog", headers=headers)
                print(log_detail.text)
            break
# DAG with no schedule (manual trigger)
dag = DAG(
    'IDMC_Airflow_Test',
    default_args=default_args,
    description='Simplified DAG with one CDI task',
    schedule_interval=None,  # manual trigger only
    catchup=False
)
# Single task running the one CDI task named at module level.
safe_task_id = sanitize_task_id(CDI_task_name)
run_cdi_task = PythonOperator(
    task_id=safe_task_id,
    python_callable=execute_task,
    op_kwargs={'task_name': CDI_task_name},
    dag=dag
)

View File

@@ -0,0 +1,142 @@
import json
import sys
import time
import re
import requests
from datetime import datetime, timedelta
from airflow import DAG
from airflow.models import Variable
from airflow.operators.dummy_operator import DummyOperator
from airflow.operators.python_operator import PythonOperator
# from infromatic team, : connect to infromatica (akash)
# Utility to make task_id Airflow-safe
def sanitize_task_id(task_id: str) -> str:
    """Make a raw task name safe for use as an Airflow task_id.

    Runs of characters outside [a-zA-Z0-9_] become a single underscore;
    if the result does not begin with an alphanumeric character it is
    prefixed with 'task_'.
    """
    safe = re.sub(r'[^a-zA-Z0-9_]+', '_', task_id)
    if re.match(r'^[a-zA-Z0-9]', safe):
        return safe
    return 'task_' + safe
# Fetch parameters from Airflow Variables
iics_username = Variable.get("iics_username")  # IICS login user
iics_password = Variable.get("iics_password")  # IICS login password
task_type = Variable.get("task_type", default_var="MTT")  # IICS task type code
# Login endpoint; override via the "iics_base_url" Variable.
base_url = Variable.get("iics_base_url", default_var="https://dm-us.informaticacloud.com/ma/api/v2/user/login")
# Load task names from Airflow Variables (JSON-encoded lists of task names)
CDI_task_name = json.loads(Variable.get("CDI_task_name", default_var='["Task_Date_Dim", "Task_Items", "Task_Store_Sales"]'))
CDI_E_task_name = json.loads(Variable.get("CDI_E_task_name", default_var='["Task_Total_Store_Sales_IWDEMO"]'))
# Default DAG args
default_args = {
    'owner': 'infa',
    'depends_on_past': False,
    'email': ['airflow@example.com'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=1),
    # NOTE(review): dynamic start_date is re-evaluated at every parse.
    'start_date': datetime.now() - timedelta(seconds=10),
    # NOTE(review): 'schedule' is not a task-level default arg; scheduling
    # is configured on the DAG object below -- confirm this key is needed.
    'schedule': '@daily'
}
# API logic
def get_session_id(un, pw):
    """Log in to IICS; return ``(icSessionId, serverUrl)``.

    Exits the process with status 1 on a non-200 response, so the final
    return statement only runs on success (``server_url`` is never read
    while unbound).
    """
    session_id = ''
    data = {'@type': 'login', 'username': un, 'password': pw}
    headers = {'Content-Type': 'application/json', 'Accept': 'application/json'}
    r = requests.post(base_url, data=json.dumps(data), headers=headers)
    if r.status_code == 200:
        session_id = r.json()["icSessionId"]
        server_url = r.json()["serverUrl"]
    else:
        print('API call failed:', r.status_code)
        print(r.text)
        sys.exit(1)
    return session_id, server_url
def start_job(session_id, server_url, taskname, taskType):
    """Start the IICS task ``taskname`` of type ``taskType``.

    Returns ``(taskId, runId, taskName)`` from the job-start response.
    Exits the process with status 1 when the job cannot be started,
    matching the error handling of ``get_session_id`` above.
    """
    job_start_url = server_url + "/api/v2/job"
    headers = {'Content-Type': 'application/json', 'icSessionId': session_id, 'Accept': 'application/json'}
    data = {'@type': 'job', 'taskName': taskname, 'taskType': taskType}
    r = requests.post(job_start_url, data=json.dumps(data), headers=headers)
    if r.status_code == 200:
        response_content = r.json()
        print("Job", taskname, "started successfully")
        return response_content['taskId'], response_content['runId'], response_content['taskName']
    else:
        print('Job failed to start:', r.status_code)
        print(r.text)
        # Bug fix: previously fell through and returned None, which made the
        # caller's three-value unpack fail with an opaque TypeError.
        sys.exit(1)
def get_status(server_url, session_id):
    """Return ``(taskId, executionState, taskName, runId)`` of the FIRST
    entry in the IICS activity monitor, or None (implicitly) when the
    monitor is empty or the request fails.

    NOTE(review): unlike the IDMC variant of this helper, this does not
    filter by taskId/runId -- with concurrent jobs it may report another
    run's state; confirm this is intended.
    """
    job_activity_url = server_url + "/api/v2/activity/activityMonitor"
    headers = {'Content-Type': 'application/json', 'icSessionId': session_id, 'Accept': 'application/json'}
    r = requests.get(job_activity_url, headers=headers)
    if r.status_code == 200:
        for obj in r.json():
            return obj['taskId'], obj['executionState'], obj['taskName'], obj['runId']
    else:
        print('Failed to get status:', r.status_code)
        print(r.text)
def execute_task(task_name):
    """Run one IICS task: login, start the job, poll the activity monitor
    every 15 s until no active state is reported, then print the session
    logs for the run."""
    session_id, server_url = get_session_id(iics_username, iics_password)
    task_id, run_id, _ = start_job(session_id, server_url, task_name, task_type)
    log_url = f"{server_url}/api/v2/activity/activityLog/"
    headers = {'Content-Type': 'application/json', 'icSessionId': session_id, 'Accept': 'application/json'}
    while True:
        time.sleep(15)  # poll interval
        task_status = get_status(server_url, session_id)
        # get_status yields the monitor's first entry (or None); any state
        # outside the four active ones ends the polling loop.
        if not task_status or task_status[1] not in {"RUNNING", "INITIALIZED", "STOPPING", "QUEUED"}:
            # Fetch log
            url = f"{log_url}?taskId={task_id}&runId={run_id}"
            r = requests.get(url, headers=headers)
            logs = r.json()
            for obj in logs:
                log_id = obj['id']
                log_detail = requests.get(f"{log_url}{log_id}/sessionLog", headers=headers)
                print(log_detail.text)
            break
# Define DAG
dag = DAG(
    'IICS_Airflow_Demo',
    default_args=default_args,
    description='A Sample IICS Airflow DAG',
    schedule_interval='@daily',
    catchup=False
)
# Task group 1: CDI Tasks -- fan out between cdi_start and cdi_end so that
# the CDI_E tasks below only run after every CDI task has finished.
cdi_start = DummyOperator(task_id='cdi_start', dag=dag)
cdi_end = DummyOperator(task_id='cdi_end', dag=dag)
for i in CDI_task_name:
    safe_task_id = 'IICS_CDI_' + sanitize_task_id(i)
    print(f"Creating task: {safe_task_id} for original task name: {i}")  # parse-time log
    cdi_task = PythonOperator(
        task_id=safe_task_id,
        python_callable=execute_task,
        op_kwargs={'task_name': i},
        dag=dag
    )
    cdi_start >> cdi_task >> cdi_end
# Task group 2: CDI_E Tasks -- each depends only on cdi_end.
for j in CDI_E_task_name:
    safe_task_id = 'IICS_CDI_E_' + sanitize_task_id(j)
    print(f"Creating E task: {safe_task_id} for original task name: {j}")  # parse-time log
    cdi_e_task = PythonOperator(
        task_id=safe_task_id,
        python_callable=execute_task,
        op_kwargs={'task_name': j},
        dag=dag
    )
    cdi_end >> cdi_e_task

View File

@@ -0,0 +1,75 @@
import sys
import os
from airflow import DAG
from airflow.operators.python import PythonOperator
from airflow.utils.dates import days_ago
from datetime import datetime, timedelta
import logging
# Importing custom modules
sys.path.append('/opt/airflow/python/connectors/devo')
sys.path.append('/opt/airflow/python/mrds_common')
sys.path.append('/opt/airflow/src/airflow/dags/ods/rqsd')
sys.path.append('/opt/airflow/python/connectors/devo')
# Import the main function from your script
from devo_connector import main as devo_main
### DEVO CONNECTOR WITH STATIC workflow (task 3)
# Default DAG arguments
default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': days_ago(1),
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 3,
    'retry_delay': timedelta(minutes=5),
}
with DAG(
    dag_id='devo_connector_test',
    default_args=default_args,
    description='Run devo RQSD data ingestion workflow',
    schedule_interval=None,  # we can set later
    #start_date=datetime(2025, 10, 7),
    catchup=False,
    tags=['Devo', 'RQSD', 'Connector'],
) as dag:

    def run_devo_connector_rqsd(**context):
        """Validate environment configuration and run the Devo RQSD connector.

        The loader DB credentials are only validated here, not passed to
        devo_main -- presumably the connector reads them from the
        environment itself; TODO confirm.
        """
        try:
            # Pick env from ENV variables
            env = os.getenv("MRDS_ENV")
            username = os.getenv("MRDS_LOADER_DB_USER")
            password = os.getenv("MRDS_LOADER_DB_PASS")
            tnsalias = os.getenv("MRDS_LOADER_DB_TNS")
            if not all([username, password, tnsalias]):
                raise ValueError(
                    "Missing one or more required environment variables: "
                    "MRDS_LOADER_DB_USER, MRDS_LOADER_DB_PASS, MRDS_LOADER_DB_TNS"
                )
            logging.info(
                f"Starting Casper RQSD workflow from Airflow DAG for env '{env}'"
            )
            # Static workflow context: fixed run/history keys for this test DAG.
            workflow_context = {"run_id": 34, "a_workflow_history_key": 6}
            flow_config_path = "/opt/airflow/src/airflow/dags/ods/rqsd/rqsd_process/config/yaml/flow_config_rqsd_observations.yaml"
            env_config_path = "/opt/airflow/python/connectors/devo/config/env_config_rqsd.yaml"
            #env = "tst"
            # flow_config_rqsd_observations.yaml
            logging.info("Starting Devo RQSD workflow from Airflow DAG")
            devo_main(workflow_context, flow_config_path, env_config_path, env)
            logging.info("Devo RQSD workflow completed successfully")
        except Exception as e:
            logging.error(f"Error running Devo RQSD workflow: {e}", exc_info=True)
            raise

    run_devo = PythonOperator(
        task_id='run_devo_connector_rqsd',
        python_callable=run_devo_connector_rqsd,
    )

View File

@@ -0,0 +1,158 @@
import os
import sys
import logging
from airflow.decorators import dag
from airflow.operators.python import PythonOperator
from airflow.utils.dates import days_ago
from airflow.utils.trigger_rule import TriggerRule
from airflow import DAG
from airflow.decorators import task
from airflow.operators.trigger_dagrun import TriggerDagRunOperator
from datetime import datetime, timedelta
from airflow.operators.python import BranchPythonOperator
from airflow.operators.empty import EmptyOperator
from mrds.utils import oraconn
sys.path.append('/opt/airflow/python/connectors/devo')
sys.path.append('/opt/airflow/python/mrds_common')
DAG_NAME = "dev_replicator_scheduler_rar"       # this scheduler DAG's id
TARGET_DAG_ID = "devo_replicator_trigger_rar"   # DAG triggered per qualifying table

def get_devo_replica_table_options():
    """Return 'OWNER.TABLE_NAME' strings from CT_MRDS.a_devo_replica_mgmt_rar.

    Opens a fresh MRDS_LOADER connection and returns [] (after logging) on
    any error, so a DB outage yields "nothing to do" rather than a task
    failure.
    """
    oracle_conn = None
    try:
        oracle_conn = oraconn.connect('MRDS_LOADER')
        cursor = oracle_conn.cursor()
        cursor.execute("SELECT OWNER || '.' || TABLE_NAME FROM CT_MRDS.a_devo_replica_mgmt_rar ORDER BY OWNER, TABLE_NAME")
        options = [row[0] for row in cursor.fetchall()]
        cursor.close()
        return options
    except Exception as e:
        # NOTE(review): message says MOPDB but the query reads the RAR table.
        logging.error(f"Error getting MOPDB table options: {e}")
        return []
    finally:
        if oracle_conn:
            oracle_conn.close()
def check_table_precondition(table_full_name):
    """Decide whether Devo replication should be triggered for one table.

    Compares the last successful Oracle-side load (CT_RAR monitoring table)
    with the last finished Devo replication (CT_MRDS management table): the
    trigger fires when the Oracle load is newer.

    Returns ``{"table": <name>, "trigger": "Y"|"N"|"ERROR"}``; "ERROR" on
    any database failure so one broken table does not abort the scan.
    """
    oracle_conn = None
    try:
        oracle_conn = oraconn.connect('MRDS_LOADER')
        cursor = oracle_conn.cursor()
        sql = """
        WITH LAST_UPDATE_ORACLE AS (
        SELECT max(process_end) as process_end
        FROM CT_RAR.A_RAR_FOR_DISC_MONITORING
        WHERE upper(owner||'.'||TARGET_TABLE_NAME) = upper(:table_name)
        AND PROCESS_END is not null AND PROCESS_SUCCESSFUL='Y'
        ),
        LAST_UPDATE_DEVO AS (
        SELECT CASE WHEN last_status = 'FINISHED' THEN LAST_END_TIME ELSE TO_DATE('01-JAN-1999', 'DD-MON-YYYY') END as process_end
        FROM CT_MRDS.a_devo_replica_mgmt_rar
        WHERE OWNER || '.' || TABLE_NAME = :table_name
        )
        SELECT CASE WHEN (SELECT process_end FROM LAST_UPDATE_ORACLE) > (SELECT process_end FROM LAST_UPDATE_DEVO)
        THEN 'Y' ELSE 'N' END AS TRIGGER_DEVO_REPLICATOR FROM dual
        """
        cursor.execute(sql, table_name=table_full_name)
        result = cursor.fetchone()
        status = result[0] if result else 'N'  # no row -> do not trigger
        logging.info(f"Precondition for {table_full_name}: {status}")
        cursor.close()
        return {"table": table_full_name, "trigger": status}
    except Exception as e:
        logging.error(f"Error checking precondition for {table_full_name}: {e}")
        return {"table": table_full_name, "trigger": 'ERROR'}
    finally:
        if oracle_conn:
            oracle_conn.close()
def get_tables_to_trigger(precondition_results):
    """Build TriggerDagRun conf payloads for every table whose
    precondition check came back 'Y'."""
    selected = []
    confs = []
    for entry in precondition_results:
        if entry["trigger"] == "Y":
            selected.append(entry["table"])
            confs.append({"owner_table": entry["table"]})
    logging.info(f"Tables meeting precondition: {selected}")
    return confs
def branch_on_tables(ti):
    """Branch callable: choose the downstream path based on the
    precondition results pulled from the 'check_all_tables' task."""
    precondition_results = ti.xcom_pull(task_ids='check_all_tables')
    any_triggered = any(r["trigger"] == "Y" for r in precondition_results)
    return "trigger_devo_replicators" if any_triggered else "no_table_updated"
# Default task arguments shared by all tasks in this scheduler DAG.
default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': days_ago(1),
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=2),
}
with DAG(
    dag_id=DAG_NAME,
    default_args=default_args,
    schedule_interval=None,
    catchup=False,
    tags=['DevoScheduler', 'DevoReplicatorTrigger']
) as dag:

    @task()
    def fetch_tables():
        # OWNER.TABLE_NAME candidates from the management table.
        return get_devo_replica_table_options()

    @task()
    def check_all_tables(table_list):
        # Evaluate the trigger precondition for every candidate; log counts.
        results = [check_table_precondition(tbl) for tbl in table_list]
        count_y = sum(1 for r in results if r["trigger"] == "Y")
        count_n = sum(1 for r in results if r["trigger"] == "N")
        logging.info(f"Precondition results: {results}")
        logging.info(f"Tables with trigger = 'Y': {count_y}")
        logging.info(f"Tables with trigger = 'N': {count_n}")
        return results

    @task()
    def output_tables_to_trigger(precondition_results):
        # Conf payloads ({"owner_table": ...}) for the mapped trigger task.
        return get_tables_to_trigger(precondition_results)

    branch_task = BranchPythonOperator(
        task_id="branch_trigger_check",
        python_callable=branch_on_tables,
        provide_context=True,
    )
    no_table_updated = EmptyOperator(task_id="no_table_updated")

    tables = fetch_tables()
    precondition_results = check_all_tables(tables)
    tables_to_trigger = output_tables_to_trigger(precondition_results)

    # One TARGET_DAG_ID run per qualifying table (dynamic task mapping).
    trigger_dag = TriggerDagRunOperator.partial(
        task_id="trigger_devo_replicators",
        trigger_dag_id=TARGET_DAG_ID,
        execution_date="{{ ds }}"
    ).expand(conf=tables_to_trigger)

    # Dependencies for branching
    tables >> precondition_results >> tables_to_trigger >> branch_task
    branch_task >> [trigger_dag, no_table_updated]
"""
1. fetch_tables gets the list of tables.
2. check_all_tables checks each tables trigger status and logs counts.
3. output_tables_to_trigger prepares the mapped parameter list for triggering downstream DAGs.
4. branch_on_tables decides the path:
"trigger_devo_replicators" if any table triggers.
"no_table_updated" otherwise.
5. BranchPythonOperator implements the conditional branching.
6. TriggerDagRunOperator dynamically triggers a run of devo_replicator_trigger_rar per qualifying table.
7. EmptyOperator represents the "no tables to trigger" branch.
"""

View File

@@ -0,0 +1,112 @@
from airflow import DAG
from airflow.operators.python import PythonOperator
from airflow.providers.oracle.hooks.oracle import OracleHook
from airflow.utils.dates import days_ago
from datetime import timedelta
import logging
# Hard-coded replication parameters used by every task below.
# NOTE(review): presumably placeholders to be replaced by run-time config.
p_run_id = 1234
p_service_name = 'MyService'
p_table_owner = 'MY_SCHEMA'
p_table_name = 'MY_TABLE'
p_objectstore_uri = 's3://bucket/uri' # subject to change appropriate for RAR/MOPDB
def start_log_table_task(**context):
    """Call MRDS_LOADER.DATA_REPLICATOR.start_log_table with the module-level
    p_* parameters.

    Logs and re-raises on any database error. The cursor and connection are
    always released (the original leaked both when execute/commit raised).
    """
    proc_call = "BEGIN MRDS_LOADER.DATA_REPLICATOR.start_log_table(:1, :2, :3, :4); END;"
    try:
        oracle_hook = OracleHook(oracle_conn_id='oracle_default')
        conn = oracle_hook.get_conn()
        try:
            cursor = conn.cursor()
            try:
                cursor.execute(proc_call, [p_run_id, p_service_name, p_table_owner, p_table_name])
                conn.commit()
            finally:
                cursor.close()
        finally:
            conn.close()
        logging.info("start_log_table executed successfully.")
    except Exception as e:
        logging.error("Failed to execute start_log_table: %s", e, exc_info=True)
        raise
def export_table_task(**context):
    """Call MRDS_LOADER.DATA_REPLICATOR.export_table to export the configured
    table to the object store URI.

    Logs and re-raises on any database error. The cursor and connection are
    always released (the original leaked both when execute/commit raised).
    """
    proc_call = "BEGIN MRDS_LOADER.DATA_REPLICATOR.export_table(:1, :2, :3, :4); END;"
    try:
        oracle_hook = OracleHook(oracle_conn_id='oracle_default')
        conn = oracle_hook.get_conn()
        try:
            cursor = conn.cursor()
            try:
                cursor.execute(proc_call, [p_service_name, p_table_owner, p_table_name, p_objectstore_uri])
                conn.commit()
            finally:
                cursor.close()
        finally:
            conn.close()
        logging.info("export_table executed successfully.")
    except Exception as e:
        logging.error("Failed to execute export_table: %s", e, exc_info=True)
        raise
def devo_impyla_task(**context):
    """Placeholder for the future Impyla (Devo) refresh step.

    Sketch of the intended implementation:
        from impala.dbapi import connect
        conn = connect(host="...", port=21050)
        cursor = conn.cursor()
        cursor.execute("...")
    """
    logging.info("Impyla (Devo) task placeholder ran. Please implement.")
def end_log_table_task(**context):
    """Call MRDS_LOADER.DATA_REPLICATOR.end_log_table to close the run's log
    entry for the configured table.

    Logs and re-raises on any database error. The cursor and connection are
    always released (the original leaked both when execute/commit raised).
    """
    proc_call = "BEGIN MRDS_LOADER.DATA_REPLICATOR.end_log_table(:1, :2, :3); END;"
    try:
        oracle_hook = OracleHook(oracle_conn_id='oracle_default')
        conn = oracle_hook.get_conn()
        try:
            cursor = conn.cursor()
            try:
                cursor.execute(proc_call, [p_service_name, p_table_owner, p_table_name])
                conn.commit()
            finally:
                cursor.close()
        finally:
            conn.close()
        logging.info("end_log_table executed successfully.")
    except Exception as e:
        logging.error("Failed to execute end_log_table: %s", e, exc_info=True)
        raise
# Default task arguments shared by all tasks in this replicator DAG.
default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': days_ago(1),
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 2,
    'retry_delay': timedelta(minutes=5),
}
with DAG(
    dag_id='rqsd_devo_replicator_test_old',
    default_args=default_args,
    description='Run Devo replicator workflow',
    schedule_interval=None,  # manual trigger only
    catchup=False,
    tags=['Devo', 'RQSD', 'Replicator'],
) as dag:
    # Linear pipeline: log start -> export to object store ->
    # (placeholder) Devo refresh -> log end.
    t1 = PythonOperator(
        task_id='start_log_table',
        python_callable=start_log_table_task,
    )
    t2 = PythonOperator(
        task_id='export_table',
        python_callable=export_table_task,
    )
    t3 = PythonOperator(
        task_id='devo_impyla',
        python_callable=devo_impyla_task,
    )
    t4 = PythonOperator(
        task_id='end_log_table',
        python_callable=end_log_table_task,
    )
    t1 >> t2 >> t3 >> t4

View File

@@ -0,0 +1,132 @@
import logging
import os  # bug fix: os.getenv is used below but os was never imported
import sys  # bug fix: sys.path is modified below but sys was never imported
from datetime import timedelta

from airflow import DAG
#from airflow.providers.oracle.operators.oracle import SQLExecuteQueryOperator
from airflow.providers.common.sql.operators.sql import SQLExecuteQueryOperator
# Bug fix: PythonOperator was previously imported in the middle of the DAG
# body, after its first use at parse time.
from airflow.operators.python import PythonOperator
from airflow.utils.dates import days_ago

# Importing custom modules
sys.path.append('/opt/airflow/python/connectors/devo')
sys.path.append('/opt/airflow/python/mrds_common')
sys.path.append('/opt/airflow/src/airflow/dags/ods/rqsd')
# Import your functions
from mrds.utils.manage_runs import init_workflow, finalise_workflow
from devo_replicator.data_replicator.impala_refresher import main as impala_main
#step 5) Devo replication
## DEVO REPLICATOR WITH SQLOperator
### check the Oracle connection, fixed params --> test connection
### pick it from a file
# TASK:
# - retrieve the params {0} and {1} directly from the config file, based on dev/test
# need to be passed from Informatica (WLA call) to the DAGs
# WLA to Airflow, connection done
# Default task arguments shared by all tasks in this DAG.
default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': days_ago(1),
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 2,
    'retry_delay': timedelta(minutes=5),
}
with DAG(
    dag_id='rqsd_devo_replicator_2',
    default_args=default_args,
    description='Run Devo replicator workflow',
    schedule_interval=None,
    catchup=False,
    tags=['Devo', 'RQSD', 'Replicator'],
) as dag:

    def init_step(**context):
        """Resolve run parameters and push the run id to XCom.

        NOTE(review): every p_* value below is local to this function, yet
        the operators further down reference p_run_id / p_service_name /
        p_table_owner / p_table_name / p_objectstore_uri at DAG-definition
        time -- that raises NameError when this module is parsed. These
        values must be made module-level or templated from XCom.
        """
        # NOTE(review): ensure os is imported at module top before relying on this.
        env = os.getenv("MRDS_ENV")
        corporate_store= "corporate store is 'crp_mopdb' for mopdb and 'crp_rar' for rar"
        config_path = "/opt/airflow/python/devo_replicator/config/env_config.yaml"
        p_service_name = 'MOPDB'
        p_table_owner = 'MPEC'
        p_table_name = 'T_MPEC'
        #parse the config yml and filter by dev or test and mopdb or rar
        p_objectstore_uri = 'https://devo-crp-ffppyd8q.bucket.vpce-040b28f5818b670c1-owicl3ow.s3.eu-central-1.vpce.amazonaws.com/mopdb/db' # subject to change as appropriate
        p_run_id = str(context['ti'].run_id)
        print(f"=== DEBUG INFO : {p_run_id} ===")
        context['ti'].xcom_push(key='p_run_id', value=p_run_id)

    # NOTE(review): this assignment shadows the init_step function defined
    # just above; rename one of the two.
    init_step = PythonOperator(
        task_id='init_step',
        python_callable=init_step,
        provide_context=True,
    )
    t1 = SQLExecuteQueryOperator(
        task_id='start_log_table',
        oracle_conn_id='oracle_default',
        # failed ,open up the cnx
        sql="BEGIN MRDS_LOADER.DATA_REPLICATOR.start_log_table(:p_run_id, :p_service_name, :p_table_owner, :p_table_name); END;",
        # NOTE(review): these names are undefined at module scope (see init_step).
        parameters={
            'p_run_id': p_run_id,
            'p_service_name': p_service_name,
            'p_table_owner': p_table_owner,
            'p_table_name': p_table_name
        },
        #oracle_conn_id='oracle_default'
    )
    t2 = SQLExecuteQueryOperator(
        task_id='export_table',
        oracle_conn_id='oracle_default',
        sql="BEGIN MRDS_LOADER.DATA_REPLICATOR.export_table(:p_service_name, :p_table_owner, :p_table_name, :p_objectstore_uri); END;",
        # NOTE(review): these names are undefined at module scope (see init_step).
        parameters={
            'p_service_name': p_service_name,
            'p_table_owner': p_table_owner,
            'p_table_name': p_table_name,
            'p_objectstore_uri': p_objectstore_uri
        },
        #oracle_conn_id='oracle_default'
    )
    # Leaving the Devo/Impyla task as a PythonOperator (placeholder)
    # NOTE(review): this import comes after PythonOperator's first use above;
    # move it to the module top.
    from airflow.operators.python import PythonOperator

    def devo_impyla_task(**context):
        # NOTE(review): env_config_path, env, table and corporate_store are
        # all undefined in this scope -- this call fails as written.
        status = impala_main(env_config_path, env, table, corporate_store)
        logging.info("Impyla (Devo) task placeholder ran. Please implement.")
        # get details-data from impala ( its pending )

    t3 = PythonOperator(
        task_id='devo_impyla',
        python_callable=devo_impyla_task,
    )
    # push to s3, we need to call the proc
    t4 = SQLExecuteQueryOperator(
        task_id='end_log_table',
        oracle_conn_id='oracle_default',
        sql="BEGIN MRDS_LOADER.DATA_REPLICATOR.end_log_table(:p_service_name, :p_table_owner, :p_table_name); END;",
        # NOTE(review): these names are undefined at module scope (see init_step).
        parameters={
            'p_service_name': p_service_name,
            'p_table_owner': p_table_owner,
            'p_table_name': p_table_name
        },
        #oracle_conn_id='oracle_default'
    )
    # t4 need to be executed always if we succeed or not ( if t1 failed then go directly to t4)
    # t5 that will check if any of previous dag failed put everything will be read
    init_step >> t1 >> t2 >> t3 >> t4

View File

@@ -0,0 +1,65 @@
from airflow import DAG
from airflow.providers.common.sql.operators.sql import SQLExecuteQueryOperator
from airflow.operators.python import PythonOperator
from datetime import datetime, timedelta
import logging
logger = logging.getLogger(__name__)  # module-level logger for this DAG file
## OLD (package replication)
def test_oracle_connection(**context):
    """Test Oracle connection and log the result"""
    conn_id = "marsdb_loader"
    from airflow.providers.oracle.hooks.oracle import OracleHook
    try:
        logger.debug("Attempting to connect to Oracle database...")
        connection = OracleHook(oracle_conn_id=conn_id).get_conn()
        cur = connection.cursor()
        cur.execute("SELECT 1 FROM dual")
        row = cur.fetchone()
        logger.info(f"Connection test successful. Result: {row}")
        cur.close()
        connection.close()
    except Exception as e:
        logger.error(f"Connection test failed: {str(e)}")
        raise
# Default task arguments for the PL/SQL test DAG.
default_args = {
    'depends_on_past': False,
    'start_date': datetime(2025, 6, 25),
    'retries': 1,
    'retry_delay': timedelta(seconds=15),
}
with DAG(
    'oracle_plsql_test_dag',
    default_args=default_args,
    schedule_interval=None,  # manual trigger only
    catchup=False,
) as dag:
    # Step 1: verify the Oracle connection works before running the procedure.
    test_connection = PythonOperator(
        task_id='test_oracle_connection',
        python_callable=test_oracle_connection,
    )
    # With named parameter
    # Step 2: export one table via DATA_REPLICATOR.export_table.
    run_plsql = SQLExecuteQueryOperator(
        task_id='run_plsql_procedure',
        conn_id="marsdb_loader",
        sql="""
        BEGIN
            DATA_REPLICATOR.export_table(
                p_table_owner => 'c2d',
                p_table_name => 't_all_assets_servicer',
                p_objectstore_uri => 'https://oci-test-sani.bucket.vpce-0b3a5f000733397b0-kxlyoh5z.s3.eu-central-1.vpce.amazonaws.com/',
                p_date_column => 'SNAPSHOT_DATE'
            );
        END;
        """,
    )
    test_connection >> run_plsql

View File

@@ -0,0 +1,171 @@
import sys
import os
from airflow import DAG
from airflow.operators.python import PythonOperator
from airflow.utils.dates import days_ago
from datetime import datetime, timedelta
import logging
### DEVO CONNECTOR WITH DYNAMIC WORKFLOW CONTEXT & HISTORY KEY
# Importing custom modules
sys.path.append('/opt/airflow/python/connectors/devo')
sys.path.append('/opt/airflow/python/mrds_common')
sys.path.append('/opt/airflow/src/airflow/dags/ods/rqsd')
# Import your functions
from mrds.utils.manage_runs import init_workflow, finalise_workflow
from devo_connector import main as devo_main
from devo_connector_v2 import run as devo_main2
from mrds.core import main as mrds_main
# Default arguments
default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': days_ago(1),
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 3,
    'retry_delay': timedelta(minutes=5),
}
# Dynamic name extraction from basename: the DAG id always matches the
# file name, so renaming the file renames the DAG.
dag_id = os.path.splitext(os.path.basename(__file__))[0]
with DAG(
    dag_id=dag_id,
    default_args=default_args,
    description='Run devo RQSD data ingestion workflow with MRDS processing',
    schedule_interval=None,  # manual trigger only
    catchup=False,
    tags=["Devo", "RQSD", "MRDS", "Connector"],
    # Run-time parameters, overridable per triggered run.
    params={
        "source_filename": "",
        "config_file": "",
    },
) as dag:
    def run_devo_connector_rqsd(**context):
        """Run Devo RQSD connector workflow.

        Initialises a workflow-history record, runs the v2 connector, and
        pushes the workflow context to XCom for the downstream MRDS task.
        If the connector fails after init succeeded, the history record is
        finalised as FAILED and the exception is re-raised.
        """
        try:
            env = os.getenv("MRDS_ENV")
            username = os.getenv("MRDS_LOADER_DB_USER")
            password = os.getenv("MRDS_LOADER_DB_PASS")
            tnsalias = os.getenv("MRDS_LOADER_DB_TNS")
            # Credentials are only validated here, not passed on --
            # presumably read again inside the connector; TODO confirm.
            if not all([username, password, tnsalias]):
                raise ValueError(
                    "Missing one or more required environment variables: "
                    "MRDS_LOADER_DB_USER, MRDS_LOADER_DB_PASS, MRDS_LOADER_DB_TNS"
                )
            logging.info(f"Starting Devo RQSD workflow from Airflow DAG for env '{env}'")
            database_name = 'MOPDB'
            workflow_name = 'w_MOPDB_RQSD_PROCESS'
            workflow_run_id = str(context['ti'].run_id)
            # Register this run in the workflow history.
            a_workflow_history_key = init_workflow(database_name, workflow_name, workflow_run_id)
            logging.info(f"Initialized workflow with history key: {a_workflow_history_key}")
            workflow_context = {
                "run_id": workflow_run_id,
                "a_workflow_history_key": a_workflow_history_key
            }
            flow_config_path = "/opt/airflow/src/airflow/dags/ods/rqsd/rqsd_process/config/yaml/flow_config_rqsd_observations.yaml"
            env_config_path = "/opt/airflow/python/connectors/devo/config/env_config_rqsd.yaml"
            logging.info("Starting Devo RQSD workflow from Airflow DAG")
            count = devo_main2(workflow_context, flow_config_path, env_config_path, env)
            print("=================================================================")
            print(f"Devo RQSD workflow completed successfully with count : {count}")
            logging.info(f"Devo RQSD workflow completed successfully with count : {count}")
            # Push the workflow context and history key to XCom for downstream tasks
            context['ti'].xcom_push(key='workflow_history_key', value=a_workflow_history_key)
            context['ti'].xcom_push(key='workflow_context', value=workflow_context)
        except Exception as e:
            logging.error(f"Error running Devo RQSD workflow: {e}", exc_info=True)
            # If init_workflow succeeded but workflow failed, finalize with FAILED status
            if 'a_workflow_history_key' in locals():
                try:
                    finalise_workflow(a_workflow_history_key, "FAILED")
                except Exception as finalise_error:
                    logging.error(f"Failed to finalise workflow after error: {finalise_error}")
            raise
def run_mrds_task(**context):
"""Run MRDS processing task"""
try:
ti = context.get('ti')
workflow_context = ti.xcom_pull(key='workflow_context', task_ids='run_devo_connector_rqsd')
if not workflow_context:
raise ValueError("No workflow_context from Task 1")
print("=== workflow_context ====:",workflow_context)
source_filename = "RQSD_OBSERVATIONS.csv"
config_file = "/opt/airflow/src/airflow/dags/ods/rqsd/rqsd_process/config/yaml/flow_config_devo_process.yaml"
print("---- run_mrds_task ----")
print("source_filename :", source_filename)
print("config_file = ", config_file)
print("------------------------")
if not source_filename:
raise ValueError("No source_filename provided in DAG run params.")
if not config_file:
raise ValueError("No config_file path provided in DAG run params.")
logging.info(f"Starting MRDS task with source_filename: {source_filename}, config_file: {config_file}")
# Run MRDS with the workflow context from the previous task
mrds_main(workflow_context, source_filename, config_file, generate_workflow_context=True)
logging.info("MRDS task completed successfully")
except Exception as e:
logging.error(f"Error running MRDS task: {e}", exc_info=True)
raise
def finalise_workflow_task(**context):
"""Finalize workflow with SUCCESS status"""
# Pull the workflow_history_key from XCom pushed by the main task
ti = context['ti']
a_workflow_history_key = ti.xcom_pull(key='workflow_history_key', task_ids='run_devo_connector_rqsd')
if a_workflow_history_key is None:
raise ValueError("No workflow history key found in XCom; cannot finalise workflow")
# Call finalise with SUCCESS status
finalise_workflow(a_workflow_history_key, "SUCCESS")
logging.info(f"Finalised workflow with history key {a_workflow_history_key} as SUCCESS")
# Task definitions
run_devo = PythonOperator(
task_id='run_devo_connector_rqsd',
python_callable=run_devo_connector_rqsd,
provide_context=True,
)
run_mrds = PythonOperator(
task_id='run_mrds_task',
python_callable=run_mrds_task,
provide_context=True,
)
finalize = PythonOperator(
task_id='finalise_workflow',
python_callable=finalise_workflow_task,
provide_context=True,
)
# Task dependencies
run_devo >> run_mrds >> finalize

View File

@@ -0,0 +1,320 @@
from __future__ import annotations
import os
import sys
import logging
import yaml
from datetime import timedelta
from airflow import DAG
from airflow.utils.dates import days_ago
from airflow.utils.trigger_rule import TriggerRule
from airflow.operators.python import PythonOperator
from airflow.providers.common.sql.operators.sql import SQLExecuteQueryOperator
try:
from airflow.exceptions import AirflowFailException
except Exception: # fallback for older Airflow
from airflow.exceptions import AirflowException as AirflowFailException
# --- Custom module paths (as in snippet) ---
sys.path.append('/opt/airflow/python/connectors/devo')
sys.path.append('/opt/airflow/python/mrds_common')
sys.path.append('/opt/airflow/src/airflow/dags/ods/rqsd')
sys.path.append('/opt/airflow/python/devo_replicator/data_replicator')
# --- custom imports ---
from mrds.utils import oraconn
from impala_refresher import main as impala_main
# --- Config path ---
# YAML file holding per-environment and per-store settings; read in init_step.
# Expected keys used below: <env>.S3_LOCATION_URI and <store>.corporate_store.
ENV_CONFIG_PATH = "/opt/airflow/python/devo_replicator/config/env_config.yaml"

# Default arguments applied to every task in this DAG.
default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': days_ago(1),
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 2,
    'retry_delay': timedelta(minutes=5),
}
with DAG(
    dag_id='rqsd_devo_replicator_2',
    default_args=default_args,
    description='Run Devo replicator workflow',
    schedule=None,
    catchup=False,
    tags=['Devo', 'RQSD', 'Replicator'],
) as dag:
    # -------------------------------
    # 1) Init: read config + set XCom
    # -------------------------------
    def init_step(**context):
        """Resolve env/store settings and publish downstream parameters via XCom."""
        dag_run = context.get("dag_run")
        ti = context["ti"]
        # Manual trigger conf may override the environment; fall back to MRDS_ENV.
        # NOTE(review): only the env-var fallback is lower-cased, a conf-supplied
        # value is used as-is — confirm that is intended.
        conf = (dag_run.conf or {}) if dag_run else {}
        env = conf.get("env") or os.getenv("MRDS_ENV", "dev").lower()
        if env not in {"dev", "tst"}:
            raise ValueError(f"Unsupported env '{env}'. Expected 'dev' or 'tst'.")
        # hardcoded the mopdb
        # ====================================
        store = "mopdb"
        # ====================================
        if store not in {"mopdb", "rar"}:
            raise ValueError(f"Unsupported store '{store}'. Expected 'mopdb' or 'rar'.")
        p_service_name = "MOPDB" if store == "mopdb" else "RAR"
        p_table_owner = "MPEC"
        p_table_name = "T_MPEC"
        with open(ENV_CONFIG_PATH, "r") as f:
            cfg = yaml.safe_load(f)
        env_cfg = cfg[env]
        store_cfg = cfg[store]
        # S3_LOCATION_URI contains a "{0}" placeholder for the store name.
        p_objectstore_uri = env_cfg["S3_LOCATION_URI"].replace("{0}",store.lower())
        p_run_id = str(ti.run_id)
        logging.info("=== init_step === env=%s store=%s run_id=%s", env, store, p_run_id)
        logging.info("objectstore_uri=%s", p_objectstore_uri)
        # Everything downstream tasks need, pushed as individual XCom keys.
        xcom = {
            "env": env,
            "store": store,
            "config_path": ENV_CONFIG_PATH,
            "p_run_id": p_run_id,
            "p_service_name": p_service_name,
            "p_table_owner": p_table_owner,
            "p_table_name": p_table_name,
            "p_objectstore_uri": p_objectstore_uri,
            "corporate_store": store_cfg["corporate_store"], # "crp_mopdb" or "crp_rar"
        }
        print(" ============= DEBUG PARAMS ============= ")
        print(xcom)
        for k, v in xcom.items():
            ti.xcom_push(key=k, value=v)
    init = PythonOperator(
        task_id='init_step',
        python_callable=init_step,
    )
    # ------------------------------------
    # 2) log table (Oracle procedure)
    # ------------------------------------
    def start_log_table_task(**context):
        """Call DATA_REPLICATOR.start_log_table with the init_step parameters."""
        ti = context["ti"]
        # Get parameters from XCom
        p_run_id = ti.xcom_pull(task_ids='init_step', key='p_run_id')
        p_service_name = ti.xcom_pull(task_ids='init_step', key='p_service_name')
        p_table_owner = ti.xcom_pull(task_ids='init_step', key='p_table_owner')
        p_table_name = ti.xcom_pull(task_ids='init_step', key='p_table_name')
        # Create Oracle connection for this task
        oracle_conn = None
        try:
            oracle_conn = oraconn.connect('MRDS_LOADER')
            logging.info("Oracle connection established successfully for start_log_table")
            # Execute Oracle procedure using oraconn.run_proc() directly
            oraconn.run_proc(
                oracle_conn,
                'MRDS_LOADER.DATA_REPLICATOR.start_log_table',
                [p_run_id, p_service_name, p_table_owner, p_table_name]
            )
            oracle_conn.commit()
            logging.info("start_log_table procedure executed successfully")
        except Exception as e:
            logging.error(f"Error in start_log_table: {e}")
            raise
        finally:
            # Always release the connection, even on failure.
            if oracle_conn:
                try:
                    oracle_conn.close()
                    logging.info("Oracle connection closed for start_log_table")
                except Exception as e:
                    logging.error(f"Error closing connection in start_log_table: {e}")
    t1 = PythonOperator(
        task_id='start_log_table',
        python_callable=start_log_table_task,
    )
    # ---------------------------------------------------------
    # 3) Export table (Oracle procedure writes to object store)
    # ---------------------------------------------------------
    def export_table_task(**context):
        """Call DATA_REPLICATOR.export_table to dump the table to the object store."""
        ti = context["ti"]
        # Get parameters from XCom
        p_service_name = ti.xcom_pull(task_ids='init_step', key='p_service_name')
        p_table_owner = ti.xcom_pull(task_ids='init_step', key='p_table_owner')
        p_table_name = ti.xcom_pull(task_ids='init_step', key='p_table_name')
        p_objectstore_uri = ti.xcom_pull(task_ids='init_step', key='p_objectstore_uri')
        # Create Oracle connection for this task
        oracle_conn = None
        try:
            oracle_conn = oraconn.connect('MRDS_LOADER')
            logging.info("Oracle connection established successfully for export_table")
            # Execute Oracle procedure using oraconn.run_proc() directly
            oraconn.run_proc(
                oracle_conn,
                'MRDS_LOADER.DATA_REPLICATOR.export_table',
                [p_service_name, p_table_owner, p_table_name, p_objectstore_uri]
            )
            oracle_conn.commit()
            logging.info("export_table procedure executed successfully")
        except Exception as e:
            logging.error(f"Error in export_table: {e}")
            raise
        finally:
            if oracle_conn:
                try:
                    oracle_conn.close()
                    logging.info("Oracle connection closed for export_table")
                except Exception as e:
                    logging.error(f"Error closing connection in export_table: {e}")
    t2 = PythonOperator(
        task_id='export_table',
        python_callable=export_table_task,
        trigger_rule=TriggerRule.ALL_DONE,  # Continue even if t1 failed
    )
    # ---------------------------------------------
    # 4) Devo / Impyla refresh (Python Package)
    # ---------------------------------------------
    def devo_impyla_task(**context):
        """Refresh Impala metadata/stats for the replicated table via impala_main."""
        ti = context["ti"]
        env = ti.xcom_pull(task_ids='init_step', key='env')
        store = ti.xcom_pull(task_ids='init_step', key='store')
        corporate_store = ti.xcom_pull(task_ids='init_step', key='corporate_store')
        config_path = ti.xcom_pull(task_ids='init_step', key='config_path')
        owner = ti.xcom_pull(task_ids='init_step', key='p_table_owner')
        table = ti.xcom_pull(task_ids='init_step', key='p_table_name')
        # For Impala: corporate_store.table_name (e.g., crp_mopdb.T_MPEC)
        # NOT corporate_store.owner.table_name (which would be crp_mopdb.MPEC.T_MPEC - INVALID)
        table_name = table  # Just "T_MPEC"
        logging.info(
            "Starting Impyla refresh with env=%s store=%s corporate_store=%s table=%s",
            env, store, corporate_store, table_name
        )
        logging.info("Will execute: INVALIDATE METADATA %s.%s", corporate_store, table_name)
        logging.info("Will execute: COMPUTE STATS %s.%s", corporate_store, table_name)
        try:
            # This should result in queries like:
            # INVALIDATE METADATA crp_mopdb.T_MPEC
            # COMPUTE STATS crp_mopdb.T_MPEC
            status = impala_main(config_path, env, table_name, corporate_store)
            logging.info("Impyla (Devo) task finished successfully. Status: %s", status)
            return status
        except Exception as e:
            logging.error(f"Error in devo_impyla_task: {e}")
            raise
    t3 = PythonOperator(
        task_id='devo_impyla',
        python_callable=devo_impyla_task,
        trigger_rule=TriggerRule.ALL_DONE,  # Continue even if t2 failed
    )
    # -------------------------------------
    # 5) End log table (always executes after t1, t2, t3 complete - regardless of success/failure)
    # -------------------------------------
    def end_log_table_task(**context):
        """Call DATA_REPLICATOR.end_log_table; errors are logged, not raised (cleanup)."""
        ti = context["ti"]
        # Get parameters from XCom
        p_service_name = ti.xcom_pull(task_ids='init_step', key='p_service_name')
        p_table_owner = ti.xcom_pull(task_ids='init_step', key='p_table_owner')
        p_table_name = ti.xcom_pull(task_ids='init_step', key='p_table_name')
        # Create Oracle connection for this task
        oracle_conn = None
        try:
            oracle_conn = oraconn.connect('MRDS_LOADER')
            logging.info("Oracle connection established successfully for end_log_table")
            # Execute Oracle procedure using oraconn.run_proc() directly
            oraconn.run_proc(
                oracle_conn,
                'MRDS_LOADER.DATA_REPLICATOR.end_log_table',
                [p_service_name, p_table_owner, p_table_name]
            )
            oracle_conn.commit()
            logging.info("end_log_table procedure executed successfully")
        except Exception as e:
            logging.error(f"Error in end_log_table: {e}")
            # Don't raise the exception since this is a cleanup task
            logging.info("Continuing despite end_log_table error (cleanup task)")
        finally:
            if oracle_conn:
                try:
                    oracle_conn.close()
                    logging.info("Oracle connection closed for end_log_table")
                except Exception as e:
                    logging.error(f"Error closing connection in end_log_table: {e}")
    t4 = PythonOperator(
        task_id='end_log_table',
        python_callable=end_log_table_task,
        trigger_rule=TriggerRule.ALL_DONE,  # Run after t1, t2, t3 complete (success or failure)
    )
    # -----------------------------------------------------
    # 6) Check and fail the DAG if any of t1..t3 actually failed
    # This task always runs after t4, but will fail the DAG if needed
    # -----------------------------------------------------
    def fail_if_any_failed(**context):
        """Fail the DAG run if any critical upstream task ended in state 'failed'."""
        dag_run = context['dag_run']
        check_tasks = ['start_log_table', 'export_table', 'devo_impyla']
        failed = []
        # NOTE(review): only the 'failed' state is counted here; states such as
        # 'upstream_failed' would not trigger a failure — confirm intent.
        for tid in check_tasks:
            ti_up = dag_run.get_task_instance(tid)
            if ti_up and ti_up.state == 'failed':
                failed.append(tid)
        if failed:
            error_msg = f"Critical task(s) failed: {', '.join(failed)}. DAG execution failed."
            logging.error(error_msg)
            raise AirflowFailException(error_msg)
        logging.info("All critical tasks completed successfully: %s", check_tasks)
    t5 = PythonOperator(
        task_id='fail_if_any_failed',
        python_callable=fail_if_any_failed,
        trigger_rule=TriggerRule.ALL_DONE,  # Always run after t4
    )
    # ---------
    # Task Dependencies - SEQUENTIAL
    # ---------
    # Sequential flow: init -> t1 -> t2 -> t3
    init >> t1 >> t2 >> t3
    # t4 runs after t1, t2, t3 are all done (regardless of success/failure)
    [t1, t2, t3] >> t4
    # t5 always runs after t4 to check for failures and fail the DAG if needed
    t4 >> t5

View File

@@ -0,0 +1,18 @@
# Smoke-test DAG: verifies that the 'oracle_default' connection works by
# running a trivial query against DUAL.
from airflow import DAG
#from airflow.providers.oracle.operators.oracle import OracleOperator
from airflow.operators.bash import BashOperator
from datetime import datetime
from airflow.providers.common.sql.operators.sql import SQLExecuteQueryOperator
# FIX: removed a duplicate `from airflow import DAG` import that appeared twice.

with DAG(
    'test_oracle_connection',
    start_date=datetime(2025, 6, 13),
    schedule_interval=None,  # trigger manually only
    catchup=False,  # consistent with the other DAGs in this repo
) as dag:
    # Single task: any result proves connectivity; failure surfaces the
    # connection problem directly in the task log.
    test_query = SQLExecuteQueryOperator(
        task_id='test_oracle_query',
        conn_id='oracle_default',
        sql='SELECT 1 FROM DUAL',
    )

View File

@@ -0,0 +1,244 @@
import sys
import os
from airflow import DAG
from airflow.operators.python import PythonOperator
from airflow.utils.dates import days_ago
from airflow.utils.trigger_rule import TriggerRule
from datetime import datetime, timedelta
import logging
try:
from airflow.exceptions import AirflowFailException, AirflowSkipException
except Exception:
from airflow.exceptions import AirflowException as AirflowFailException
from airflow.exceptions import AirflowSkipException
# Importing custom modules
sys.path.append('/opt/airflow/python/mrds_common')
sys.path.append('/opt/airflow/src/airflow/dags/ods/exdi')
from mrds.utils.manage_runs import init_workflow as mrds_init_workflow, finalise_workflow as mrds_finalise_workflow
from mrds.core import main as mrds_main
# Configuration dictionary. The FIRST key runs sequentially right after init;
# all remaining keys run in parallel after it (see task wiring below).
# Workflow: Init -> First Task (Sequential) -> Parallel Tasks -> Finalize
# Input: Only needs the PARSE.yaml config file and source filename.
TASK_CONFIGS = {
    "m_ODS_EXDI_TASK1": {
        "source_filename": "EXDI_TASK1.csv",
        "config_file": "/opt/airflow/src/airflow/dags/ods/exdi/exdi_process/config/yaml/m_ODS_EXDI_TASK1_PARSE.yaml"
    },
    "m_ODS_EXDI_TASK2": {
        "source_filename": "EXDI_TASK2.csv",
        "config_file": "/opt/airflow/src/airflow/dags/ods/exdi/exdi_process/config/yaml/m_ODS_EXDI_TASK2_PARSE.yaml"
    },
    "m_ODS_EXDI_TASK3": {
        "source_filename": "EXDI_TASK3.csv",
        "config_file": "/opt/airflow/src/airflow/dags/ods/exdi/exdi_process/config/yaml/m_ODS_EXDI_TASK3_PARSE.yaml"
    }
}
# Default arguments applied to every task in this DAG.
default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': days_ago(1),
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
}
# Derive the DAG id from this file's basename so renaming the file renames the DAG.
dag_id = os.path.splitext(os.path.basename(__file__))[0]
# Identifiers under which the run is registered via mrds_init_workflow.
WORKFLOW_CONFIG = {
    "database_name": "ODS",
    "workflow_name": dag_id
}
with DAG(
    dag_id=dag_id,
    default_args=default_args,
    description='Run EXDI data processing workflow with MRDS - Multi-task',
    schedule_interval=None,
    catchup=False,
    tags=["EXDI", "MRDS", "Multi-Task", "ODS"]
) as dag:
    def init_workflow_task(**context):
        """Initialize the MRDS workflow run and publish its context via XCom."""
        try:
            database_name = WORKFLOW_CONFIG["database_name"]
            workflow_name = WORKFLOW_CONFIG["workflow_name"]
            env = os.getenv("MRDS_ENV", "dev")
            username = os.getenv("MRDS_LOADER_DB_USER")
            password = os.getenv("MRDS_LOADER_DB_PASS")
            tnsalias = os.getenv("MRDS_LOADER_DB_TNS")
            # Fail fast, naming exactly which credentials are missing.
            if not all([username, password, tnsalias]):
                missing_vars = []
                if not username: missing_vars.append("MRDS_LOADER_DB_USER")
                if not password: missing_vars.append("MRDS_LOADER_DB_PASS")
                if not tnsalias: missing_vars.append("MRDS_LOADER_DB_TNS")
                raise ValueError(f"Missing required environment variables: {', '.join(missing_vars)}")
            workflow_run_id = str(context['ti'].run_id)
            a_workflow_history_key = mrds_init_workflow(database_name, workflow_name, workflow_run_id)
            workflow_context = {
                "run_id": workflow_run_id,
                "a_workflow_history_key": a_workflow_history_key
            }
            # Push context to XCom for downstream tasks
            ti = context['ti']
            ti.xcom_push(key='workflow_history_key', value=a_workflow_history_key)
            ti.xcom_push(key='workflow_context', value=workflow_context)
            ti.xcom_push(key='env', value=env)
            logging.info("Workflow initialization completed successfully")
        except Exception as e:
            logging.error(f"Error initializing workflow: {e}", exc_info=True)
            raise

    def run_mrds_task(**context):
        """Run one EXDI MRDS processing task; config is selected by task_id.

        The task_id is expected to be '<task_name>_PARSE' where <task_name>
        is a key of TASK_CONFIGS. Pushes success/failure status to XCom so
        the finaliser can report per-task outcomes.
        """
        try:
            ti = context['ti']
            task_id = context['task'].task_id
            # Extract task name from task_id (strip the '_PARSE' suffix).
            task_name = task_id.replace('_PARSE', '') if task_id.endswith('_PARSE') else task_id
            # Get task configuration
            task_config = TASK_CONFIGS.get(task_name)
            if not task_config:
                raise ValueError(f"No configuration found for task: {task_name}")
            source_filename = task_config["source_filename"]
            config_file = task_config["config_file"]
            # Get context from init task
            workflow_context = ti.xcom_pull(key='workflow_context', task_ids='init_workflow')
            if not workflow_context:
                raise ValueError("No workflow_context from init task")
            # Verify config file exists
            if not os.path.exists(config_file):
                raise FileNotFoundError(f"PARSE config file not found: {config_file}")
            logging.info(f"Processing EXDI MRDS with source_filename: {source_filename}, config_file: {config_file}")
            # Run MRDS - file should already be in bucket from EXDI flow
            mrds_main(workflow_context, source_filename, config_file, generate_workflow_context=False)
            logging.info(f"EXDI MRDS task completed successfully for {task_name}")
            # Push success status to XCom
            ti.xcom_push(key='mrds_success', value=True)
            ti.xcom_push(key='task_status', value='SUCCESS')
            return "SUCCESS"
        except Exception as e:
            logging.error(f"Error running EXDI MRDS task: {e}", exc_info=True)
            # Push failure status to XCom
            ti = context['ti']
            ti.xcom_push(key='mrds_success', value=False)
            ti.xcom_push(key='task_status', value='FAILED')
            ti.xcom_push(key='error_message', value=str(e))
            raise

    def finalise_workflow_task(**context):
        """Finalize workflow based on overall execution results.

        Runs with trigger_rule=ALL_DONE, so it must inspect the state of every
        MRDS task itself and fail the DAG explicitly when any of them failed.
        """
        try:
            ti = context['ti']
            dag_run = context['dag_run']
            a_workflow_history_key = ti.xcom_pull(key='workflow_history_key', task_ids='init_workflow')
            if a_workflow_history_key is None:
                raise ValueError("No workflow history key found in XCom; cannot finalise workflow")
            # Check all task statuses
            workflow_success = True
            failure_reasons = []
            for task_name in TASK_CONFIGS.keys():
                mrds_task_id = f'{task_name}_PARSE'
                mrds_task = dag_run.get_task_instance(mrds_task_id)
                # FIX: guard against get_task_instance returning None before
                # reading .state (previously an AttributeError waiting to happen).
                if mrds_task is not None and mrds_task.state == 'failed':
                    workflow_success = False
                    try:
                        error_msg = ti.xcom_pull(key='error_message', task_ids=mrds_task_id)
                        failure_reasons.append(f"{task_name}: MRDS task failed - {error_msg}")
                    except Exception:  # FIX: was a bare `except:`
                        failure_reasons.append(f"{task_name}: MRDS task failed")
            # Finalize workflow.
            # NOTE(review): this DAG passes "Y"/"N" while sibling DAGs pass
            # "SUCCESS"/"FAILED" to finalise — confirm the expected API values.
            if workflow_success:
                mrds_finalise_workflow(a_workflow_history_key, "Y")
                logging.info(f"Finalised EXDI workflow with history key {a_workflow_history_key} as SUCCESS")
            else:
                mrds_finalise_workflow(a_workflow_history_key, "N")
                logging.error(f"Finalised EXDI workflow with history key {a_workflow_history_key} as FAILED")
                raise AirflowFailException(f"EXDI Workflow failed: {', '.join(failure_reasons)}")
        except AirflowFailException:
            raise
        except Exception as e:
            logging.error(f"Error finalizing EXDI workflow: {e}", exc_info=True)
            # Best-effort: still try to mark the run as failed.
            try:
                if 'a_workflow_history_key' in locals() and a_workflow_history_key:
                    mrds_finalise_workflow(a_workflow_history_key, "N")
            except Exception:  # FIX: was a bare `except:` (would swallow SystemExit/KeyboardInterrupt)
                pass
            raise AirflowFailException(f"EXDI Workflow finalization failed: {e}")

    # Create tasks
    init_workflow = PythonOperator(
        task_id='init_workflow',
        python_callable=init_workflow_task,
        provide_context=True,
    )
    finalize_workflow = PythonOperator(
        task_id='finalize_workflow',
        python_callable=finalise_workflow_task,
        provide_context=True,
        trigger_rule=TriggerRule.ALL_DONE,  # always finalise, even after failures
    )
    # Get task names - first task runs sequentially, others in parallel
    task_names = list(TASK_CONFIGS.keys())
    first_task_name = task_names[0]
    parallel_task_names = task_names[1:]
    # Create first task (sequential)
    first_mrds_task = PythonOperator(
        task_id=f'{first_task_name}_PARSE',
        python_callable=run_mrds_task,
        provide_context=True,
    )
    # Set dependencies for first task
    init_workflow >> first_mrds_task >> finalize_workflow
    # Create parallel tasks
    for task_name in parallel_task_names:
        mrds_task = PythonOperator(
            task_id=f'{task_name}_PARSE',
            python_callable=run_mrds_task,
            provide_context=True,
        )
        # Parallel tasks start after first task completes
        first_mrds_task >> mrds_task >> finalize_workflow
    logging.info(f"EXDI DAG created with {len(TASK_CONFIGS)} tasks: {list(TASK_CONFIGS.keys())}")

View File

@@ -0,0 +1,44 @@
from datetime import datetime
from airflow import DAG
from airflow.operators.bash import BashOperator
from airflow.operators.python import PythonOperator
# Smoke-test DAG: a trivial bash/python chain used to confirm that the
# Airflow installation actually executes work.
def print_statement():
    """Print a fixed greeting (output identical to the original demo)."""
    print("Hello from the Python function!!!!!")


def print_message():
    """Print the closing demo message."""
    print("Last message from Python!, hope things are going good")


with DAG(
    'demo_task_workflow',
    start_date=datetime(2025, 6, 13),
    schedule_interval=None,
    catchup=False,
) as dag:
    # Alternate bash and python steps so both operator types are exercised.
    bash_intro = BashOperator(
        task_id='print_with_bash',
        bash_command='echo "Lets begin"',
    )
    python_hello = PythonOperator(
        task_id='print_with_python',
        python_callable=print_statement,
    )
    bash_followup = BashOperator(
        task_id='another_bash_task',
        bash_command='echo "So far so good!"',
    )
    python_farewell = PythonOperator(
        task_id='another_python_task',
        python_callable=print_message,
    )

    # Strictly linear pipeline, same order as before.
    bash_intro >> python_hello >> bash_followup >> python_farewell

View File

@@ -0,0 +1,95 @@
import sys
import os
from airflow import DAG
from airflow.operators.python import PythonOperator
from airflow.utils.dates import days_ago
from datetime import datetime, timedelta
import logging
from mrds.utils.manage_runs import init_workflow, finalise_workflow
### CASPER CONNECTOR WITH DYNAMIC WORKFLOW CONTEXT & HISTORY KEY
# Importing custom module#s
sys.path.append('/opt/airflow/python/connectors/casper')
sys.path.append('/opt/airflow/python/mrds_common')
sys.path.append('/opt/airflow/src/airflow/dags/ods/rqsd')
"""username = os.getenv("MRDS_LOADER_DB_USER")
password = os.getenv("MRDS_LOADER_DB_PASS")
tnsalias = os.getenv("MRDS_LOADER_DB_TNS") """
# connstr = f"{username}/{password}@{tnsalias}"
# Importing the main function from casper script
from casper_rqsd import main as casper_main
# Default DAG arguments
default_args = {
'owner': 'airflow',
'depends_on_past': False,
'start_date': days_ago(1),
'email_on_failure': False,
'email_on_retry': False,
'retries': 2,
'retry_delay': timedelta(minutes=5),
}
with DAG(
dag_id='rqsd_casper_connector_test',
default_args=default_args,
description='Run Casper RQSD data ingestion workflow',
schedule_interval=None, # we can set later
#start_date=datetime(2025, 10, 7),
catchup=False,
tags=['Casper', 'RQSD', 'Connector'],
) as dag:
def run_casper_rqsd(**context):
try:
#workflow_context = {"run_id": 34, "a_workflow_history_key": 6}
flow_config_path = "/opt/airflow/src/airflow/dags/ods/rqsd/rqsd_process/config/yaml/flow_config_casper.yaml"
env_config_path = "/opt/airflow/python/connectors/casper/config/env_config.yaml"
workflow_run_id = str(context['ti'].run_id)
a_workflow_history_key = init_workflow(database_name, workflow_name, workflow_run_id)
logging.info(f"Initialized workflow with history key: {a_workflow_history_key}")
workflow_context = {
"run_id": workflow_run_id,
"a_workflow_history_key": a_workflow_history_key
}
# Pick env from ENV variables
env = os.getenv("MRDS_ENV")
#env = os.getenv("MRDS_ENV", "lab")
username = os.getenv("MRDS_LOADER_DB_USER")
password = os.getenv("MRDS_LOADER_DB_PASS")
tnsalias = os.getenv("MRDS_LOADER_DB_TNS")
if not all([username, password, tnsalias]):
raise ValueError(
"Missing one or more required environment variables: "
"MRDS_LOADER_DB_USER, MRDS_LOADER_DB_PASS, MRDS_LOADER_DB_TNS"
)
logging.info(
f"Starting Casper RQSD workflow from Airflow DAG for env '{env}'"
)
#Calling main()
casper_main(workflow_context, flow_config_path, env_config_path, env)
logging.info("Casper RQSD workflow completed successfully")
except Exception as e:
logging.error(f"Error running Casper RQSD workflow: {e}", exc_info=True)
raise
run_casper = PythonOperator(
task_id='run_casper_rqsd',
python_callable=run_casper_rqsd,
provide_context=True,
)

View File

@@ -0,0 +1,117 @@
import sys
import os
from airflow import DAG
from airflow.operators.python import PythonOperator
from airflow.utils.dates import days_ago
from datetime import datetime, timedelta
import logging

### DEVO CONNECTOR WITH DYNAMIC WORKFLOW CONTEXT & HISTORY KEY
# Make the bundled project packages importable.
# FIX: removed a duplicated append of the same connectors/devo path.
sys.path.append('/opt/airflow/python/connectors/devo')
sys.path.append('/opt/airflow/python/mrds_common')
sys.path.append('/opt/airflow/src/airflow/dags/ods/rqsd')

# Import your functions from manage_runs and devo_connector as before
from mrds.utils.manage_runs import init_workflow, finalise_workflow
from devo_connector import main as devo_main

# Default arguments applied to every task in this DAG.
default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': days_ago(1),
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 3,
    'retry_delay': timedelta(minutes=5),
}

# Derive the DAG id from this file's basename so renaming the file renames the DAG.
dag_id = os.path.splitext(os.path.basename(__file__))[0]

with DAG(
    dag_id=dag_id,
    default_args=default_args,
    description='Run devo RQSD data ingestion workflow',
    schedule_interval=None,
    catchup=False,
    tags=["Devo", "RQSD", "Connector"],
) as dag:
    def run_devo_connector_rqsd(**context):
        """Run the Devo RQSD ingestion and register the run via init_workflow.

        Pushes the workflow history key to XCom (key='workflow_history_key')
        for the downstream finalise task. On failure the run is finalised
        with "FAILED" before the exception is re-raised.
        """
        try:
            env = os.getenv("MRDS_ENV")
            username = os.getenv("MRDS_LOADER_DB_USER")
            password = os.getenv("MRDS_LOADER_DB_PASS")
            tnsalias = os.getenv("MRDS_LOADER_DB_TNS")
            if not all([username, password, tnsalias]):
                raise ValueError(
                    "Missing one or more required environment variables: "
                    "MRDS_LOADER_DB_USER, MRDS_LOADER_DB_PASS, MRDS_LOADER_DB_TNS"
                )
            # FIX: this log line previously said "Casper" in a Devo DAG;
            # also removed two leftover '======== THIS ... =========' debug prints.
            logging.info(f"Starting Devo RQSD workflow from Airflow DAG for env '{env}'")
            database_name = 'MOPDB'
            workflow_name = 'w_MOPDB_RQSD_PROCESS'
            workflow_run_id = str(context['ti'].run_id)
            a_workflow_history_key = init_workflow(database_name, workflow_name, workflow_run_id)
            logging.info(f"Initialized workflow with history key: {a_workflow_history_key}")
            workflow_context = {
                "run_id": workflow_run_id,
                "a_workflow_history_key": a_workflow_history_key
            }
            flow_config_path = "/opt/airflow/src/airflow/dags/ods/rqsd/rqsd_process/config/yaml/flow_config_rqsd_observations.yaml"
            env_config_path = "/opt/airflow/python/connectors/devo/config/env_config_rqsd.yaml"
            logging.info("Starting Devo RQSD workflow from Airflow DAG")
            devo_main(workflow_context, flow_config_path, env_config_path, env)
            logging.info("Devo RQSD workflow completed successfully")
            # Push the workflow history key to XCom for downstream tasks
            context['ti'].xcom_push(key='workflow_history_key', value=a_workflow_history_key)
        except Exception as e:
            logging.error(f"Error running Devo RQSD workflow: {e}", exc_info=True)
            # If init_workflow succeeded but workflow failed, finalize with FAILED status
            if 'a_workflow_history_key' in locals():
                try:
                    finalise_workflow(a_workflow_history_key, "FAILED")
                except Exception as finalise_error:
                    logging.error(f"Failed to finalise workflow after error: {finalise_error}")
            raise

    def finalise_workflow_task(**context):
        """Finalise the workflow run as SUCCESS using the XCom-published key."""
        ti = context['ti']
        a_workflow_history_key = ti.xcom_pull(key='workflow_history_key', task_ids='run_devo_connector_rqsd')
        if a_workflow_history_key is None:
            raise ValueError("No workflow history key found in XCom; cannot finalise workflow")
        finalise_workflow(a_workflow_history_key, "SUCCESS")
        logging.info(f"Finalised workflow with history key {a_workflow_history_key} as SUCCESS")

    run_devo = PythonOperator(
        task_id='run_devo_connector_rqsd',
        python_callable=run_devo_connector_rqsd,
        provide_context=True,
    )
    finalize = PythonOperator(
        task_id='finalise_workflow',
        python_callable=finalise_workflow_task,
        provide_context=True,
    )

    run_devo >> finalize

View File

View File

@@ -0,0 +1,346 @@
from __future__ import annotations
import os
import sys
import logging
import yaml
from datetime import timedelta
from airflow import DAG
from airflow.utils.dates import days_ago
from airflow.utils.trigger_rule import TriggerRule
from airflow.operators.python import PythonOperator
try:
from airflow.exceptions import AirflowFailException
except Exception:
from airflow.exceptions import AirflowException as AirflowFailException
sys.path.append('/opt/airflow/python/mrds_common')
sys.path.append('/opt/airflow/python/devo_replicator/data_replicator')
from mrds.utils import oraconn
from impala_refresher import main as impala_main
from mrds.utils.security_utils import get_verified_run_id, verify_run_id
ENV_CONFIG_PATH = "/opt/airflow/python/devo_replicator/config/env_config.yaml"
default_args = {
'owner': 'airflow',
'depends_on_past': False,
'start_date': days_ago(1),
'email_on_failure': False,
'email_on_retry': False,
'retries': 1,
'retry_delay': timedelta(minutes=1),
}
with DAG(
dag_id='devo_replicator_core',
default_args=default_args,
description='Core Devo replicator workflow for single table',
schedule=None,
catchup=False,
tags=['DevoReplicator'],
max_active_runs=10,
max_active_tasks=16,
) as dag:
# Init - read config from context
def init_step(**context):
dag_run = context.get("dag_run")
ti = context["ti"]
conf = (dag_run.conf or {}) if dag_run else {}
env = os.getenv("MRDS_ENV")
if not env:
raise ValueError("MRDS_ENV environment variable is required")
env = env.lower()
store = conf.get("store")
if not store:
raise ValueError("store parameter is required")
store = store.lower()
owner_table = conf.get("owner_table")
if not owner_table or '.' not in owner_table:
raise ValueError("owner_table must be in format 'OWNER.TABLE_NAME'")
table_owner, table_name = owner_table.split('.', 1)
if env not in {"dev", "tst","acc","prd"}:
raise ValueError(f"Unsupported env '{env}'. Expected 'dev', 'tst', 'acc' or 'prd'.")
if store not in {"mopdb", "rar", "rqsd"}:
raise ValueError(f"Unsupported store '{store}'. Expected 'mopdb', 'rar', 'rqsd'.")
if store == "mopdb":
p_service_name = "MOPDB"
elif store == "rar":
p_service_name = "RAR"
elif store == "rqsd":
p_service_name = "RQSD"
with open(ENV_CONFIG_PATH, "r") as f:
cfg = yaml.safe_load(f)
env_cfg = cfg[env]
store_cfg = cfg[store]
p_objectstore_uri = env_cfg["S3_LOCATION_URI"].replace("{0}", store.lower())
# Get verified run_id using security utilities
p_run_id = get_verified_run_id(context)
logging.info("=== init_step === env=%s store=%s table=%s.%s run_id=%s",
env, store, table_owner, table_name, p_run_id)
xcom = {
"env": env,
"store": store,
"config_path": ENV_CONFIG_PATH,
"p_run_id": p_run_id,
"p_service_name": p_service_name,
"p_table_owner": table_owner,
"p_table_name": table_name,
"p_objectstore_uri": p_objectstore_uri,
"corporate_store": store_cfg["corporate_store"],
"owner_table": owner_table,
}
for k, v in xcom.items():
ti.xcom_push(key=k, value=v)
init = PythonOperator(
task_id='init_step',
python_callable=init_step,
)
# Start log table
def start_log_table_task(**context):
    """Open a replication log entry via DATA_REPLICATOR.start_log_table.

    Pulls run_id/service/owner/table from init_step XComs; logs and re-raises
    on failure so the task is marked failed.
    """
    ti = context["ti"]
    proc_args = [
        ti.xcom_pull(task_ids='init_step', key=key)
        for key in ('p_run_id', 'p_service_name', 'p_table_owner', 'p_table_name')
    ]
    oracle_conn = None
    try:
        oracle_conn = oraconn.connect('MRDS_LOADER')
        oraconn.run_proc(
            oracle_conn,
            'MRDS_LOADER.DATA_REPLICATOR.start_log_table',
            proc_args,
        )
        oracle_conn.commit()
        logging.info("start_log_table procedure executed successfully")
    except Exception as e:
        logging.error(f"Error in start_log_table: {e}")
        raise
    finally:
        if oracle_conn:
            oracle_conn.close()

t1 = PythonOperator(
    task_id='start_log_table',
    python_callable=start_log_table_task,
)
# Export table
def export_table_task(**context):
    """Run DATA_REPLICATOR.export_table to export the source table to the
    object store location resolved by init_step.

    Pulls service/owner/table/URI from init_step XComs; logs and re-raises on
    failure so the task ends up in 'failed' state.
    """
    ti = context["ti"]
    p_service_name = ti.xcom_pull(task_ids='init_step', key='p_service_name')
    p_table_owner = ti.xcom_pull(task_ids='init_step', key='p_table_owner')
    p_table_name = ti.xcom_pull(task_ids='init_step', key='p_table_name')
    p_objectstore_uri = ti.xcom_pull(task_ids='init_step', key='p_objectstore_uri')
    oracle_conn = None
    try:
        oracle_conn = oraconn.connect('MRDS_LOADER')
        oraconn.run_proc(
            oracle_conn,
            'MRDS_LOADER.DATA_REPLICATOR.export_table',
            [p_service_name, p_table_owner, p_table_name, p_objectstore_uri]
        )
        oracle_conn.commit()
        logging.info("export_table procedure executed successfully")
    except Exception as e:
        logging.error(f"Error in export_table: {e}")
        raise
    finally:
        # Always release the connection, even on failure.
        if oracle_conn:
            oracle_conn.close()
t2 = PythonOperator(
    task_id='export_table',
    python_callable=export_table_task,
    # ALL_DONE: runs even if start_log_table failed; the downstream
    # check_previous_tasks_success gate decides whether the run may proceed.
    trigger_rule=TriggerRule.ALL_DONE,
)
# Gate: only proceed to the table generator when the upstream tasks succeeded.
def check_previous_tasks_success(**context):
    """Raise AirflowFailException unless start_log_table and export_table both
    ended in 'success'; returns True otherwise."""
    ti = context["ti"]
    dag_run = context['dag_run']
    store = ti.xcom_pull(task_ids='init_step', key='store')
    instances = [
        (tid, dag_run.get_task_instance(tid))
        for tid in ('start_log_table', 'export_table')
    ]
    failed = [
        f"{tid}:{inst.state}"
        for tid, inst in instances
        if inst and inst.state != 'success'
    ]
    if failed:
        error_msg = f"Cannot proceed with {store} table generator. Previous tasks not successful: {', '.join(failed)}"
        logging.error(error_msg)
        raise AirflowFailException(error_msg)
    logging.info(f"All previous tasks succeeded. Ready to trigger {store} table generator.")
    return True

t3_check = PythonOperator(
    task_id='check_previous_tasks_success',
    python_callable=check_previous_tasks_success,
    trigger_rule=TriggerRule.ALL_DONE,
)
def drop_table(**context):
    """Clear the previously exported _COPY objects for this table from the
    bucket via DATA_REPLICATOR.clear_s3_bucket.

    The procedure is invoked for the _COPY-suffixed table name; logs and
    re-raises on failure.
    """
    ti = context["ti"]
    p_service_name = ti.xcom_pull(task_ids='init_step', key='p_service_name')
    p_table_owner = ti.xcom_pull(task_ids='init_step', key='p_table_owner')
    p_table_name = ti.xcom_pull(task_ids='init_step', key='p_table_name')
    p_objectstore_uri = ti.xcom_pull(task_ids='init_step', key='p_objectstore_uri')
    # The bucket holds the export under the _COPY-suffixed name.
    target_table = p_table_name + '_COPY'
    oracle_conn = None
    try:
        oracle_conn = oraconn.connect('MRDS_LOADER')
        oraconn.run_proc(
            oracle_conn,
            'MRDS_LOADER.DATA_REPLICATOR.clear_s3_bucket',
            [p_service_name, p_table_owner, target_table, p_objectstore_uri]
        )
        oracle_conn.commit()
        # BUG FIX: the old log used p_table_name[:-5], which chopped the last
        # five characters off the *base* table name (it carries no suffix here)
        # and therefore logged a wrong name. Log the table actually cleared.
        logging.info("clear_s3_bucket for table %s procedure executed successfully",
                     target_table.lower())
    except Exception as e:
        logging.error(f"Error in clear_s3_bucket: {e}")
        raise
    finally:
        if oracle_conn:
            oracle_conn.close()

t3_drop = PythonOperator(
    task_id='drop_table',
    python_callable=drop_table,
    trigger_rule=TriggerRule.ALL_DONE,
)
# Trigger table generator DAG based on store
def trigger_table_generator(**context):
    """Fire the store-specific table generator DAG for the _COPY table and
    record the triggered run id in XCom. Returns the new run_id."""
    from airflow.api.common.trigger_dag import trigger_dag
    ti = context["ti"]
    store = ti.xcom_pull(task_ids='init_step', key='store')
    table_owner = ti.xcom_pull(task_ids='init_step', key='p_table_owner')
    table_name = ti.xcom_pull(task_ids='init_step', key='p_table_name')
    # Store -> generator DAG dispatch table.
    generator_dag_by_store = {
        "mopdb": 'devo_table_generator_trigger_mopdb',
        "rar": 'devo_table_generator_trigger_rar',
        "rqsd": 'devo_table_generator_trigger_rqsd',
    }
    if store not in generator_dag_by_store:
        raise ValueError(f"Unsupported store: {store}")
    target_dag_id = generator_dag_by_store[store]
    # The generator works on the _COPY-suffixed target table.
    trigger_conf = {
        "owner_table": f"{table_owner}.{table_name}_COPY"
    }
    logging.info(f"Triggering {target_dag_id} with conf: {trigger_conf}")
    try:
        triggered_run = trigger_dag(
            dag_id=target_dag_id,
            conf=trigger_conf,
            execution_date=None,
            replace_microseconds=False
        )
        logging.info(f"Successfully triggered {target_dag_id}, run_id: {triggered_run.run_id}")
        ti.xcom_push(key='triggered_dag_run_id', value=triggered_run.run_id)
        ti.xcom_push(key='triggered_dag_id', value=target_dag_id)
        return triggered_run.run_id
    except Exception as e:
        logging.error(f"Error triggering {target_dag_id}: {e}")
        raise

t3_trigger = PythonOperator(
    task_id='trigger_table_generator',
    python_callable=trigger_table_generator,
    trigger_rule=TriggerRule.ALL_DONE,
)
# End log table
def end_log_table_task(**context):
    """Close the replication log entry via DATA_REPLICATOR.end_log_table.

    Cleanup semantics: errors are logged but deliberately NOT re-raised, so a
    failed log-close never fails the DAG run (see the explicit 'Continuing'
    log below).
    """
    ti = context["ti"]
    p_service_name = ti.xcom_pull(task_ids='init_step', key='p_service_name')
    p_table_owner = ti.xcom_pull(task_ids='init_step', key='p_table_owner')
    p_table_name = ti.xcom_pull(task_ids='init_step', key='p_table_name')
    oracle_conn = None
    try:
        oracle_conn = oraconn.connect('MRDS_LOADER')
        oraconn.run_proc(
            oracle_conn,
            'MRDS_LOADER.DATA_REPLICATOR.end_log_table',
            [p_service_name, p_table_owner, p_table_name]
        )
        oracle_conn.commit()
        logging.info("end_log_table procedure executed successfully")
    except Exception as e:
        # Swallowed on purpose: this is best-effort cleanup.
        logging.error(f"Error in end_log_table: {e}")
        logging.info("Continuing despite end_log_table error (cleanup task)")
    finally:
        if oracle_conn:
            oracle_conn.close()
t4 = PythonOperator(
    task_id='end_log_table',
    python_callable=end_log_table_task,
    # ALL_DONE: the log entry is closed no matter how upstream tasks ended.
    trigger_rule=TriggerRule.ALL_DONE,
)
# Check status and fail if needed
def fail_if_any_failed(**context):
    """Fail the whole DAG run if any critical task ended in 'failed'."""
    dag_run = context['dag_run']
    check_tasks = ['start_log_table', 'export_table', 'check_previous_tasks_success', 'trigger_table_generator']
    failed = [
        tid for tid in check_tasks
        if (lambda inst: inst and inst.state == 'failed')(dag_run.get_task_instance(tid))
    ]
    if failed:
        error_msg = f"Critical task(s) failed: {', '.join(failed)}. DAG execution failed."
        logging.error(error_msg)
        raise AirflowFailException(error_msg)
    logging.info("All critical tasks completed successfully: %s", check_tasks)

t5 = PythonOperator(
    task_id='fail_if_any_failed',
    python_callable=fail_if_any_failed,
    trigger_rule=TriggerRule.ALL_DONE,
)
# Dependencies
# Main chain: validate conf -> open log -> export -> success gate ->
# clear old _COPY objects -> trigger the store-specific generator.
init >> t1 >> t2 >> t3_check >> t3_drop >> t3_trigger
# end_log_table (t4) runs once t1/t2/t3_trigger are done in any state
# (its trigger_rule is ALL_DONE), then t5 decides the final run outcome.
[t1, t2, t3_trigger] >> t4
t4 >> t5

View File

@@ -0,0 +1,239 @@
from airflow import DAG
from airflow.operators.python_operator import PythonOperator
from airflow.hooks.S3_hook import S3Hook
from datetime import datetime, timedelta
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
import io
import os
import logging
import sys
sys.path.append('/opt/airflow/python/mrds_common')
from mrds.utils import oraconn
# --- Module configuration -------------------------------------------------
SERVICE_NAME = "SERVICE_NAME"  # NOTE(review): placeholder value — confirm before deploying
OWNER = "C2D"                  # schema owner of the source table
TABLE_NAME = "T_CEPH"          # source table to replicate
METADATA_OWNER = "CT_MOPDB"    # schema holding the metadata inventory
METADATA_TABLE = "mopdb_metadata_inventory"  # column-metadata lookup table
USE_LOCAL_STORAGE = True       # True -> write parquet under LOCAL_OUTPUT_DIR, False -> upload to S3
LOCAL_OUTPUT_DIR = "/tmp/devo_replicator_output"
S3_BUCKET = "bucket-name"      # NOTE(review): placeholder bucket name — must be changed
S3_PREFIX = "devo/replicator/C2D/T_CEPH/"
AWS_CONN_ID = "aws_default"    # Airflow connection used by S3Hook
DEFAULT_ARGS = {
    "owner": "airflow",
    "depends_on_past": False,
    "email_on_failure": False,
    "email_on_retry": False,
    "retries": 1,
    "retry_delay": timedelta(minutes=5),
}
DAG_ID = "devo_replicator_pandas"
SCHEDULE_INTERVAL = None       # manual-trigger only
CHUNK_SIZE = 100000            # rows fetched per pandas read_sql chunk
def query_oracle_template(owner, table_name):
    """Build a typed SELECT for OWNER.TABLE_NAME from the metadata inventory.

    Returns (sql, column_metadata): `sql` CASTs every column to a stable type
    (dates/timestamps to VARCHAR2, CLOBs truncated to 32767 chars) and
    `column_metadata` maps column name -> target pandas dtype.
    Raises ValueError when the inventory has no current rows for the table.
    """
    # NOTE(review): owner/table_name are interpolated into SQL via str.format;
    # this is safe only while they come from trusted module constants.
    conn = None  # BUG FIX: pre-initialise so the finally block cannot NameError
    try:
        input_query = """SELECT
    COLUMN_NAME,
    DATA_TYPE,
    CHAR_LENGTH,
    DATA_PRECISION,
    DATA_SCALE,
    COLUMN_ID,
    CASE
        WHEN DATA_TYPE = 'DATE' OR DATA_TYPE LIKE '%TIMESTAMP%' THEN
            'CAST(' || COLUMN_NAME || ' AS VARCHAR2(100)) AS ' || COLUMN_NAME
        WHEN DATA_TYPE = 'VARCHAR2' OR DATA_TYPE LIKE '%CHAR%' THEN
            'CAST(' || COLUMN_NAME || ' AS VARCHAR2(' || CAST(CHAR_LENGTH AS INT) || ')) AS ' || COLUMN_NAME
        WHEN DATA_TYPE IN ('NUMBER', 'DECIMAL') AND DATA_PRECISION IS NOT NULL AND DATA_SCALE IS NOT NULL THEN
            'CAST(' || COLUMN_NAME || ' AS ' || DATA_TYPE || '(' || CAST(DATA_PRECISION AS INT) || ',' || CAST(DATA_SCALE AS INT) || ')) AS ' || COLUMN_NAME
        WHEN DATA_TYPE IN ('NUMBER', 'DECIMAL') AND DATA_PRECISION IS NOT NULL THEN
            'CAST(' || COLUMN_NAME || ' AS ' || DATA_TYPE || '(' || CAST(DATA_PRECISION AS INT) || ')) AS ' || COLUMN_NAME
        WHEN DATA_TYPE = 'CLOB' THEN
            'TO_CHAR(SUBSTR(' || COLUMN_NAME || ', 1, 32767)) AS ' || COLUMN_NAME
        ELSE
            COLUMN_NAME
    END AS casting
FROM {0}.{1}
WHERE OWNER = '{2}' AND TABLE_NAME = '{3}' AND A_VALID_TO > SYSDATE
ORDER BY COLUMN_ID""".format(METADATA_OWNER, METADATA_TABLE, owner, table_name)
        # BUG FIX: original called bare connect(); only `oraconn` is imported,
        # so this raised NameError at runtime.
        conn = oraconn.connect('MRDS_LOADER')
        df = pd.read_sql(input_query, conn)
        if df.empty:
            raise ValueError(f"No metadata found for {owner}.{table_name}")
        output_query = 'SELECT ' + ', \n'.join(df['casting'].tolist()) + ' FROM {0}.{1}'.format(owner, table_name)
        column_metadata = {}
        for _, row in df.iterrows():
            col_name = row['COLUMN_NAME']
            data_type = row['DATA_TYPE']
            if data_type in ('NUMBER', 'DECIMAL', 'FLOAT', 'BINARY_FLOAT', 'BINARY_DOUBLE'):
                # Scale > 0 means fractional values; otherwise pick the
                # smallest nullable integer dtype that fits the precision.
                if pd.notna(row['DATA_SCALE']) and row['DATA_SCALE'] > 0:
                    column_metadata[col_name] = 'float64'
                elif pd.notna(row['DATA_PRECISION']) and row['DATA_PRECISION'] <= 9:
                    column_metadata[col_name] = 'Int32'
                elif pd.notna(row['DATA_PRECISION']) and row['DATA_PRECISION'] <= 18:
                    column_metadata[col_name] = 'Int64'
                else:
                    column_metadata[col_name] = 'float64'
            else:
                # Dates/timestamps are already CAST to VARCHAR2 in the SQL,
                # so every non-numeric column lands as a string.
                column_metadata[col_name] = 'string'
        logging.info(f"Generated query template with {len(df)} columns")
        return output_query, column_metadata
    except Exception as e:
        logging.error(f"Error in query_oracle_template: {e}")
        raise
    finally:
        if conn:
            conn.close()
def query_oracle_and_generate_parquet(partition_num, partitions, sql, column_metadata, use_local):
    """Stream one ORA_HASH(ROWID) partition of `sql` into a snappy Parquet file.

    partition_num:   hash bucket in [0, partitions) handled by this task.
    partitions:      total bucket count (also used as the PARALLEL hint degree).
    sql:             typed SELECT produced by query_oracle_template().
    column_metadata: column -> pandas dtype, applied per chunk (best effort).
    use_local:       True -> write under LOCAL_OUTPUT_DIR, False -> upload to S3.
    """
    logging.info(f"[Pandas-Partition {partition_num}] Starting processing (Mode: {'LOCAL' if use_local else 'S3'})")
    partition_sql = f"""SELECT /*+ PARALLEL(t, {partitions}) */ *
FROM (
{sql}
) t
WHERE ORA_HASH(ROWID, {partitions - 1}) = {partition_num}"""
    # Open the destination BEFORE connecting so a setup failure here cannot
    # leak a database connection (the original connected first).
    if use_local:
        os.makedirs(LOCAL_OUTPUT_DIR, exist_ok=True)
        output_path = os.path.join(LOCAL_OUTPUT_DIR, f"partition_{partition_num:04d}.parquet")
        file_handle = open(output_path, 'wb')
        write_target = file_handle
    else:
        s3_hook = S3Hook(aws_conn_id=AWS_CONN_ID)
        s3_client = s3_hook.get_conn()
        s3_key = f"{S3_PREFIX}partition_{partition_num:04d}.parquet"
        buffer = io.BytesIO()
        write_target = buffer
    conn = None
    try:
        # BUG FIX: original called bare connect(); only `oraconn` is imported.
        conn = oraconn.connect("MRDS_LOADER")
        chunk_iterator = pd.read_sql(partition_sql, conn, chunksize=CHUNK_SIZE)
        pqwriter = None
        total_rows = 0
        chunk_count = 0
        for chunk in chunk_iterator:
            # Apply the dtype plan; a failing column keeps its raw dtype
            # instead of aborting the whole export.
            for col, dtype in column_metadata.items():
                if col in chunk.columns:
                    try:
                        if dtype == 'string':
                            chunk[col] = chunk[col].astype('string')
                        elif dtype.startswith('Int'):
                            chunk[col] = pd.to_numeric(chunk[col], errors='coerce').astype(dtype)
                        elif dtype == 'float64':
                            chunk[col] = pd.to_numeric(chunk[col], errors='coerce')
                    except Exception as e:
                        logging.warning(f"[Pandas-Partition {partition_num}] Could not optimize column {col}: {e}")
            table = pa.Table.from_pandas(chunk, preserve_index=False)
            if pqwriter is None:
                # Writer is created lazily so the schema comes from real data.
                pqwriter = pq.ParquetWriter(
                    write_target,
                    table.schema,
                    compression='snappy',
                    use_dictionary=True,
                    write_statistics=True,
                    version='2.6',
                    data_page_size=1024*1024,
                )
            pqwriter.write_table(table)
            total_rows += len(chunk)
            chunk_count += 1
            if chunk_count % 10 == 0:
                if use_local:
                    file_size_mb = os.path.getsize(output_path) / 1024 / 1024
                else:
                    file_size_mb = write_target.tell() / 1024 / 1024
                logging.info(f"[Pandas-Partition {partition_num}] Processed {total_rows:,} rows, Size: {file_size_mb:.2f} MB")
        if pqwriter:
            pqwriter.close()
        if use_local:
            file_size_mb = os.path.getsize(output_path) / 1024 / 1024
            logging.info(f"[Pandas-Partition {partition_num}] Completed - {total_rows:,} rows, {file_size_mb:.2f} MB saved to {output_path}")
        else:
            write_target.seek(0)
            buffer_size_mb = write_target.getbuffer().nbytes / 1024 / 1024
            logging.info(f"[Pandas-Partition {partition_num}] Uploading {buffer_size_mb:.2f} MB to s3://{S3_BUCKET}/{s3_key}")
            s3_client.upload_fileobj(write_target, S3_BUCKET, s3_key)
            logging.info(f"[Pandas-Partition {partition_num}] Completed - {total_rows:,} rows, {buffer_size_mb:.2f} MB uploaded to S3")
    except Exception as e:
        logging.error(f"[Pandas-Partition {partition_num}] Error: {e}")
        raise
    finally:
        # BUG FIX: guard conn — the connect() call itself may have failed.
        if conn:
            conn.close()
        if use_local:
            file_handle.close()
        else:
            write_target.close()
def generate_tasks(dag, partitions, sql, column_metadata, use_local):
    """Create one export PythonOperator per hash partition and return them."""
    def _make_task(partition_num):
        # Each task handles exactly one ORA_HASH bucket.
        return PythonOperator(
            task_id=f"generate_parquet_partition_{partition_num}",
            python_callable=query_oracle_and_generate_parquet,
            op_kwargs={
                "partition_num": partition_num,
                "partitions": partitions,
                "sql": sql,
                "column_metadata": column_metadata,
                "use_local": use_local
            },
            provide_context=True,
            dag=dag,
        )
    return [_make_task(num) for num in range(partitions)]
with DAG(
    # CONSISTENCY FIX: reuse the DAG_ID constant defined above instead of
    # duplicating the literal (the constant was previously dead).
    dag_id=DAG_ID,
    default_args=DEFAULT_ARGS,
    description='Devo replicator using Pandas with dtype optimization',
    schedule_interval=SCHEDULE_INTERVAL,
    start_date=datetime(2024, 1, 1),
    catchup=False,
    tags=['DevoReplicator', 'Pandas'],
    max_active_runs=1,
    max_active_tasks=30,
) as dag:
    # NOTE(review): this runs an Oracle query at every DAG-file parse;
    # consider caching or moving it into a task if parsing load is a problem.
    query, column_metadata = query_oracle_template(OWNER, TABLE_NAME)
    PARTITIONS = 16  # number of ORA_HASH buckets / parallel export tasks
    partition_tasks = generate_tasks(dag, PARTITIONS, query, column_metadata, USE_LOCAL_STORAGE)

View File

@@ -0,0 +1,244 @@
from airflow import DAG
from airflow.operators.python_operator import PythonOperator
from airflow.hooks.S3_hook import S3Hook
from datetime import datetime, timedelta
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
import io
import logging
import sys
sys.path.append('/opt/airflow/python/mrds_common')
from mrds.utils import oraconn
# --- Module configuration -------------------------------------------------
SERVICE_NAME = "SERVICE_NAME"  # NOTE(review): placeholder value — confirm before deploying
OWNER = "C2D"                  # schema owner of the source table
TABLE_NAME = "T_CEPH"          # source table to replicate
METADATA_OWNER = "CT_MOPDB"    # schema holding the metadata inventory
METADATA_TABLE = "mopdb_metadata_inventory"  # column-metadata lookup table
# NOTE: placeholder bucket/prefix — need to be changed per environment.
S3_BUCKET = "bucket-name"
S3_PREFIX = "devo/replicator/C2D/T_CEPH/"
AWS_CONN_ID = "aws_default"    # Airflow connection used by S3Hook
DEFAULT_ARGS = {
    "owner": "airflow",
    "depends_on_past": False,
    "email_on_failure": False,
    "email_on_retry": False,
    "retries": 1,
    "retry_delay": timedelta(minutes=5),
}
DAG_ID = "devo_replicator_pyarrow"
SCHEDULE_INTERVAL = None       # manual-trigger only
BATCH_SIZE = 100000            # rows fetched per cursor.fetchmany() batch
def query_oracle_template(owner, table_name):
    """Build a typed SELECT for OWNER.TABLE_NAME from the metadata inventory.

    Each column is CAST to a stable representation (dates/timestamps to
    VARCHAR2, CLOBs truncated to 32767 chars). Raises ValueError when the
    inventory has no current rows for the table.
    """
    # NOTE(review): identifiers are interpolated via str.format — safe only
    # while owner/table_name come from trusted module constants.
    conn = None  # BUG FIX: pre-initialise so the finally block cannot NameError
    try:
        input_query = """SELECT
    CASE
        WHEN DATA_TYPE = 'DATE' OR DATA_TYPE LIKE '%TIMESTAMP%' THEN
            'CAST(' || COLUMN_NAME || ' AS VARCHAR2(100)) AS ' || COLUMN_NAME
        WHEN DATA_TYPE = 'VARCHAR2' OR DATA_TYPE LIKE '%CHAR%' THEN
            'CAST(' || COLUMN_NAME || ' AS VARCHAR2(' || CAST(CHAR_LENGTH AS INT) || ')) AS ' || COLUMN_NAME
        WHEN DATA_TYPE IN ('NUMBER', 'DECIMAL') AND DATA_PRECISION IS NOT NULL AND DATA_SCALE IS NOT NULL THEN
            'CAST(' || COLUMN_NAME || ' AS ' || DATA_TYPE || '(' || CAST(DATA_PRECISION AS INT) || ',' || CAST(DATA_SCALE AS INT) || ')) AS ' || COLUMN_NAME
        WHEN DATA_TYPE IN ('NUMBER', 'DECIMAL') AND DATA_PRECISION IS NOT NULL THEN
            'CAST(' || COLUMN_NAME || ' AS ' || DATA_TYPE || '(' || CAST(DATA_PRECISION AS INT) || ')) AS ' || COLUMN_NAME
        WHEN DATA_TYPE = 'CLOB' THEN
            'TO_CHAR(SUBSTR(' || COLUMN_NAME || ', 1, 32767)) AS ' || COLUMN_NAME
        ELSE
            COLUMN_NAME
    END AS casting
FROM {0}.{1}
WHERE OWNER = '{2}' AND TABLE_NAME = '{3}' AND A_VALID_TO > SYSDATE
ORDER BY COLUMN_ID""".format(METADATA_OWNER, METADATA_TABLE, owner, table_name)
        # BUG FIX: original called bare connect(); only `oraconn` is imported,
        # so this raised NameError at runtime.
        conn = oraconn.connect('MRDS_LOADER')
        df = pd.read_sql(input_query, conn)
        if df.empty:
            raise ValueError(f"No metadata found for {owner}.{table_name}")
        output_query = 'SELECT ' + ', \n'.join(df['casting'].tolist()) + ' FROM {0}.{1}'.format(owner, table_name)
        logging.info(f"Generated query template with {len(df)} columns")
        return output_query
    except Exception as e:
        logging.error(f"Error in query_oracle_template: {e}")
        raise
    finally:
        if conn:
            conn.close()
def query_oracle_and_generate_parquet(partition_num, partitions, sql):
    """Stream one ORA_HASH(ROWID) partition of `sql` to S3 as snappy Parquet
    using PyArrow RecordBatches.

    The Arrow schema is inferred from the first fetched batch: the first
    non-NULL sample per column picks the type; columns that are entirely NULL
    in that batch fall back to string.
    """
    logging.info(f"[PyArrow-Partition {partition_num}] Starting processing")
    partition_sql = f"""SELECT /*+ PARALLEL(t, {partitions}) */ *
FROM (
{sql}
) t
WHERE ORA_HASH(ROWID, {partitions - 1}) = {partition_num}"""
    # BUG FIX: conn/cursor are pre-initialised and all setup happens inside
    # the try, so a failure at any point cannot leak a connection or make the
    # finally block raise NameError. The original also called bare connect()
    # although only `oraconn` is imported.
    conn = None
    cursor = None
    buffer = io.BytesIO()
    try:
        conn = oraconn.connect("MRDS_LOADER")
        cursor = conn.cursor()
        cursor.arraysize = BATCH_SIZE
        cursor.prefetchrows = BATCH_SIZE
        s3_hook = S3Hook(aws_conn_id=AWS_CONN_ID)
        s3_client = s3_hook.get_conn()
        s3_key = f"{S3_PREFIX}partition_{partition_num:04d}.parquet"
        cursor.execute(partition_sql)
        column_names = [desc[0] for desc in cursor.description]
        first_batch_rows = cursor.fetchmany(BATCH_SIZE)
        if not first_batch_rows:
            logging.warning(f"[PyArrow-Partition {partition_num}] No data found")
            return
        # Infer one Arrow field per column from the first non-NULL sample.
        arrow_fields = []
        sample_row = first_batch_rows[0]
        for i, col_name in enumerate(column_names):
            sample_val = sample_row[i]
            if sample_val is None:
                for row in first_batch_rows[1:]:
                    if row[i] is not None:
                        sample_val = row[i]
                        break
            if isinstance(sample_val, str):
                arrow_type = pa.string()
            elif isinstance(sample_val, int):
                arrow_type = pa.int64()
            elif isinstance(sample_val, float):
                arrow_type = pa.float64()
            elif isinstance(sample_val, (datetime, pd.Timestamp)):
                arrow_type = pa.timestamp('ns')
            elif isinstance(sample_val, bytes):
                arrow_type = pa.binary()
            else:
                arrow_type = pa.string()  # all-NULL or unrecognised type
            arrow_fields.append(pa.field(col_name, arrow_type))
        schema = pa.schema(arrow_fields)
        writer = pq.ParquetWriter(
            buffer,
            schema,
            compression='snappy',
            use_dictionary=True,
            write_statistics=True,
            data_page_size=2*1024*1024,
            version='2.6',
        )
        def process_batch(rows):
            # Rows -> RecordBatch; a column that refuses its schema type is
            # stringified instead of failing the export.
            if not rows:
                return None
            columns_data = list(zip(*rows))
            arrays = []
            for i, col_data in enumerate(columns_data):
                try:
                    arrays.append(pa.array(col_data, type=schema.field(i).type))
                except Exception as e:
                    logging.warning(f"[PyArrow-Partition {partition_num}] Column {column_names[i]} conversion failed: {e}")
                    converted = [str(val) if val is not None else None for val in col_data]
                    arrays.append(pa.array(converted, type=pa.string()))
            return pa.RecordBatch.from_arrays(arrays, schema=schema)
        batch = process_batch(first_batch_rows)
        if batch:
            writer.write_batch(batch)
        total_rows = len(first_batch_rows)
        batch_count = 1
        while True:
            rows = cursor.fetchmany(BATCH_SIZE)
            if not rows:
                break
            batch = process_batch(rows)
            if batch:
                writer.write_batch(batch)
            total_rows += len(rows)
            batch_count += 1
            if batch_count % 10 == 0:
                buffer_size_mb = buffer.tell() / 1024 / 1024
                logging.info(f"[PyArrow-Partition {partition_num}] Processed {total_rows:,} rows, Buffer size: {buffer_size_mb:.2f} MB")
        writer.close()
        buffer.seek(0)
        buffer_size_mb = buffer.getbuffer().nbytes / 1024 / 1024
        logging.info(f"[PyArrow-Partition {partition_num}] Uploading {buffer_size_mb:.2f} MB to s3://{S3_BUCKET}/{s3_key}")
        s3_client.upload_fileobj(buffer, S3_BUCKET, s3_key)
        logging.info(f"[PyArrow-Partition {partition_num}] Completed - {total_rows:,} rows, {buffer_size_mb:.2f} MB uploaded to S3")
    except Exception as e:
        logging.error(f"[PyArrow-Partition {partition_num}] Error: {e}")
        raise
    finally:
        if cursor:
            cursor.close()
        if conn:
            conn.close()
        buffer.close()
def generate_tasks(dag, partitions, sql):
    """Create one export PythonOperator per hash partition and return them."""
    def _make_task(partition_num):
        # Each task handles exactly one ORA_HASH bucket.
        return PythonOperator(
            task_id=f"generate_parquet_partition_{partition_num}",
            python_callable=query_oracle_and_generate_parquet,
            op_kwargs={
                "partition_num": partition_num,
                "partitions": partitions,
                "sql": sql
            },
            provide_context=True,
            dag=dag,
        )
    return [_make_task(num) for num in range(partitions)]
with DAG(
    # CONSISTENCY FIX: reuse the DAG_ID constant defined above instead of
    # duplicating the literal (the constant was previously dead).
    dag_id=DAG_ID,
    default_args=DEFAULT_ARGS,
    description='Devo replicator using PyArrow native processing',
    schedule_interval=SCHEDULE_INTERVAL,
    start_date=datetime(2024, 1, 1),
    catchup=False,
    tags=['DevoReplicator', 'PyArrow'],
    max_active_runs=1,
    max_active_tasks=16,
) as dag:
    # NOTE(review): runs an Oracle query at every DAG-file parse.
    query = query_oracle_template(OWNER, TABLE_NAME)
    PARTITIONS = 16  # number of ORA_HASH buckets / parallel export tasks
    partition_tasks = generate_tasks(dag, PARTITIONS, query)

View File

@@ -0,0 +1,181 @@
# dags/dev_replicator_scheduler_rar.py
from __future__ import annotations
import sys
import logging
from datetime import datetime, timedelta
from airflow import DAG
from airflow.decorators import task
from airflow.operators.python import BranchPythonOperator
from airflow.operators.empty import EmptyOperator
from airflow.operators.trigger_dagrun import TriggerDagRunOperator
from datetime import datetime, timedelta
from mrds.utils import oraconn # your Oracle connection helper
sys.path.append('/opt/airflow/python/connectors/devo')
sys.path.append('/opt/airflow/python/mrds_common')
DAG_NAME = "devo_replicator_scheduler_rar"
# DAG triggered when at least one table is stale.
TARGET_DAG_ID = "devo_replicator_trigger"
ORACLE_CONN_NAME = "MRDS_LOADER"
# Compares the latest successful Oracle-side load (monitoring table) against
# the last FINISHED Devo replication; returns 'Y' when Oracle is newer.
# A table never replicated to Devo gets the 1999 sentinel, i.e. always 'Y'.
PRECONDITION_SQL = """
WITH LAST_UPDATE_ORACLE AS (
    SELECT MAX(process_end) AS process_end
    FROM CT_RAR.A_RAR_FOR_DISC_MONITORING
    WHERE UPPER(owner || '.' || target_table_name) = UPPER(:table_name)
      AND process_end IS NOT NULL
      AND process_successful = 'Y'
),
LAST_UPDATE_DEVO AS (
    SELECT CASE
               WHEN last_status = 'FINISHED' THEN last_end_time
               ELSE TO_DATE('01-JAN-1999', 'DD-MON-YYYY')
           END AS process_end
    FROM CT_MRDS.a_devo_replica_mgmt_rar
    WHERE owner || '.' || table_name = :table_name
)
SELECT CASE
           WHEN (SELECT process_end FROM LAST_UPDATE_ORACLE) >
                (SELECT process_end FROM LAST_UPDATE_DEVO)
           THEN 'Y' ELSE 'N'
       END AS trigger_devo_replicator
FROM dual
"""
def _get_conn():
    # Single place to open the shared Oracle connection.
    return oraconn.connect(ORACLE_CONN_NAME)
def get_devo_replica_table_options() -> list[str]:
    """Enumerate OWNER.TABLE_NAME entries from the RAR replica-mgmt table.

    Fail-soft: any database error is logged and an empty list is returned.
    """
    conn = None
    cur = None
    try:
        conn = _get_conn()
        cur = conn.cursor()
        cur.execute("""
            SELECT OWNER || '.' || TABLE_NAME
            FROM CT_MRDS.a_devo_replica_mgmt_rar
            ORDER BY OWNER, TABLE_NAME
        """)
        tables = [owner_table for (owner_table,) in (cur.fetchall() or [])]
        logging.info("Fetched %d table(s) from replica mgmt.", len(tables))
        return tables
    except Exception:
        logging.exception("Error getting DEVO replica table options")
        return []
    finally:
        # Close the cursor best-effort, then always release the connection.
        try:
            if cur: cur.close()
        except Exception:
            pass
        if conn:
            conn.close()
def check_table_precondition(table_full_name: str) -> dict:
    """Returns {"table": <OWNER.TABLE>, "trigger": "Y"|"N"}.

    Runs PRECONDITION_SQL with :table_name bound. Any database error is
    logged and reported as "N" (fail closed), so a broken check can never
    cause a spurious replication trigger.
    """
    conn = None
    cur = None
    try:
        conn = _get_conn()
        cur = conn.cursor()
        cur.execute(PRECONDITION_SQL, {"table_name": table_full_name})
        row = cur.fetchone()
        # Missing row or NULL result both collapse to 'N'.
        status = (row[0] if row else 'N') or 'N'
        logging.info("Precondition for %s: %s", table_full_name, status)
        return {"table": table_full_name, "trigger": status}
    except Exception:
        logging.exception("Error checking precondition for %s", table_full_name)
        return {"table": table_full_name, "trigger": "N"}  # fail closed
    finally:
        # Close the cursor best-effort, then always release the connection.
        try:
            if cur: cur.close()
        except Exception:
            pass
        if conn:
            conn.close()
default_args = {
    'owner': 'devo',
    'depends_on_past': False,
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=1),
}
with DAG(
    dag_id=DAG_NAME,
    description="Checks DEVO replica preconditions; triggers devo_replicator_trigger_rar once if any table is stale",
    default_args=default_args,
    # BUG FIX: the original passed BOTH schedule=None and
    # schedule_interval='*/10 * * * *'; Airflow 2.4+ raises on conflicting
    # values. Keep the cron the inline comment/description implied.
    schedule='*/10 * * * *',  # every 10 minutes
    # BUG FIX: start_date was datetime.now() - timedelta(days=2); a moving
    # start_date is an Airflow anti-pattern — use a fixed date instead.
    start_date=datetime(2024, 1, 1),
    catchup=False,
    max_active_runs=1,
    tags=["DevoScheduler", "DevoReplicatorTrigger"],
) as dag:
    @task
    def fetch_tables() -> list[str]:
        """Enumerate candidate tables; an empty list fans out to zero checks."""
        tables = get_devo_replica_table_options()
        if not tables:
            logging.warning("No tables returned from enumeration.")
        return tables

    @task
    def check_one(table_name: str) -> dict:
        """Evaluate the staleness precondition for a single table."""
        return check_table_precondition(table_name)

    @task
    def summarize(results: list[dict]) -> dict:
        """Aggregate per-table results into {'any_true': bool, 'y_tables': [...]}."""
        y_tables = [r["table"] for r in results if r and r.get("trigger") == "Y"]
        n_tables = [r["table"] for r in results if r and r.get("trigger") == "N"]
        logging.info("Precondition summary -> Y: %d, N: %d", len(y_tables), len(n_tables))
        if y_tables:
            logging.info("Tables needing replication: %s", ", ".join(y_tables))
        else:
            logging.info("No tables are updated/stale; nothing to trigger.")
        return {"any_true": bool(y_tables), "y_tables": y_tables}

    def decide_branch(summary: dict) -> str:
        """Return the EXACT downstream task_id to follow."""
        return "prepare_trigger_conf" if summary.get("any_true") else "no_updates"

    @task
    def prepare_trigger_conf(summary: dict) -> dict:
        """Single conf payload for the downstream DAG."""
        return {"tables_to_replicate": summary.get("y_tables", [])}

    no_updates = EmptyOperator(task_id="no_updates")

    # Graph
    tables = fetch_tables()
    results = check_one.expand(table_name=tables)  # dynamic mapping across tables
    summary = summarize(results)
    branch = BranchPythonOperator(
        task_id="branch_on_any",
        python_callable=decide_branch,
        op_args=[summary],  # XComArg from summarize
    )
    conf_payload = prepare_trigger_conf(summary)
    trigger_devo = TriggerDagRunOperator(
        task_id="trigger_devo_replicator_rar",
        trigger_dag_id=TARGET_DAG_ID,
        wait_for_completion=True,
        reset_dag_run=True,
        conf=conf_payload,
    )
    # Wire branching — only ONE instance of prepare_trigger_conf is referenced
    summary >> branch
    branch >> no_updates
    branch >> conf_payload >> trigger_devo

View File

@@ -0,0 +1,255 @@
from __future__ import annotations
import os
import sys
import logging
import time
from datetime import timedelta
from airflow import DAG
from airflow.utils.dates import days_ago
from airflow.operators.python import PythonOperator
from airflow.models import Param
from airflow.decorators import task
from airflow.providers.oracle.hooks.oracle import OracleHook
from mrds.utils import oraconn
sys.path.append('/opt/airflow/python/connectors/devo')
sys.path.append('/opt/airflow/python/mrds_common')
ORACLE_CONN_ID = "MRDS_LOADER"
# TARGET_DAG_ID = "devo_replicator_trigger_rar"
def get_rar_table_options():
    """Return OWNER.TABLE_NAME strings from the RAR replica-mgmt table.

    Fail-soft: any database error is logged and an empty list is returned.
    """
    oracle_conn = None
    try:
        oracle_conn = oraconn.connect('MRDS_LOADER')
        cursor = oracle_conn.cursor()
        cursor.execute("""
            SELECT OWNER || '.' || TABLE_NAME
            FROM CT_MRDS.a_devo_replica_mgmt_rar
            ORDER BY OWNER, TABLE_NAME
        """)
        options = [owner_table for (owner_table,) in cursor.fetchall()]
        cursor.close()
        return options
    except Exception as e:
        logging.error(f"Error getting RAR table options: {e}")
        return []
    finally:
        if oracle_conn:
            oracle_conn.close()
default_args = {
    'owner': 'devo',
    'depends_on_past': False,
    'start_date': days_ago(1),
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=1),
}
with DAG(
    dag_id='devo_replicator_trigger',
    default_args=default_args,
    description='External trigger DAG for RAR tables',
    schedule=None,  # runs only when triggered (by the scheduler DAG or manually)
    catchup=False,
    tags=['DevoReplicator', 'DevoReplicatorTrigger'],
    max_active_runs=1,
    params={
        # still allow manual runs from the UI
        "owner_table": Param(
            default=None,
            type=["string", "null"],
            description="Select table in format OWNER.TABLE_NAME",
            # NOTE(review): enum presumably disabled to avoid a DB query at
            # every DAG-file parse — confirm before re-enabling.
            #enum=get_rar_table_options()
        )
    }
) as dag:
# --- Init: read conf ---
def init_step(**context):
    """Read dag_run.conf, validate MRDS_ENV, and stage inputs in XCom.

    Accepts either 'tables_to_replicate' (list of OWNER.TABLE) or
    'owner_table' (single); both may be absent for a manual run.
    """
    ti = context["ti"]
    run = context.get("dag_run")
    conf = (run.conf or {}) if run else {}

    env = os.getenv("MRDS_ENV")
    if not env:
        raise ValueError("MRDS_ENV environment variable is required")
    env = env.lower()

    store = "rar"  # this trigger DAG is RAR-only
    owner_table = conf.get("owner_table")  # optional single table
    tables_to_replicate = conf.get("tables_to_replicate")  # optional list of OWNER.TABLE

    # Log what we got
    if tables_to_replicate:
        logging.info("Received tables_to_replicate from upstream: %d table(s).", len(tables_to_replicate))
    elif owner_table:
        logging.info("Received single owner_table from conf: %s", owner_table)
    else:
        logging.info("No conf provided; manual UI param may be used or fallback to full list in get_table_list.")

    if env not in {"dev", "tst", "acc", "prd"}:
        raise ValueError(f"Unsupported env '{env}'. Expected 'dev', 'tst', 'acc' or 'prd'.")

    for key, value in {
        "env": env,
        "store": store,
        "owner_table": owner_table,  # may be None
        "tables_to_replicate": tables_to_replicate  # may be None/list
    }.items():
        ti.xcom_push(key=key, value=value)

init = PythonOperator(
    task_id='init_step',
    python_callable=init_step,
)
# --- Build the processing list ---
def get_table_list(**context):
    """Resolve the (owner, table) pairs to process, in priority order:
    conf list -> single owner_table -> full enumeration from the DB."""
    ti = context["ti"]
    store = ti.xcom_pull(task_ids='init_step', key='store')
    owner_table = ti.xcom_pull(task_ids='init_step', key='owner_table')
    tables_to_replicate = ti.xcom_pull(task_ids='init_step', key='tables_to_replicate')

    # 1) A list supplied by the upstream scheduler wins.
    if tables_to_replicate:
        logging.info("Using tables_to_replicate list from conf: %d items", len(tables_to_replicate))
        tables = []
        for ot in tables_to_replicate:
            if '.' not in ot:
                logging.warning("Skipping malformed owner_table (no dot): %s", ot)
            else:
                tables.append(tuple(ot.split('.', 1)))
        ti.xcom_push(key='tables_to_process', value=tables)
        return tables

    # 2) A single table from manual/programmatic conf.
    if owner_table:
        tables = [tuple(owner_table.split('.', 1))]
        logging.info("Processing single table from conf/params: %s", owner_table)
        ti.xcom_push(key='tables_to_process', value=tables)
        return tables

    # 3) Fallback: enumerate every managed RAR table.
    oracle_conn = None
    try:
        oracle_conn = oraconn.connect('MRDS_LOADER')
        cursor = oracle_conn.cursor()
        cursor.execute("""
            SELECT OWNER, TABLE_NAME
            FROM CT_MRDS.a_devo_replica_mgmt_rar
            ORDER BY OWNER, TABLE_NAME
        """)
        tables = cursor.fetchall()
        cursor.close()
        logging.info("Fallback: Found %d tables for RAR", len(tables))
        ti.xcom_push(key='tables_to_process', value=tables)
        return tables
    except Exception as e:
        logging.error(f"Error in get_table_list: {e}")
        raise
    finally:
        if oracle_conn:
            oracle_conn.close()

t1 = PythonOperator(
    task_id='get_table_list',
    python_callable=get_table_list,
)
# --- Keep your existing throttled triggering logic unchanged ---
def check_and_trigger(**context):
    """Trigger devo_replicator_core once per table, throttled by the total
    MAX_THREADS of currently RUNNING replications.

    Per table: waits (5-minute polls) while the global thread budget is over
    `threshold`, skips tables already RUNNING, then fires the core DAG.
    NOTE(review): the wait loop has no timeout — it blocks indefinitely while
    the system stays saturated.
    """
    from airflow.api.common.trigger_dag import trigger_dag
    ti = context["ti"]
    store = ti.xcom_pull(task_ids='init_step', key='store')
    threshold = 30  # you were pushing 30; keep it here or push from init
    tables = ti.xcom_pull(task_ids='get_table_list', key='tables_to_process')
    oracle_conn = None
    triggered_count = 0
    try:
        oracle_conn = oraconn.connect('MRDS_LOADER')
        for table_owner, table_name in tables:
            logging.info("Processing table: %s.%s", table_owner, table_name)
            while True:
                cursor = oracle_conn.cursor()
                # service_name derives from the fixed store value ("rar"),
                # never from user input, so interpolating it into the table
                # identifier below is safe.
                service_name = store.upper()
                # SECURITY FIX: owner/table originate from dag_run.conf
                # (user-supplied), so they are bound as parameters instead of
                # being interpolated into the SQL text.
                sql_query = f"""
                SELECT
                    (SELECT NVL(SUM(MAX_THREADS),0) FROM CT_MRDS.A_DEVO_REPLICA_MGMT_MOPDB WHERE LAST_STATUS = 'RUNNING') +
                    (SELECT NVL(SUM(MAX_THREADS),0) FROM CT_MRDS.A_DEVO_REPLICA_MGMT_RAR WHERE LAST_STATUS = 'RUNNING')
                        AS TOTAL_RUNNING_THREADS_NOW,
                    (SELECT COUNT(*)
                     FROM CT_MRDS.A_DEVO_REPLICA_MGMT_{service_name}
                     WHERE OWNER = :owner AND TABLE_NAME = :table_name AND LAST_STATUS = 'RUNNING') AS TABLE_IS_ALREADY_RUNNING
                FROM DUAL
                """
                cursor.execute(sql_query, {"owner": table_owner, "table_name": table_name})
                total_running_val, table_running_val = cursor.fetchone()
                cursor.close()
                logging.info(
                    "Total running: %d, threshold: %d, table running: %d",
                    total_running_val or 0, threshold, table_running_val or 0
                )
                if (total_running_val or 0) > threshold:
                    logging.info("Threshold exceeded. Waiting 5 minutes...")
                    time.sleep(300)
                    continue
                if (table_running_val or 0) >= 1:
                    logging.info("Table %s.%s already running. Skipping.", table_owner, table_name)
                    break
                # Trigger the core DAG for this specific table
                conf = {"store": store, "owner_table": f"{table_owner}.{table_name}"}
                trigger_dag(
                    dag_id='devo_replicator_core',
                    conf=conf,
                    execution_date=None,
                    replace_microseconds=False
                )
                triggered_count += 1
                logging.info("Triggered core DAG for table %s.%s", table_owner, table_name)
                break
        logging.info("Total core DAGs triggered: %d", triggered_count)
        ti.xcom_push(key='triggered_count', value=triggered_count)
    except Exception as e:
        logging.error(f"Error in check_and_trigger: {e}")
        raise
    finally:
        if oracle_conn:
            oracle_conn.close()

t2 = PythonOperator(
    task_id='check_and_trigger',
    python_callable=check_and_trigger,
)

init >> t1 >> t2
"""
Reading tables_to_replicate from dag_run.conf in init_step.
Pushing it to XCom (so get_table_list can use it).
Tell get_table_list to prioritize the provided list.
init_step reads tables_to_replicate from dag_run.conf and puts it into XCom.
get_table_list prioritizes that list; falls back to owner_table or full table list only if needed.
check_and_trigger loops over those tables and triggers your core DAG (devo_replicator_core) per table, respecting your concurrency threshold.
"""

View File

@@ -0,0 +1,257 @@
from __future__ import annotations
import os
import sys
import logging
import time
from datetime import timedelta
from airflow import DAG
from airflow.utils.dates import days_ago
from airflow.operators.python import PythonOperator
from airflow.models import Param
sys.path.append('/opt/airflow/python/connectors/devo')
sys.path.append('/opt/airflow/python/mrds_common')
from mrds.utils import oraconn
# Get MOPDB table options for dropdown
def get_mopdb_table_options():
    """Return 'OWNER.TABLE_NAME' choices for the DAG's dropdown param.

    Queries the MOPDB management table; on any failure an empty list is
    returned so the DAG file can still be parsed at import time.
    """
    conn = None
    try:
        conn = oraconn.connect('MRDS_LOADER')
        cur = conn.cursor()
        cur.execute("SELECT OWNER || '.' || TABLE_NAME FROM CT_MRDS.a_devo_replica_mgmt_mopdb ORDER BY OWNER, TABLE_NAME")
        rows = cur.fetchall()
        cur.close()
        return [name for (name,) in rows]
    except Exception as e:
        logging.error(f"Error getting MOPDB table options: {e}")
        return []
    finally:
        if conn:
            conn.close()
default_args = {
'owner': 'devo',
'depends_on_past': False,
'start_date': days_ago(1),
'email_on_failure': False,
'email_on_retry': False,
'retries': 1,
'retry_delay': timedelta(minutes=1),
}
with DAG(
dag_id='devo_replicator_trigger_mopdb',
default_args=default_args,
description='External trigger DAG for MOPDB tables',
schedule=None,
catchup=False,
tags=['DevoReplicator', 'DevoReplicatorTrigger'],
params={
"owner_table": Param(
default=None,
type="string",
description="Select table in format OWNER.TABLE_NAME",
enum=get_mopdb_table_options()
)
}
) as dag:
# Init
def init_step(**context):
    """Read and validate run parameters, then publish them via XCom.

    Requires the MRDS_ENV environment variable (dev/tst/acc/prd) and an
    'owner_table' entry in the DAG run conf, formatted OWNER.TABLE_NAME.
    Pushes env, store, table_owner, table_name, owner_table and the
    concurrency threshold for the downstream tasks.
    """
    run = context.get("dag_run")
    ti = context["ti"]
    conf = (run.conf or {}) if run else {}

    raw_env = os.getenv("MRDS_ENV")
    if not raw_env:
        raise ValueError("MRDS_ENV environment variable is required")
    env = raw_env.lower()

    store = "mopdb"
    owner_table = conf.get("owner_table")
    if not owner_table:
        raise ValueError("owner_table parameter is required")
    if '.' not in owner_table:
        raise ValueError("owner_table must be in format 'OWNER.TABLE_NAME'")
    table_owner, table_name = owner_table.split('.', 1)

    if env not in {"dev", "tst", "acc", "prd"}:
        raise ValueError(f"Unsupported env '{env}'. Expected 'dev', 'tst', 'acc' or 'prd'.")

    logging.info("=== init_step === env=%s store=%s owner_table=%s",
                 env, store, owner_table)

    payload = {
        "env": env,
        "store": store,
        "table_owner": table_owner,
        "table_name": table_name,
        "owner_table": owner_table,
        "threshold": 30,
    }
    for key, value in payload.items():
        ti.xcom_push(key=key, value=value)
# Task 1/3: validates run parameters and seeds XCom for downstream tasks.
init = PythonOperator(
task_id='init_step',
python_callable=init_step,
)
# Get table list
def get_table_list(**context):
    """Resolve the list of (owner, table_name) pairs to process.

    Uses the specific 'owner_table' from init_step when present,
    otherwise reads every row of the MOPDB management table.
    Pushes the list to XCom as 'tables_to_process' and returns it.
    """
    ti = context["ti"]
    owner_table = ti.xcom_pull(task_ids='init_step', key='owner_table')
    oracle_conn = None
    try:
        oracle_conn = oraconn.connect('MRDS_LOADER')
        if owner_table:
            table_owner, table_name = owner_table.split('.', 1)
            tables = [(table_owner, table_name)]
            logging.info("Processing specific table: %s", owner_table)
        else:
            cursor = oracle_conn.cursor()
            try:
                cursor.execute("SELECT OWNER, TABLE_NAME FROM CT_MRDS.a_devo_replica_mgmt_mopdb ORDER BY OWNER, TABLE_NAME")
                tables = cursor.fetchall()
            finally:
                # Close the cursor even when the query fails.
                cursor.close()
            logging.info("Found %d tables for MOPDB", len(tables))
        ti.xcom_push(key='tables_to_process', value=tables)
        return tables
    except Exception as e:
        logging.error(f"Error in get_table_list: {e}")
        raise
    finally:
        if oracle_conn:
            oracle_conn.close()
t1 = PythonOperator(
    task_id='get_table_list',
    python_callable=get_table_list,
)
# Check and trigger core DAG
def check_and_trigger(**context):
    """Throttle and fan out per-table runs of the replicator core DAG.

    For every (owner, table) pair from get_table_list:
      * read the total MAX_THREADS currently RUNNING across the MOPDB
        and RAR management tables;
      * while that total exceeds the threshold, wait 5 minutes and poll
        again;
      * skip the table if its own management row is already RUNNING;
      * otherwise trigger 'devo_replicator_core' with the table in the
        run conf.

    Pushes the number of triggered runs to XCom as 'triggered_count'.
    """
    from airflow.api.common.trigger_dag import trigger_dag
    ti = context["ti"]
    store = ti.xcom_pull(task_ids='init_step', key='store')
    threshold = ti.xcom_pull(task_ids='init_step', key='threshold')
    tables = ti.xcom_pull(task_ids='get_table_list', key='tables_to_process')
    oracle_conn = None
    triggered_count = 0
    try:
        oracle_conn = oraconn.connect('MRDS_LOADER')
        # The management table name depends only on this DAG's fixed
        # store, never on user input, so it is safe to interpolate.
        service_name = store.upper()
        sql_query = f"""
            SELECT (SELECT NVL(SUM(MAX_THREADS), 0)
                    FROM CT_MRDS.A_DEVO_REPLICA_MGMT_MOPDB
                    WHERE LAST_STATUS = 'RUNNING') +
                   (SELECT NVL(SUM(MAX_THREADS), 0)
                    FROM CT_MRDS.A_DEVO_REPLICA_MGMT_RAR
                    WHERE LAST_STATUS = 'RUNNING')
                   AS TOTAL_RUNNING_THREADS_NOW,
                   (SELECT COUNT(*)
                    FROM CT_MRDS.A_DEVO_REPLICA_MGMT_{service_name}
                    WHERE OWNER = :owner
                    AND TABLE_NAME = :table_name
                    AND LAST_STATUS = 'RUNNING') AS TABLE_IS_ALREADY_RUNNING
            FROM DUAL
        """
        for table_owner, table_name in tables:
            logging.info("Processing table: %s.%s", table_owner, table_name)
            while True:
                cursor = oracle_conn.cursor()
                try:
                    # Bind variables prevent SQL injection through
                    # owner/table values supplied in the run conf.
                    cursor.execute(sql_query, owner=table_owner, table_name=table_name)
                    total_running_val, table_running_val = cursor.fetchone()
                finally:
                    cursor.close()
                total_running_val = total_running_val or 0
                table_running_val = table_running_val or 0
                logging.info("Total running: %d, threshold: %d, table running: %d",
                             total_running_val, threshold, table_running_val)
                if total_running_val > threshold:
                    logging.info("Threshold exceeded. Waiting 5 minutes...")
                    time.sleep(300)
                    continue
                if table_running_val >= 1:
                    logging.info("Table %s.%s is already running. Skipping.", table_owner, table_name)
                    break
                trigger_dag(
                    dag_id='devo_replicator_core',
                    conf={"store": store, "owner_table": f"{table_owner}.{table_name}"},
                    execution_date=None,
                    replace_microseconds=False
                )
                triggered_count += 1
                logging.info("Triggered core DAG for table %s.%s", table_owner, table_name)
                break
        logging.info("Total DAGs triggered: %d", triggered_count)
        ti.xcom_push(key='triggered_count', value=triggered_count)
    except Exception as e:
        logging.error(f"Error in check_and_trigger: {e}")
        raise
    finally:
        if oracle_conn:
            oracle_conn.close()
t2 = PythonOperator(
    task_id='check_and_trigger',
    python_callable=check_and_trigger,
)
# Dependencies
init >> t1 >> t2
"""
MOPDB Trigger DAG
1) init_step
- Gets environment from MRDS_ENV environment variable
- Reads owner_table parameter from DAG configuration
- Validates owner_table format (must be OWNER.TABLE_NAME)
- Sets store to "mopdb" (fixed for this DAG)
- Sets threshold to 30 (max concurrent running threads)
- Pushes parameters to XCom
2) get_table_list
- Connects to Oracle database (MRDS_LOADER)
- If specific owner_table provided: creates single table list
- If no owner_table: queries all tables from CT_MRDS.a_devo_replica_mgmt_mopdb
- Returns list of (owner, table_name) tuples to process
- Pushes table list to XCom
3) check_and_trigger
- Loops through each table from the table list
- For each table, enters monitoring loop:
- Executes SQL query to check total running threads across MOPDB+RAR
- Checks if current table is already running
- If total threads > threshold (30): waits 5 minutes and rechecks
- If table already running: skips to the next table
- If conditions met: triggers core DAG with table parameters
- Counts and logs total number of DAGs triggered
- Ensures system doesn't exceed concurrent processing limits
"""

View File

@@ -0,0 +1,257 @@
from __future__ import annotations
import os
import sys
import logging
import time
from datetime import datetime, timedelta
from airflow import DAG
from airflow.utils.dates import days_ago
from airflow.operators.python import PythonOperator
from airflow.models import Param
sys.path.append('/opt/airflow/python/connectors/devo')
sys.path.append('/opt/airflow/python/mrds_common')
from mrds.utils import oraconn
# Get RAR table options for dropdown
def get_rar_table_options():
    """Return 'OWNER.TABLE_NAME' choices for the DAG's dropdown param.

    Queries the RAR management table; on any failure an empty list is
    returned so the DAG file can still be parsed at import time.
    """
    conn = None
    try:
        conn = oraconn.connect('MRDS_LOADER')
        cur = conn.cursor()
        cur.execute("SELECT OWNER || '.' || TABLE_NAME FROM CT_MRDS.a_devo_replica_mgmt_rar ORDER BY OWNER, TABLE_NAME")
        rows = cur.fetchall()
        cur.close()
        return [name for (name,) in rows]
    except Exception as e:
        logging.error(f"Error getting RAR table options: {e}")
        return []
    finally:
        if conn:
            conn.close()
default_args = {
'owner': 'devo',
'depends_on_past': False,
'start_date': days_ago(1),
'email_on_failure': False,
'email_on_retry': False,
'retries': 1,
'retry_delay': timedelta(minutes=1),
}
with DAG(
dag_id='devo_replicator_trigger_rar',
default_args=default_args,
description='External trigger DAG for RAR tables',
schedule=None,
catchup=False,
tags=['DevoReplicator', 'DevoReplicatorTrigger'],
params={
"owner_table": Param(
default=None,
type="string",
description="Select table in format OWNER.TABLE_NAME",
enum=get_rar_table_options()
)
}
) as dag:
# Init
def init_step(**context):
    """Read and validate run parameters, then publish them via XCom.

    Requires the MRDS_ENV environment variable (dev/tst/acc/prd) and an
    'owner_table' entry in the DAG run conf, formatted OWNER.TABLE_NAME.
    Pushes env, store, table_owner, table_name, owner_table and the
    concurrency threshold for the downstream tasks.
    """
    run = context.get("dag_run")
    ti = context["ti"]
    conf = (run.conf or {}) if run else {}

    raw_env = os.getenv("MRDS_ENV")
    if not raw_env:
        raise ValueError("MRDS_ENV environment variable is required")
    env = raw_env.lower()

    store = "rar"
    owner_table = conf.get("owner_table")
    if not owner_table:
        raise ValueError("owner_table parameter is required")
    if '.' not in owner_table:
        raise ValueError("owner_table must be in format 'OWNER.TABLE_NAME'")
    table_owner, table_name = owner_table.split('.', 1)

    if env not in {"dev", "tst", "acc", "prd"}:
        raise ValueError(f"Unsupported env '{env}'. Expected 'dev', 'tst', 'acc' or 'prd'.")

    logging.info("=== init_step === env=%s store=%s owner_table=%s",
                 env, store, owner_table)

    payload = {
        "env": env,
        "store": store,
        "table_owner": table_owner,
        "table_name": table_name,
        "owner_table": owner_table,
        "threshold": 30,
    }
    for key, value in payload.items():
        ti.xcom_push(key=key, value=value)
# Task 1/3: validates run parameters and seeds XCom for downstream tasks.
init = PythonOperator(
task_id='init_step',
python_callable=init_step,
)
# Get table list
def get_table_list(**context):
    """Resolve the list of (owner, table_name) pairs to process.

    Uses the specific 'owner_table' from init_step when present,
    otherwise reads every row of the RAR management table.
    Pushes the list to XCom as 'tables_to_process' and returns it.
    """
    ti = context["ti"]
    owner_table = ti.xcom_pull(task_ids='init_step', key='owner_table')
    oracle_conn = None
    try:
        oracle_conn = oraconn.connect('MRDS_LOADER')
        if owner_table:
            table_owner, table_name = owner_table.split('.', 1)
            tables = [(table_owner, table_name)]
            logging.info("Processing specific table: %s", owner_table)
        else:
            cursor = oracle_conn.cursor()
            try:
                cursor.execute("SELECT OWNER, TABLE_NAME FROM CT_MRDS.a_devo_replica_mgmt_rar ORDER BY OWNER, TABLE_NAME")
                tables = cursor.fetchall()
            finally:
                # Close the cursor even when the query fails.
                cursor.close()
            logging.info("Found %d tables for RAR", len(tables))
        ti.xcom_push(key='tables_to_process', value=tables)
        return tables
    except Exception as e:
        logging.error(f"Error in get_table_list: {e}")
        raise
    finally:
        if oracle_conn:
            oracle_conn.close()
t1 = PythonOperator(
    task_id='get_table_list',
    python_callable=get_table_list,
)
# Check and trigger core DAG
def check_and_trigger(**context):
    """Throttle and fan out per-table runs of the replicator core DAG.

    For every (owner, table) pair from get_table_list:
      * read the total MAX_THREADS currently RUNNING across the MOPDB
        and RAR management tables;
      * while that total exceeds the threshold, wait 5 minutes and poll
        again;
      * skip the table if its own management row is already RUNNING;
      * otherwise trigger 'devo_replicator_core' with the table in the
        run conf.

    Pushes the number of triggered runs to XCom as 'triggered_count'.
    """
    from airflow.api.common.trigger_dag import trigger_dag
    ti = context["ti"]
    store = ti.xcom_pull(task_ids='init_step', key='store')
    threshold = ti.xcom_pull(task_ids='init_step', key='threshold')
    tables = ti.xcom_pull(task_ids='get_table_list', key='tables_to_process')
    oracle_conn = None
    triggered_count = 0
    try:
        oracle_conn = oraconn.connect('MRDS_LOADER')
        # The management table name depends only on this DAG's fixed
        # store, never on user input, so it is safe to interpolate.
        service_name = store.upper()
        sql_query = f"""
            SELECT (SELECT NVL(SUM(MAX_THREADS), 0)
                    FROM CT_MRDS.A_DEVO_REPLICA_MGMT_MOPDB
                    WHERE LAST_STATUS = 'RUNNING') +
                   (SELECT NVL(SUM(MAX_THREADS), 0)
                    FROM CT_MRDS.A_DEVO_REPLICA_MGMT_RAR
                    WHERE LAST_STATUS = 'RUNNING')
                   AS TOTAL_RUNNING_THREADS_NOW,
                   (SELECT COUNT(*)
                    FROM CT_MRDS.A_DEVO_REPLICA_MGMT_{service_name}
                    WHERE OWNER = :owner
                    AND TABLE_NAME = :table_name
                    AND LAST_STATUS = 'RUNNING') AS TABLE_IS_ALREADY_RUNNING
            FROM DUAL
        """
        for table_owner, table_name in tables:
            logging.info("Processing table: %s.%s", table_owner, table_name)
            while True:
                cursor = oracle_conn.cursor()
                try:
                    # Bind variables prevent SQL injection through
                    # owner/table values supplied in the run conf.
                    cursor.execute(sql_query, owner=table_owner, table_name=table_name)
                    total_running_val, table_running_val = cursor.fetchone()
                finally:
                    cursor.close()
                total_running_val = total_running_val or 0
                table_running_val = table_running_val or 0
                logging.info("Total running: %d, threshold: %d, table running: %d",
                             total_running_val, threshold, table_running_val)
                if total_running_val > threshold:
                    logging.info("Threshold exceeded. Waiting 5 minutes...")
                    time.sleep(300)
                    continue
                if table_running_val >= 1:
                    logging.info("Table %s.%s is already running. Skipping.", table_owner, table_name)
                    break
                trigger_dag(
                    dag_id='devo_replicator_core',
                    conf={"store": store, "owner_table": f"{table_owner}.{table_name}"},
                    execution_date=None,
                    replace_microseconds=False
                )
                triggered_count += 1
                logging.info("Triggered core DAG for table %s.%s", table_owner, table_name)
                break
        logging.info("Total DAGs triggered: %d", triggered_count)
        ti.xcom_push(key='triggered_count', value=triggered_count)
    except Exception as e:
        logging.error(f"Error in check_and_trigger: {e}")
        raise
    finally:
        if oracle_conn:
            oracle_conn.close()
t2 = PythonOperator(
    task_id='check_and_trigger',
    python_callable=check_and_trigger,
)
# Dependencies
init >> t1 >> t2
"""
RAR Trigger DAG
1) init_step
- Gets environment from MRDS_ENV environment variable
- Reads owner_table parameter from DAG configuration
- Validates owner_table format (must be OWNER.TABLE_NAME)
- Sets store to "rar" (fixed for this DAG)
- Sets threshold to 30 (max concurrent running threads)
- Pushes parameters to XCom
2) get_table_list
- Connects to Oracle database (MRDS_LOADER)
- If specific owner_table provided: creates single table list
- If no owner_table: queries all tables from CT_MRDS.a_devo_replica_mgmt_rar
- Returns list of (owner, table_name) tuples to process
- Pushes table list to XCom
3) check_and_trigger
- Loops through each table from the table list
- For each table, enters monitoring loop:
- Executes SQL query to check total running threads across MOPDB+RAR
- Checks if current table is already running
- If total threads > threshold (30): waits 5 minutes and rechecks
- If table already running: skips to the next table
- If conditions met: triggers core DAG with table parameters
- Counts and logs total number of DAGs triggered
- Ensures system doesn't exceed concurrent processing limits
"""

View File

@@ -0,0 +1,257 @@
from __future__ import annotations
import os
import sys
import logging
import time
from datetime import timedelta
from airflow import DAG
from airflow.utils.dates import days_ago
from airflow.operators.python import PythonOperator
from airflow.models import Param
sys.path.append('/opt/airflow/python/connectors/devo')
sys.path.append('/opt/airflow/python/mrds_common')
from mrds.utils import oraconn
# Get RQSD table options for dropdown
def get_rqsd_table_options():
    """Return 'OWNER.TABLE_NAME' choices for the DAG's dropdown param.

    Queries the RQSD management table; on any failure an empty list is
    returned so the DAG file can still be parsed at import time.
    """
    conn = None
    try:
        conn = oraconn.connect('MRDS_LOADER')
        cur = conn.cursor()
        cur.execute("SELECT OWNER || '.' || TABLE_NAME FROM CT_MRDS.a_devo_replica_mgmt_rqsd ORDER BY OWNER, TABLE_NAME")
        rows = cur.fetchall()
        cur.close()
        return [name for (name,) in rows]
    except Exception as e:
        logging.error(f"Error getting RQSD table options: {e}")
        return []
    finally:
        if conn:
            conn.close()
default_args = {
'owner': 'devo',
'depends_on_past': False,
'start_date': days_ago(1),
'email_on_failure': False,
'email_on_retry': False,
'retries': 1,
'retry_delay': timedelta(minutes=1),
}
with DAG(
dag_id='devo_replicator_trigger_rqsd',
default_args=default_args,
description='External trigger DAG for RQSD tables',
schedule=None,
catchup=False,
tags=['DevoReplicator', 'DevoReplicatorTrigger'],
params={
"owner_table": Param(
default=None,
type="string",
description="Select table in format OWNER.TABLE_NAME",
enum=get_rqsd_table_options()
)
}
) as dag:
# Init
def init_step(**context):
    """Read and validate run parameters, then publish them via XCom.

    Requires the MRDS_ENV environment variable (dev/tst/acc/prd) and an
    'owner_table' entry in the DAG run conf, formatted OWNER.TABLE_NAME.
    Pushes env, store, table_owner, table_name, owner_table and the
    concurrency threshold for the downstream tasks.
    """
    run = context.get("dag_run")
    ti = context["ti"]
    conf = (run.conf or {}) if run else {}

    raw_env = os.getenv("MRDS_ENV")
    if not raw_env:
        raise ValueError("MRDS_ENV environment variable is required")
    env = raw_env.lower()

    store = "rqsd"
    owner_table = conf.get("owner_table")
    if not owner_table:
        raise ValueError("owner_table parameter is required")
    if '.' not in owner_table:
        raise ValueError("owner_table must be in format 'OWNER.TABLE_NAME'")
    table_owner, table_name = owner_table.split('.', 1)

    if env not in {"dev", "tst", "acc", "prd"}:
        raise ValueError(f"Unsupported env '{env}'. Expected 'dev', 'tst', 'acc' or 'prd'.")

    logging.info("=== init_step === env=%s store=%s owner_table=%s",
                 env, store, owner_table)

    payload = {
        "env": env,
        "store": store,
        "table_owner": table_owner,
        "table_name": table_name,
        "owner_table": owner_table,
        "threshold": 30,
    }
    for key, value in payload.items():
        ti.xcom_push(key=key, value=value)
# Task 1/3: validates run parameters and seeds XCom for downstream tasks.
init = PythonOperator(
task_id='init_step',
python_callable=init_step,
)
# Get table list
def get_table_list(**context):
    """Resolve the list of (owner, table_name) pairs to process.

    Uses the specific 'owner_table' from init_step when present,
    otherwise reads every row of the RQSD management table.
    Pushes the list to XCom as 'tables_to_process' and returns it.
    """
    ti = context["ti"]
    owner_table = ti.xcom_pull(task_ids='init_step', key='owner_table')
    oracle_conn = None
    try:
        oracle_conn = oraconn.connect('MRDS_LOADER')
        if owner_table:
            table_owner, table_name = owner_table.split('.', 1)
            tables = [(table_owner, table_name)]
            logging.info("Processing specific table: %s", owner_table)
        else:
            cursor = oracle_conn.cursor()
            try:
                cursor.execute("SELECT OWNER, TABLE_NAME FROM CT_MRDS.a_devo_replica_mgmt_rqsd ORDER BY OWNER, TABLE_NAME")
                tables = cursor.fetchall()
            finally:
                # Close the cursor even when the query fails.
                cursor.close()
            logging.info("Found %d tables for RQSD", len(tables))
        ti.xcom_push(key='tables_to_process', value=tables)
        return tables
    except Exception as e:
        logging.error(f"Error in get_table_list: {e}")
        raise
    finally:
        if oracle_conn:
            oracle_conn.close()
t1 = PythonOperator(
    task_id='get_table_list',
    python_callable=get_table_list,
)
# Check and trigger core DAG
def check_and_trigger(**context):
    """Throttle and fan out per-table runs of the replicator core DAG.

    For every (owner, table) pair from get_table_list:
      * read the total MAX_THREADS currently RUNNING across the MOPDB
        and RAR management tables;
      * while that total exceeds the threshold, wait 5 minutes and poll
        again;
      * skip the table if its own management row is already RUNNING;
      * otherwise trigger 'devo_replicator_core' with the table in the
        run conf.

    Pushes the number of triggered runs to XCom as 'triggered_count'.
    """
    from airflow.api.common.trigger_dag import trigger_dag
    ti = context["ti"]
    store = ti.xcom_pull(task_ids='init_step', key='store')
    threshold = ti.xcom_pull(task_ids='init_step', key='threshold')
    tables = ti.xcom_pull(task_ids='get_table_list', key='tables_to_process')
    oracle_conn = None
    triggered_count = 0
    try:
        oracle_conn = oraconn.connect('MRDS_LOADER')
        # The management table name depends only on this DAG's fixed
        # store, never on user input, so it is safe to interpolate.
        service_name = store.upper()
        sql_query = f"""
            SELECT (SELECT NVL(SUM(MAX_THREADS), 0)
                    FROM CT_MRDS.A_DEVO_REPLICA_MGMT_MOPDB
                    WHERE LAST_STATUS = 'RUNNING') +
                   (SELECT NVL(SUM(MAX_THREADS), 0)
                    FROM CT_MRDS.A_DEVO_REPLICA_MGMT_RAR
                    WHERE LAST_STATUS = 'RUNNING')
                   AS TOTAL_RUNNING_THREADS_NOW,
                   (SELECT COUNT(*)
                    FROM CT_MRDS.A_DEVO_REPLICA_MGMT_{service_name}
                    WHERE OWNER = :owner
                    AND TABLE_NAME = :table_name
                    AND LAST_STATUS = 'RUNNING') AS TABLE_IS_ALREADY_RUNNING
            FROM DUAL
        """
        for table_owner, table_name in tables:
            logging.info("Processing table: %s.%s", table_owner, table_name)
            while True:
                cursor = oracle_conn.cursor()
                try:
                    # Bind variables prevent SQL injection through
                    # owner/table values supplied in the run conf.
                    cursor.execute(sql_query, owner=table_owner, table_name=table_name)
                    total_running_val, table_running_val = cursor.fetchone()
                finally:
                    cursor.close()
                total_running_val = total_running_val or 0
                table_running_val = table_running_val or 0
                logging.info("Total running: %d, threshold: %d, table running: %d",
                             total_running_val, threshold, table_running_val)
                if total_running_val > threshold:
                    logging.info("Threshold exceeded. Waiting 5 minutes...")
                    time.sleep(300)
                    continue
                if table_running_val >= 1:
                    logging.info("Table %s.%s is already running. Skipping.", table_owner, table_name)
                    break
                trigger_dag(
                    dag_id='devo_replicator_core',
                    conf={"store": store, "owner_table": f"{table_owner}.{table_name}"},
                    execution_date=None,
                    replace_microseconds=False
                )
                triggered_count += 1
                logging.info("Triggered core DAG for table %s.%s", table_owner, table_name)
                break
        logging.info("Total DAGs triggered: %d", triggered_count)
        ti.xcom_push(key='triggered_count', value=triggered_count)
    except Exception as e:
        logging.error(f"Error in check_and_trigger: {e}")
        raise
    finally:
        if oracle_conn:
            oracle_conn.close()
t2 = PythonOperator(
    task_id='check_and_trigger',
    python_callable=check_and_trigger,
)
# Dependencies
init >> t1 >> t2
"""
RQSD Trigger DAG
1) init_step
- Gets environment from MRDS_ENV environment variable
- Reads owner_table parameter from DAG configuration
- Validates owner_table format (must be OWNER.TABLE_NAME)
- Sets store to "rqsd" (fixed for this DAG)
- Sets threshold to 30 (max concurrent running threads)
- Pushes parameters to XCom
2) get_table_list
- Connects to Oracle database (MRDS_LOADER)
- If specific owner_table provided: creates single table list
- If no owner_table: queries all tables from CT_MRDS.a_devo_replica_mgmt_rqsd
- Returns list of (owner, table_name) tuples to process
- Pushes table list to XCom
3) check_and_trigger
- Loops through each table from the table list
- For each table, enters monitoring loop:
- Executes SQL query to check total running threads across MOPDB+RAR
- Checks if current table is already running
- If total threads > threshold (30): waits 5 minutes and rechecks
- If table already running: skips to the next table
- If conditions met: triggers core DAG with table parameters
- Counts and logs total number of DAGs triggered
- Ensures system doesn't exceed concurrent processing limits
"""

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,255 @@
from __future__ import annotations
import os
import sys
import logging
from datetime import timedelta
from airflow import DAG
from airflow.utils.dates import days_ago
from airflow.operators.python import PythonOperator
from airflow.models import Param
sys.path.append('/opt/airflow/python/connectors/devo')
sys.path.append('/opt/airflow/python/mrds_common')
from mrds.utils import oraconn
# Get MOPDB table options for dropdown
def get_mopdb_table_options():
    """Return dropdown choices: the '__ALL_EXCLUDE_COPY__' sentinel
    followed by every 'OWNER.TABLE_NAME' from the MOPDB management
    table. On failure only the sentinel is returned so the DAG file
    can still be parsed at import time.
    """
    conn = None
    try:
        conn = oraconn.connect('MRDS_LOADER')
        cur = conn.cursor()
        cur.execute("SELECT OWNER || '.' || TABLE_NAME FROM CT_MRDS.a_devo_replica_mgmt_mopdb ORDER BY OWNER, TABLE_NAME")
        rows = cur.fetchall()
        cur.close()
        return ["__ALL_EXCLUDE_COPY__"] + [name for (name,) in rows]
    except Exception as e:
        logging.error(f"Error getting MOPDB table options: {e}")
        return ["__ALL_EXCLUDE_COPY__"]
    finally:
        if conn:
            conn.close()
default_args = {
'owner': 'devo',
'depends_on_past': False,
'start_date': days_ago(1),
'email_on_failure': False,
'email_on_retry': False,
}
with DAG(
dag_id='devo_table_generator_trigger_mopdb',
default_args=default_args,
description='External trigger DAG for MOPDB tables',
schedule=None,
catchup=False,
tags=['DevoTableGenerator', 'DevoTableGeneratorTrigger'],
params={
"owner_table": Param(
default="__ALL_EXCLUDE_COPY__",
type="string",
description="Select '__ALL_EXCLUDE_COPY__' to run all tables without _COPY, or select specific table in format OWNER.TABLE_NAME",
enum=get_mopdb_table_options()
)
}
) as dag:
# Init
def init_step(**context):
    """Read and validate run parameters, then publish them via XCom.

    Requires MRDS_ENV (dev/tst/acc/prd) and an 'owner_table' conf entry
    that is either a specific OWNER.TABLE_NAME or the sentinel
    '__ALL_EXCLUDE_COPY__' (process every non-_COPY table).
    """
    run = context.get("dag_run")
    ti = context["ti"]
    conf = (run.conf or {}) if run else {}

    raw_env = os.getenv("MRDS_ENV")
    if not raw_env:
        raise ValueError("MRDS_ENV environment variable is required")
    env = raw_env.lower()

    store = "mopdb"
    owner_table = conf.get("owner_table")
    if not owner_table:
        raise ValueError("owner_table parameter is required")

    # Sentinel selects the "all tables except *_COPY" mode.
    run_all_exclude_copy = (owner_table == "__ALL_EXCLUDE_COPY__")
    if not run_all_exclude_copy and '.' not in owner_table:
        raise ValueError("owner_table must be in format 'OWNER.TABLE_NAME' or '__ALL_EXCLUDE_COPY__'")

    if env not in {"dev", "tst", "acc", "prd"}:
        raise ValueError(f"Unsupported env '{env}'. Expected 'dev', 'tst', 'acc' or 'prd'.")

    logging.info("=== init_step === env=%s store=%s owner_table=%s run_all_exclude_copy=%s",
                 env, store, owner_table, run_all_exclude_copy)

    for key, value in {
        "env": env,
        "store": store,
        "owner_table": owner_table,
        "run_all_exclude_copy": run_all_exclude_copy,
    }.items():
        ti.xcom_push(key=key, value=value)
# Task 1/3: validates run parameters and seeds XCom for downstream tasks.
init = PythonOperator(
task_id='init_step',
python_callable=init_step,
)
# Get table list
def get_table_list(**context):
    """Resolve the list of (owner, table_name) pairs to process.

    Modes (decided in init_step):
      * run_all_exclude_copy: every management-table row whose
        TABLE_NAME does not contain a literal '_COPY';
      * specific owner_table: just that one table;
      * otherwise: every row of the MOPDB management table.

    Pushes the list to XCom as 'tables_to_process' and returns it.
    """
    ti = context["ti"]
    owner_table = ti.xcom_pull(task_ids='init_step', key='owner_table')
    run_all_exclude_copy = ti.xcom_pull(task_ids='init_step', key='run_all_exclude_copy')
    oracle_conn = None
    try:
        oracle_conn = oraconn.connect('MRDS_LOADER')
        if run_all_exclude_copy:
            cursor = oracle_conn.cursor()
            try:
                # '_' is a single-character wildcard in LIKE, so it must
                # be escaped to exclude only names containing a literal
                # '_COPY' (the unescaped pattern also matched e.g. 'XCOPY').
                cursor.execute("""
                    SELECT OWNER, TABLE_NAME
                    FROM CT_MRDS.a_devo_replica_mgmt_mopdb
                    WHERE TABLE_NAME NOT LIKE '%\\_COPY%' ESCAPE '\\'
                    ORDER BY OWNER, TABLE_NAME
                """)
                tables = cursor.fetchall()
            finally:
                cursor.close()
            logging.info("Processing ALL tables excluding _COPY: %d tables found", len(tables))
        elif owner_table:
            table_owner, table_name = owner_table.split('.', 1)
            tables = [(table_owner, table_name)]
            logging.info("Processing specific table: %s", owner_table)
        else:
            cursor = oracle_conn.cursor()
            try:
                cursor.execute("SELECT OWNER, TABLE_NAME FROM CT_MRDS.a_devo_replica_mgmt_mopdb ORDER BY OWNER, TABLE_NAME")
                tables = cursor.fetchall()
            finally:
                cursor.close()
            logging.info("Found %d tables for MOPDB", len(tables))
        ti.xcom_push(key='tables_to_process', value=tables)
        return tables
    except Exception as e:
        logging.error(f"Error in get_table_list: {e}")
        raise
    finally:
        if oracle_conn:
            oracle_conn.close()
t1 = PythonOperator(
    task_id='get_table_list',
    python_callable=get_table_list,
)
# Trigger core DAG for each table
def trigger_tables(**context):
    """Trigger the table-generator core DAG once per pending table.

    For each (owner, table) pair from get_table_list: skip it when its
    management row is already marked RUNNING, otherwise trigger
    'devo_table_generator_core' with the table in the run conf.
    Pushes 'triggered_count' and 'skipped_count' to XCom.
    """
    from airflow.api.common.trigger_dag import trigger_dag
    ti = context["ti"]
    store = ti.xcom_pull(task_ids='init_step', key='store')
    tables = ti.xcom_pull(task_ids='get_table_list', key='tables_to_process')
    oracle_conn = None
    triggered_count = 0
    skipped_count = 0
    try:
        oracle_conn = oraconn.connect('MRDS_LOADER')
        # The management table name depends only on this DAG's fixed
        # store, never on user input, so it is safe to interpolate.
        service_name = store.upper()
        sql_query = f"""
            SELECT COUNT(*)
            FROM CT_MRDS.A_DEVO_REPLICA_MGMT_{service_name}
            WHERE OWNER = :owner
            AND TABLE_NAME = :table_name
            AND LAST_STATUS = 'RUNNING'
        """
        for table_owner, table_name in tables:
            logging.info("Processing table: %s.%s", table_owner, table_name)
            cursor = oracle_conn.cursor()
            try:
                # Bind variables prevent SQL injection through
                # owner/table values supplied in the run conf.
                cursor.execute(sql_query, owner=table_owner, table_name=table_name)
                table_running_val = cursor.fetchone()[0] or 0
            finally:
                cursor.close()
            if table_running_val >= 1:
                logging.info("Table %s.%s is already running. Skipping.", table_owner, table_name)
                skipped_count += 1
                continue
            trigger_dag(
                dag_id='devo_table_generator_core',
                conf={"store": store, "owner_table": f"{table_owner}.{table_name}"},
                execution_date=None,
                replace_microseconds=False
            )
            triggered_count += 1
            logging.info("Triggered core DAG for table %s.%s", table_owner, table_name)
        logging.info("Summary: Total DAGs triggered: %d, Skipped (already running): %d",
                     triggered_count, skipped_count)
        ti.xcom_push(key='triggered_count', value=triggered_count)
        ti.xcom_push(key='skipped_count', value=skipped_count)
    except Exception as e:
        logging.error(f"Error in trigger_tables: {e}")
        raise
    finally:
        if oracle_conn:
            oracle_conn.close()
t2 = PythonOperator(
    task_id='trigger_tables',
    python_callable=trigger_tables,
)
# Dependencies
init >> t1 >> t2
"""
MOPDB Trigger DAG
1) init_step
- Gets environment from MRDS_ENV environment variable
- Reads owner_table parameter from DAG configuration
- Validates owner_table format (must be OWNER.TABLE_NAME or __ALL_EXCLUDE_COPY__)
- Sets store to "mopdb" (fixed for this DAG)
- Determines if running all tables excluding _COPY
- Pushes parameters to XCom
2) get_table_list
- Connects to Oracle database (MRDS_LOADER)
- If __ALL_EXCLUDE_COPY__: queries all tables from CT_MRDS.a_devo_replica_mgmt_mopdb excluding _COPY tables
- If specific owner_table provided: creates single table list
- If no owner_table: queries all tables from CT_MRDS.a_devo_replica_mgmt_mopdb
- Returns list of (owner, table_name) tuples to process
- Pushes table list to XCom
3) trigger_tables
- Loops through each table from the table list
- For each table:
- Checks if table is already running
- If table already running: skips to next table
- If not running: triggers core DAG with table parameters
- Counts and logs total number of DAGs triggered and skipped
- No threshold checking or waiting logic
"""

View File

@@ -0,0 +1,257 @@
from __future__ import annotations
import os
import sys
import logging
from datetime import timedelta
from airflow import DAG
from airflow.utils.dates import days_ago
from airflow.operators.python import PythonOperator
from airflow.models import Param
sys.path.append('/opt/airflow/python/connectors/devo')
sys.path.append('/opt/airflow/python/mrds_common')
from mrds.utils import oraconn
# Get RAR table options for dropdown
def get_rar_table_options():
    """Return dropdown choices: the '__ALL_EXCLUDE_COPY__' sentinel
    followed by every 'OWNER.TABLE_NAME' from the RAR management
    table. On failure only the sentinel is returned so the DAG file
    can still be parsed at import time.
    """
    conn = None
    try:
        conn = oraconn.connect('MRDS_LOADER')
        cur = conn.cursor()
        cur.execute("SELECT OWNER || '.' || TABLE_NAME FROM CT_MRDS.a_devo_replica_mgmt_rar ORDER BY OWNER, TABLE_NAME")
        rows = cur.fetchall()
        cur.close()
        return ["__ALL_EXCLUDE_COPY__"] + [name for (name,) in rows]
    except Exception as e:
        logging.error(f"Error getting RAR table options: {e}")
        return ["__ALL_EXCLUDE_COPY__"]
    finally:
        if conn:
            conn.close()
default_args = {
'owner': 'devo',
'depends_on_past': False,
'start_date': days_ago(1),
'email_on_failure': False,
'email_on_retry': False,
'retries': 1,
'retry_delay': timedelta(minutes=1),
}
with DAG(
dag_id='devo_table_generator_trigger_rar',
default_args=default_args,
description='External trigger DAG for RAR tables',
schedule=None,
catchup=False,
tags=['DevoTableGenerator', 'DevoTableGeneratorTrigger'],
params={
"owner_table": Param(
default="__ALL_EXCLUDE_COPY__",
type="string",
description="Select '__ALL_EXCLUDE_COPY__' to run all tables without _COPY, or select specific table in format OWNER.TABLE_NAME",
enum=get_rar_table_options()
)
}
) as dag:
# Init
def init_step(**context):
    """Read and validate run parameters, then publish them via XCom.

    Requires MRDS_ENV (dev/tst/acc/prd) and an 'owner_table' conf entry
    that is either a specific OWNER.TABLE_NAME or the sentinel
    '__ALL_EXCLUDE_COPY__' (process every non-_COPY table).
    """
    run = context.get("dag_run")
    ti = context["ti"]
    conf = (run.conf or {}) if run else {}

    raw_env = os.getenv("MRDS_ENV")
    if not raw_env:
        raise ValueError("MRDS_ENV environment variable is required")
    env = raw_env.lower()

    store = "rar"
    owner_table = conf.get("owner_table")
    if not owner_table:
        raise ValueError("owner_table parameter is required")

    # Sentinel selects the "all tables except *_COPY" mode.
    run_all_exclude_copy = (owner_table == "__ALL_EXCLUDE_COPY__")
    if not run_all_exclude_copy and '.' not in owner_table:
        raise ValueError("owner_table must be in format 'OWNER.TABLE_NAME' or '__ALL_EXCLUDE_COPY__'")

    if env not in {"dev", "tst", "acc", "prd"}:
        raise ValueError(f"Unsupported env '{env}'. Expected 'dev', 'tst', 'acc' or 'prd'.")

    logging.info("=== init_step === env=%s store=%s owner_table=%s run_all_exclude_copy=%s",
                 env, store, owner_table, run_all_exclude_copy)

    for key, value in {
        "env": env,
        "store": store,
        "owner_table": owner_table,
        "run_all_exclude_copy": run_all_exclude_copy,
    }.items():
        ti.xcom_push(key=key, value=value)
# Task 1/3: validates run parameters and seeds XCom for downstream tasks.
init = PythonOperator(
task_id='init_step',
python_callable=init_step,
)
# Get table list
def get_table_list(**context):
    """Resolve the list of (owner, table_name) pairs to process.

    Modes (decided in init_step):
      * run_all_exclude_copy: every management-table row whose
        TABLE_NAME does not contain a literal '_COPY';
      * specific owner_table: just that one table;
      * otherwise: every row of the RAR management table.

    Pushes the list to XCom as 'tables_to_process' and returns it.
    """
    ti = context["ti"]
    owner_table = ti.xcom_pull(task_ids='init_step', key='owner_table')
    run_all_exclude_copy = ti.xcom_pull(task_ids='init_step', key='run_all_exclude_copy')
    oracle_conn = None
    try:
        oracle_conn = oraconn.connect('MRDS_LOADER')
        if run_all_exclude_copy:
            cursor = oracle_conn.cursor()
            try:
                # '_' is a single-character wildcard in LIKE, so it must
                # be escaped to exclude only names containing a literal
                # '_COPY' (the unescaped pattern also matched e.g. 'XCOPY').
                cursor.execute("""
                    SELECT OWNER, TABLE_NAME
                    FROM CT_MRDS.a_devo_replica_mgmt_rar
                    WHERE TABLE_NAME NOT LIKE '%\\_COPY%' ESCAPE '\\'
                    ORDER BY OWNER, TABLE_NAME
                """)
                tables = cursor.fetchall()
            finally:
                cursor.close()
            logging.info("Processing ALL tables excluding _COPY: %d tables found", len(tables))
        elif owner_table:
            table_owner, table_name = owner_table.split('.', 1)
            tables = [(table_owner, table_name)]
            logging.info("Processing specific table: %s", owner_table)
        else:
            cursor = oracle_conn.cursor()
            try:
                cursor.execute("SELECT OWNER, TABLE_NAME FROM CT_MRDS.a_devo_replica_mgmt_rar ORDER BY OWNER, TABLE_NAME")
                tables = cursor.fetchall()
            finally:
                cursor.close()
            logging.info("Found %d tables for RAR", len(tables))
        ti.xcom_push(key='tables_to_process', value=tables)
        return tables
    except Exception as e:
        logging.error(f"Error in get_table_list: {e}")
        raise
    finally:
        if oracle_conn:
            oracle_conn.close()
t1 = PythonOperator(
    task_id='get_table_list',
    python_callable=get_table_list,
)
# Trigger core DAG for each table
def trigger_tables(**context):
ti = context["ti"]
env = ti.xcom_pull(task_ids='init_step', key='env')
store = ti.xcom_pull(task_ids='init_step', key='store')
tables = ti.xcom_pull(task_ids='get_table_list', key='tables_to_process')
oracle_conn = None
triggered_count = 0
skipped_count = 0
try:
oracle_conn = oraconn.connect('MRDS_LOADER')
for table_owner, table_name in tables:
logging.info("Processing table: %s.%s", table_owner, table_name)
cursor = oracle_conn.cursor()
# Check if table is already running
service_name = store.upper()
sql_query = f"""
SELECT COUNT(*)
FROM CT_MRDS.A_DEVO_REPLICA_MGMT_{service_name}
WHERE OWNER = '{table_owner}'
AND TABLE_NAME = '{table_name}'
AND LAST_STATUS = 'RUNNING'
"""
cursor.execute(sql_query)
result = cursor.fetchone()
table_running_val = result[0] or 0
cursor.close()
if table_running_val >= 1:
logging.info("Table %s.%s is already running. Skipping.", table_owner, table_name)
skipped_count += 1
continue
# Trigger core DAG
from airflow.api.common.trigger_dag import trigger_dag
conf = {
"store": store,
"owner_table": f"{table_owner}.{table_name}"
}
trigger_dag(
dag_id='devo_table_generator_core',
conf=conf,
execution_date=None,
replace_microseconds=False
)
triggered_count += 1
logging.info("Triggered core DAG for table %s.%s", table_owner, table_name)
logging.info("Summary: Total DAGs triggered: %d, Skipped (already running): %d",
triggered_count, skipped_count)
ti.xcom_push(key='triggered_count', value=triggered_count)
ti.xcom_push(key='skipped_count', value=skipped_count)
except Exception as e:
logging.error(f"Error in trigger_tables: {e}")
raise
finally:
if oracle_conn:
oracle_conn.close()
t2 = PythonOperator(
task_id='trigger_tables',
python_callable=trigger_tables,
)
# Dependencies
init >> t1 >> t2
"""
RAR Trigger DAG
1) init_step
- Gets environment from MRDS_ENV environment variable
- Reads owner_table parameter from DAG configuration
- Validates owner_table format (must be OWNER.TABLE_NAME or __ALL_EXCLUDE_COPY__)
- Sets store to "rar" (fixed for this DAG)
- Determines if running all tables excluding _COPY
- Pushes parameters to XCom
2) get_table_list
- Connects to Oracle database (MRDS_LOADER)
- If __ALL_EXCLUDE_COPY__: queries all tables from CT_MRDS.a_devo_replica_mgmt_rar excluding _COPY tables
- If specific owner_table provided: creates single table list
- If no owner_table: queries all tables from CT_MRDS.a_devo_replica_mgmt_rar
- Returns list of (owner, table_name) tuples to process
- Pushes table list to XCom
3) trigger_tables
- Loops through each table from the table list
- For each table:
- Checks if table is already running
- If table already running: skips to next table
- If not running: triggers core DAG with table parameters
- Counts and logs total number of DAGs triggered and skipped
- No threshold checking or waiting logic
"""

View File

@@ -0,0 +1,239 @@
from __future__ import annotations
import os
import sys
import logging
from datetime import timedelta
from airflow import DAG
from airflow.utils.dates import days_ago
from airflow.operators.python import PythonOperator
from airflow.models import Param
sys.path.append('/opt/airflow/python/connectors/devo')
sys.path.append('/opt/airflow/python/mrds_common')
from mrds.utils import oraconn
# Get RQSD table options for dropdown.
# NOTE(review): this runs at DAG-parse time, i.e. on every scheduler parse of
# this file — confirm the extra Oracle load is acceptable.
def get_rqsd_table_options():
    """Return 'OWNER.TABLE_NAME' strings for all registered RQSD tables.

    Best-effort: database errors are logged and an empty list is returned so
    DAG parsing never fails. The cursor is closed in all paths (previously it
    leaked when execute() raised).
    """
    oracle_conn = None
    try:
        oracle_conn = oraconn.connect('MRDS_LOADER')
        cursor = oracle_conn.cursor()
        try:
            cursor.execute("SELECT OWNER || '.' || TABLE_NAME FROM CT_MRDS.a_devo_replica_mgmt_rqsd ORDER BY OWNER, TABLE_NAME")
            options = [row[0] for row in cursor.fetchall()]
        finally:
            # Close the cursor even when the query fails.
            cursor.close()
        return options
    except Exception as e:
        logging.error(f"Error getting RQSD table options: {e}")
        return []
    finally:
        if oracle_conn:
            oracle_conn.close()
# Default task-level arguments shared by every task in this DAG.
default_args = {
    'owner': 'devo',
    'depends_on_past': False,
    'start_date': days_ago(1),
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,  # one automatic retry per task
    'retry_delay': timedelta(minutes=1),
}
# Manually-triggered DAG that fans out RQSD table replication runs.
with DAG(
    dag_id='devo_table_generator_trigger_rqsd',
    default_args=default_args,
    description='External trigger DAG for RQSD tables',
    schedule=None,  # manual / externally triggered only
    catchup=False,
    tags=['DevoTableGenerator', 'DevoTableGeneratorTrigger'],
    params={
        "owner_table": Param(
            # NOTE(review): default=None with type="string" fails Param
            # JSON-schema validation on Airflow >= 2.6 — confirm the deployed
            # Airflow version, or switch to type=["null", "string"].
            default=None,
            type="string",
            description="Select table in format OWNER.TABLE_NAME",
            # NOTE(review): evaluated at DAG-parse time (DB query per parse).
            enum=get_rqsd_table_options()
        )
    }
) as dag:
# Init
def init_step(**context):
dag_run = context.get("dag_run")
ti = context["ti"]
conf = (dag_run.conf or {}) if dag_run else {}
env = os.getenv("MRDS_ENV")
if not env:
raise ValueError("MRDS_ENV environment variable is required")
env = env.lower()
store = "rqsd"
owner_table = conf.get("owner_table")
if not owner_table:
raise ValueError("owner_table parameter is required")
if '.' not in owner_table:
raise ValueError("owner_table must be in format 'OWNER.TABLE_NAME'")
table_owner, table_name = owner_table.split('.', 1)
if env not in {"dev", "tst", "acc", "prd"}:
raise ValueError(f"Unsupported env '{env}'. Expected 'dev', 'tst', 'acc' or 'prd'.")
logging.info("=== init_step === env=%s store=%s owner_table=%s",
env, store, owner_table)
xcom = {
"env": env,
"store": store,
"table_owner": table_owner,
"table_name": table_name,
"owner_table": owner_table,
}
for k, v in xcom.items():
ti.xcom_push(key=k, value=v)
init = PythonOperator(
task_id='init_step',
python_callable=init_step,
)
# Get table list
def get_table_list(**context):
ti = context["ti"]
store = ti.xcom_pull(task_ids='init_step', key='store')
owner_table = ti.xcom_pull(task_ids='init_step', key='owner_table')
oracle_conn = None
try:
oracle_conn = oraconn.connect('MRDS_LOADER')
if owner_table:
table_owner, table_name = owner_table.split('.', 1)
tables = [(table_owner, table_name)]
logging.info("Processing specific table: %s", owner_table)
else:
cursor = oracle_conn.cursor()
cursor.execute("SELECT OWNER, TABLE_NAME FROM CT_MRDS.a_devo_replica_mgmt_rqsd ORDER BY OWNER, TABLE_NAME")
tables = cursor.fetchall()
cursor.close()
logging.info("Found %d tables for RQSD", len(tables))
ti.xcom_push(key='tables_to_process', value=tables)
return tables
except Exception as e:
logging.error(f"Error in get_table_list: {e}")
raise
finally:
if oracle_conn:
oracle_conn.close()
t1 = PythonOperator(
task_id='get_table_list',
python_callable=get_table_list,
)
# Trigger core DAG for each table
def trigger_tables(**context):
ti = context["ti"]
env = ti.xcom_pull(task_ids='init_step', key='env')
store = ti.xcom_pull(task_ids='init_step', key='store')
tables = ti.xcom_pull(task_ids='get_table_list', key='tables_to_process')
oracle_conn = None
triggered_count = 0
skipped_count = 0
try:
oracle_conn = oraconn.connect('MRDS_LOADER')
for table_owner, table_name in tables:
logging.info("Processing table: %s.%s", table_owner, table_name)
cursor = oracle_conn.cursor()
# Check if table is already running
service_name = store.upper()
sql_query = f"""
SELECT COUNT(*)
FROM CT_MRDS.A_DEVO_REPLICA_MGMT_{service_name}
WHERE OWNER = '{table_owner}'
AND TABLE_NAME = '{table_name}'
AND LAST_STATUS = 'RUNNING'
"""
cursor.execute(sql_query)
result = cursor.fetchone()
table_running_val = result[0] or 0
cursor.close()
if table_running_val >= 1:
logging.info("Table %s.%s is already running. Skipping.", table_owner, table_name)
skipped_count += 1
continue
# Trigger core DAG
from airflow.api.common.trigger_dag import trigger_dag
conf = {
"store": store,
"owner_table": f"{table_owner}.{table_name}"
}
trigger_dag(
dag_id='devo_table_generator_core',
conf=conf,
execution_date=None,
replace_microseconds=False
)
triggered_count += 1
logging.info("Triggered core DAG for table %s.%s", table_owner, table_name)
logging.info("Summary: Total DAGs triggered: %d, Skipped (already running): %d",
triggered_count, skipped_count)
ti.xcom_push(key='triggered_count', value=triggered_count)
ti.xcom_push(key='skipped_count', value=skipped_count)
except Exception as e:
logging.error(f"Error in trigger_tables: {e}")
raise
finally:
if oracle_conn:
oracle_conn.close()
t2 = PythonOperator(
task_id='trigger_tables',
python_callable=trigger_tables,
)
# Dependencies
init >> t1 >> t2
"""
RQSD Trigger DAG
1) init_step
- Gets environment from MRDS_ENV environment variable
- Reads owner_table parameter from DAG configuration
- Validates owner_table format (must be OWNER.TABLE_NAME)
- Sets store to "rqsd" (fixed for this DAG)
- Pushes parameters to XCom
2) get_table_list
- Connects to Oracle database (MRDS_LOADER)
- If specific owner_table provided: creates single table list
- If no owner_table: queries all tables from CT_MRDS.a_devo_replica_mgmt_rqsd
- Returns list of (owner, table_name) tuples to process
- Pushes table list to XCom
3) trigger_tables
- Loops through each table from the table list
- For each table:
- Checks if table is already running
- If table already running: skips to next table
- If not running: triggers core DAG with table parameters
- Counts and logs total number of DAGs triggered and skipped
- No threshold checking or waiting logic
"""

View File

@@ -0,0 +1,146 @@
import os
from airflow.decorators import dag
from airflow.operators.bash import BashOperator
from airflow.operators.python import PythonOperator
from airflow.utils.dates import days_ago
from airflow.utils.trigger_rule import TriggerRule
from cosmos import DbtTaskGroup, ProfileConfig, ProjectConfig, RenderConfig
# Derive the DAG id from this file's name so the filename is the single source of truth.
DAG_NAME = os.path.splitext(os.path.basename(__file__))[0]
# Deployment environment (e.g. "dev", "test"); empty string when MRDS_ENV is unset.
ENV_NAME = os.getenv("MRDS_ENV", "").lower()
# Logical database name per environment; anything unknown falls back to "MOPDB".
DATABASE_NAME_MAP = {
    "dev": "MOPDB",
    "test": "MOPDB_TEST",
}
DATABASE_NAME = DATABASE_NAME_MAP.get(ENV_NAME, "MOPDB")
# check cron 2
# Filesystem layout of the dbt project baked into the Airflow image.
dbt_root_path = "/opt/dbt"
dbt_profiles_dir = "/opt/dbt/profiles.yml"
dbt_profiles_dir_parent = "/opt/dbt"
# Environment passed to every dbt invocation (BashOperator and cosmos tasks).
dbt_env = {
    "DBT_PROFILES_DIR": dbt_profiles_dir_parent,
    "DBT_TARGET": ENV_NAME,
    # Oracle connection settings come from the runtime environment.
    "MRDS_LOADER_DB_USER": os.getenv("MRDS_LOADER_DB_USER"),
    "MRDS_LOADER_DB_PASS": os.getenv("MRDS_LOADER_DB_PASS"),
    "MRDS_LOADER_DB_TNS": os.getenv("MRDS_LOADER_DB_TNS", "XE"),
    "MRDS_SCHEMA": os.getenv("MRDS_SCHEMA", "CT_MRDS"),
    "MRDS_PROTOCOL": os.getenv("MRDS_PROTOCOL", "tcps"),
    "MRDS_THREADS": os.getenv("MRDS_THREADS", "4"),
    "DBT_LOG_PATH": "/opt/dbt/logs",
    "DBT_TARGET_PATH": "/opt/dbt/target",
    "PYTHONUNBUFFERED": "1",
}
def retrieve_run_id(**kwargs):
    """Push this DAG run's run_id to XCom (key 'run_id') and return it."""
    task_instance, current_run_id = kwargs["ti"], kwargs["run_id"]
    task_instance.xcom_push(key="run_id", value=current_run_id)
    return current_run_id
def check_dag_status(**kwargs):
    """Fail this task if any OTHER task in the current DAG run failed."""
    own_task_id = kwargs["task_instance"].task_id
    for task_instance in kwargs["dag_run"].get_task_instances():
        if task_instance.task_id == own_task_id:
            continue  # never fail on our own state
        if task_instance.state == "failed":
            raise Exception(f"Task {task_instance.task_id} failed. Failing this DAG run")
# DAG factory: runs the tag-selected dbt models wrapped with
# control_external_run start/end bookkeeping.
@dag(
    dag_id=DAG_NAME,
    schedule_interval=None,  # externally triggered only
    start_date=days_ago(2),
    catchup=False,
)
def run_dag():
    def read_vars(**context):
        # Logs bucket-related environment for debugging; returns a dummy value.
        BUCKET = os.getenv("INBOX_BUCKET")
        BUCKET_NAMESPACE = os.getenv("BUCKET_NAMESPACE")
        print("========= DBT ENV =========")
        print(f"BUCKET_NAMESPACE: {BUCKET_NAMESPACE}, BUCKET : {BUCKET}")
        return 1
    read_vars_task = PythonOperator(
        task_id="read_vars",
        python_callable=read_vars,
        provide_context=True,  # NOTE(review): deprecated no-op on Airflow 2.x
    )
    # Capture the run_id early so dbt operations can tag rows with it.
    retrieve_run_id_task = PythonOperator(
        task_id="retrieve_run_id",
        python_callable=retrieve_run_id,
        provide_context=True,
    )
    # Opens the external-run bookkeeping record. The '{{ ... }}' block is
    # rendered by Airflow's Jinja templating before bash runs; tojson turns
    # the mapping into the dbt --vars payload.
    control_external_run_start = BashOperator(
        task_id="control_external_run_start",
        params={"db": DATABASE_NAME, "wf": DAG_NAME},
        env=dbt_env,
        bash_command="""
        set -euxo pipefail
        cd /opt/dbt
        dbt --log-format json --log-level debug --debug --log-path /opt/dbt/logs \
            run-operation control_external_run_start \
            --vars '{{ {
                "orchestration_run_id": ti.xcom_pull(task_ids="retrieve_run_id", key="run_id"),
                "input_service_name": params.db,
                "workflow_name": params.wf
            } | tojson }}'
        """,
    )
    # Shared cosmos configuration for the dbt task group.
    common_profile = ProfileConfig(
        profiles_yml_filepath=dbt_profiles_dir,
        profile_name="mrds",
        target_name=ENV_NAME,
    )
    common_project = ProjectConfig(dbt_project_path=dbt_root_path)
    common_vars = {
        "orchestration_run_id": "{{ ti.xcom_pull(task_ids='retrieve_run_id', key='run_id') }}",
        "input_service_name": DATABASE_NAME,
        "workflow_name": DAG_NAME,
    }
    common_operator_args = {
        "vars": common_vars,
        "env": dbt_env,
    }
    # dbt models selected by tag for this mapping.
    m_MOPDB_CSDB_DEBT_OU_CSDB_DEBT = DbtTaskGroup(
        group_id="m_MOPDB_CSDB_DEBT_OU_CSDB_DEBT",
        project_config=common_project,
        profile_config=common_profile,
        render_config=RenderConfig(select=[
            "tag:m_MOPDB_CSDB_DEBT_OU_CSDB_DEBT",
        ]),
        operator_args=common_operator_args,
    )
    # Closes the bookkeeping record; ALL_DONE so it runs even after failures.
    control_external_run_end = BashOperator(
        task_id="control_external_run_end",
        params={"db": DATABASE_NAME, "wf": DAG_NAME},
        env=dbt_env,
        bash_command="""
        set -euxo pipefail
        cd /opt/dbt
        dbt --log-format json --log-level debug --debug --log-path /opt/dbt/logs \
            run-operation control_external_run_end \
            --vars '{{ {
                "orchestration_run_id": ti.xcom_pull(task_ids="retrieve_run_id", key="run_id"),
                "input_service_name": params.db,
                "workflow_name": params.wf
            } | tojson }}'
        """,
        trigger_rule=TriggerRule.ALL_DONE,
    )
    # Propagates any upstream failure to the DAG-run state (runs last, always).
    dag_status = PythonOperator(
        task_id="dag_status",
        provide_context=True,
        python_callable=check_dag_status,
        trigger_rule=TriggerRule.ALL_DONE,
    )
    read_vars_task >> retrieve_run_id_task >> control_external_run_start >> m_MOPDB_CSDB_DEBT_OU_CSDB_DEBT >> control_external_run_end >> dag_status
# Register the DAG object under the file-derived name at module level.
globals()[DAG_NAME] = run_dag()

View File

@@ -0,0 +1,156 @@
import os
from airflow.decorators import dag
from airflow.operators.bash import BashOperator
from airflow.operators.python import PythonOperator
from airflow.utils.dates import days_ago
from airflow.utils.trigger_rule import TriggerRule
from cosmos import DbtTaskGroup, ProfileConfig, ProjectConfig, RenderConfig
# Derive the DAG id from this file's name so the filename is the single source of truth.
DAG_NAME = os.path.splitext(os.path.basename(__file__))[0]
# Deployment environment (e.g. "dev", "test"); empty string when MRDS_ENV is unset.
ENV_NAME = os.getenv("MRDS_ENV", "").lower()
# Logical database name per environment; anything unknown falls back to "MOPDB".
DATABASE_NAME_MAP = {
    "dev": "MOPDB",
    "test": "MOPDB_TEST",
}
DATABASE_NAME = DATABASE_NAME_MAP.get(ENV_NAME, "MOPDB")
# check cron 2
# Filesystem layout of the dbt project baked into the Airflow image.
dbt_root_path = "/opt/dbt"
dbt_profiles_dir = "/opt/dbt/profiles.yml"
dbt_profiles_dir_parent = "/opt/dbt"
# Environment passed to every dbt invocation (BashOperator and cosmos tasks).
dbt_env = {
    "DBT_PROFILES_DIR": dbt_profiles_dir_parent,
    "DBT_TARGET": ENV_NAME,
    # Oracle connection settings come from the runtime environment.
    "MRDS_LOADER_DB_USER": os.getenv("MRDS_LOADER_DB_USER"),
    "MRDS_LOADER_DB_PASS": os.getenv("MRDS_LOADER_DB_PASS"),
    "MRDS_LOADER_DB_TNS": os.getenv("MRDS_LOADER_DB_TNS", "XE"),
    "MRDS_SCHEMA": os.getenv("MRDS_SCHEMA", "CT_MRDS"),
    "MRDS_PROTOCOL": os.getenv("MRDS_PROTOCOL", "tcps"),
    "MRDS_THREADS": os.getenv("MRDS_THREADS", "4"),
    "DBT_LOG_PATH": "/opt/dbt/logs",
    "DBT_TARGET_PATH": "/opt/dbt/target",
    "PYTHONUNBUFFERED": "1",
}
def retrieve_run_id(**kwargs):
    """Push this DAG run's run_id to XCom (key 'run_id') and return it."""
    task_instance, current_run_id = kwargs["ti"], kwargs["run_id"]
    task_instance.xcom_push(key="run_id", value=current_run_id)
    return current_run_id
def check_dag_status(**kwargs):
    """Fail this task if any OTHER task in the current DAG run failed."""
    own_task_id = kwargs["task_instance"].task_id
    for task_instance in kwargs["dag_run"].get_task_instances():
        if task_instance.task_id == own_task_id:
            continue  # never fail on our own state
        if task_instance.state == "failed":
            raise Exception(f"Task {task_instance.task_id} failed. Failing this DAG run")
# DAG factory: runs the tag-selected dbt models wrapped with
# control_external_run start/end bookkeeping.
@dag(
    dag_id=DAG_NAME,
    schedule_interval=None,  # externally triggered only
    start_date=days_ago(2),
    catchup=False,
)
def run_dag():
    def read_vars(**context):
        # Logs bucket-related environment for debugging; returns a dummy value.
        BUCKET = os.getenv("INBOX_BUCKET")
        BUCKET_NAMESPACE = os.getenv("BUCKET_NAMESPACE")
        print("========= DBT ENV =========")
        print(f"BUCKET_NAMESPACE: {BUCKET_NAMESPACE}, BUCKET : {BUCKET}")
        return 1
    read_vars_task = PythonOperator(
        task_id="read_vars",
        python_callable=read_vars,
        provide_context=True,  # NOTE(review): deprecated no-op on Airflow 2.x
    )
    # Capture the run_id early so dbt operations can tag rows with it.
    retrieve_run_id_task = PythonOperator(
        task_id="retrieve_run_id",
        python_callable=retrieve_run_id,
        provide_context=True,
    )
    # Opens the external-run bookkeeping record. The '{{ ... }}' block is
    # rendered by Airflow's Jinja templating before bash runs; tojson turns
    # the mapping into the dbt --vars payload.
    control_external_run_start = BashOperator(
        task_id="control_external_run_start",
        params={"db": DATABASE_NAME, "wf": DAG_NAME},
        env=dbt_env,
        bash_command="""
        set -euxo pipefail
        cd /opt/dbt
        dbt --log-format json --log-level debug --debug --log-path /opt/dbt/logs \
            run-operation control_external_run_start \
            --vars '{{ {
                "orchestration_run_id": ti.xcom_pull(task_ids="retrieve_run_id", key="run_id"),
                "input_service_name": params.db,
                "workflow_name": params.wf
            } | tojson }}'
        """,
    )
    # Shared cosmos configuration for the dbt task group.
    common_profile = ProfileConfig(
        profiles_yml_filepath=dbt_profiles_dir,
        profile_name="mrds",
        target_name=ENV_NAME,
    )
    common_project = ProjectConfig(dbt_project_path=dbt_root_path)
    common_vars = {
        "orchestration_run_id": "{{ ti.xcom_pull(task_ids='retrieve_run_id', key='run_id') }}",
        "input_service_name": DATABASE_NAME,
        "workflow_name": DAG_NAME,
    }
    common_operator_args = {
        "vars": common_vars,
        "env": dbt_env,
    }
    # dbt models selected by tag for this mapping.
    m_MOPDB_LM_T_FC_ADH_ADJUSTMENTS_OU_LM_ADHOC_ADJUSTMENT = DbtTaskGroup(
        group_id="m_MOPDB_LM_T_FC_ADH_ADJUSTMENTS_OU_LM_ADHOC_ADJUSTMENT",
        project_config=common_project,
        profile_config=common_profile,
        render_config=RenderConfig(select=[
            "tag:m_MOPDB_LM_T_FC_ADH_ADJUSTMENTS_OU_LM_ADHOC_ADJUSTMENT",
        ]),
        operator_args=common_operator_args,
    )
    # Closes the bookkeeping record; ALL_DONE so it runs even after failures.
    control_external_run_end = BashOperator(
        task_id="control_external_run_end",
        params={"db": DATABASE_NAME, "wf": DAG_NAME},
        env=dbt_env,
        bash_command="""
        set -euxo pipefail
        cd /opt/dbt
        dbt --log-format json --log-level debug --debug --log-path /opt/dbt/logs \
            run-operation control_external_run_end \
            --vars '{{ {
                "orchestration_run_id": ti.xcom_pull(task_ids="retrieve_run_id", key="run_id"),
                "input_service_name": params.db,
                "workflow_name": params.wf
            } | tojson }}'
        """,
        trigger_rule=TriggerRule.ALL_DONE,
    )
    # Propagates any upstream failure to the DAG-run state (runs last, always).
    dag_status = PythonOperator(
        task_id="dag_status",
        provide_context=True,
        python_callable=check_dag_status,
        trigger_rule=TriggerRule.ALL_DONE,
    )
    read_vars_task >> retrieve_run_id_task >> control_external_run_start >> m_MOPDB_LM_T_FC_ADH_ADJUSTMENTS_OU_LM_ADHOC_ADJUSTMENT >> control_external_run_end >> dag_status
# Register the DAG object under the file-derived name at module level.
globals()[DAG_NAME] = run_dag()

View File

@@ -0,0 +1,152 @@
import os
from airflow.decorators import dag
from airflow.operators.bash import BashOperator
from airflow.operators.python import PythonOperator
from airflow.utils.dates import days_ago
from airflow.utils.trigger_rule import TriggerRule
from cosmos import DbtTaskGroup, ProfileConfig, ProjectConfig, RenderConfig
# Derive the DAG id from this file's name so the filename is the single source of truth.
DAG_NAME = os.path.splitext(os.path.basename(__file__))[0]
# Deployment environment (e.g. "dev", "test"); empty string when MRDS_ENV is unset.
ENV_NAME = os.getenv("MRDS_ENV", "").lower()
# Logical database name per environment; anything unknown falls back to "MOPDB".
DATABASE_NAME_MAP = {
    "dev": "MOPDB",
    "test": "MOPDB_TEST",
}
DATABASE_NAME = DATABASE_NAME_MAP.get(ENV_NAME, "MOPDB")
# check cron 2
# Filesystem layout of the dbt project baked into the Airflow image.
dbt_root_path = "/opt/dbt"
dbt_profiles_dir = "/opt/dbt/profiles.yml"
dbt_profiles_dir_parent = "/opt/dbt"
# Environment passed to every dbt invocation (BashOperator and cosmos tasks).
dbt_env = {
    "DBT_PROFILES_DIR": dbt_profiles_dir_parent,
    "DBT_TARGET": ENV_NAME,
    # Oracle connection settings come from the runtime environment.
    "MRDS_LOADER_DB_USER": os.getenv("MRDS_LOADER_DB_USER"),
    "MRDS_LOADER_DB_PASS": os.getenv("MRDS_LOADER_DB_PASS"),
    "MRDS_LOADER_DB_TNS": os.getenv("MRDS_LOADER_DB_TNS", "XE"),
    "MRDS_SCHEMA": os.getenv("MRDS_SCHEMA", "CT_MRDS"),
    "MRDS_PROTOCOL": os.getenv("MRDS_PROTOCOL", "tcps"),
    "MRDS_THREADS": os.getenv("MRDS_THREADS", "4"),
    "DBT_LOG_PATH": "/opt/dbt/logs",
    "DBT_TARGET_PATH": "/opt/dbt/target",
    "PYTHONUNBUFFERED": "1",
}
def retrieve_run_id(**kwargs):
    """Push this DAG run's run_id to XCom (key 'run_id') and return it."""
    task_instance, current_run_id = kwargs["ti"], kwargs["run_id"]
    task_instance.xcom_push(key="run_id", value=current_run_id)
    return current_run_id
def check_dag_status(**kwargs):
    """Fail this task if any OTHER task in the current DAG run failed."""
    own_task_id = kwargs["task_instance"].task_id
    for task_instance in kwargs["dag_run"].get_task_instances():
        if task_instance.task_id == own_task_id:
            continue  # never fail on our own state
        if task_instance.state == "failed":
            raise Exception(f"Task {task_instance.task_id} failed. Failing this DAG run")
# DAG factory: runs the tag-selected dbt models wrapped with
# control_external_run start/end bookkeeping.
@dag(
    dag_id=DAG_NAME,
    schedule_interval=None,  # externally triggered only
    start_date=days_ago(2),
    catchup=False,
)
def run_dag():
    def read_vars(**context):
        # Logs bucket-related environment for debugging; returns a dummy value.
        BUCKET = os.getenv("INBOX_BUCKET")
        BUCKET_NAMESPACE = os.getenv("BUCKET_NAMESPACE")
        print("========= DBT ENV =========")
        print(f"BUCKET_NAMESPACE: {BUCKET_NAMESPACE}, BUCKET : {BUCKET}")
        return 1
    read_vars_task = PythonOperator(
        task_id="read_vars",
        python_callable=read_vars,
        provide_context=True,  # NOTE(review): deprecated no-op on Airflow 2.x
    )
    # Capture the run_id early so dbt operations can tag rows with it.
    retrieve_run_id_task = PythonOperator(
        task_id="retrieve_run_id",
        python_callable=retrieve_run_id,
        provide_context=True,
    )
    # Opens the external-run bookkeeping record. The '{{ ... }}' block is
    # rendered by Airflow's Jinja templating before bash runs; tojson turns
    # the mapping into the dbt --vars payload.
    control_external_run_start = BashOperator(
        task_id="control_external_run_start",
        params={"db": DATABASE_NAME, "wf": DAG_NAME},
        env=dbt_env,
        bash_command="""
        set -euxo pipefail
        cd /opt/dbt
        dbt --log-format json --log-level debug --debug --log-path /opt/dbt/logs \
            run-operation control_external_run_start \
            --vars '{{ {
                "orchestration_run_id": ti.xcom_pull(task_ids="retrieve_run_id", key="run_id"),
                "input_service_name": params.db,
                "workflow_name": params.wf
            } | tojson }}'
        """,
    )
    # Shared cosmos configuration for the dbt task group.
    common_profile = ProfileConfig(
        profiles_yml_filepath=dbt_profiles_dir,
        profile_name="mrds",
        target_name=ENV_NAME,
    )
    common_project = ProjectConfig(dbt_project_path=dbt_root_path)
    common_vars = {
        "orchestration_run_id": "{{ ti.xcom_pull(task_ids='retrieve_run_id', key='run_id') }}",
        "input_service_name": DATABASE_NAME,
        "workflow_name": DAG_NAME,
    }
    common_operator_args = {
        "vars": common_vars,
        "env": dbt_env,
    }
    # dbt models selected by tag for this mapping.
    m_MOPDB_LM_T_BALANCE_SHEET_OU_LM_BALANCE_SHEET = DbtTaskGroup(
        group_id="m_MOPDB_LM_T_BALANCE_SHEET_OU_LM_BALANCE_SHEET",
        project_config=common_project,
        profile_config=common_profile,
        render_config=RenderConfig(select=[
            "tag:m_MOPDB_LM_T_BALANCE_SHEET_OU_LM_BALANCE_SHEET",
        ]),
        operator_args=common_operator_args,
    )
    # Closes the bookkeeping record; ALL_DONE so it runs even after failures.
    control_external_run_end = BashOperator(
        task_id="control_external_run_end",
        params={"db": DATABASE_NAME, "wf": DAG_NAME},
        env=dbt_env,
        bash_command="""
        set -euxo pipefail
        cd /opt/dbt
        dbt --log-format json --log-level debug --debug --log-path /opt/dbt/logs \
            run-operation control_external_run_end \
            --vars '{{ {
                "orchestration_run_id": ti.xcom_pull(task_ids="retrieve_run_id", key="run_id"),
                "input_service_name": params.db,
                "workflow_name": params.wf
            } | tojson }}'
        """,
        trigger_rule=TriggerRule.ALL_DONE,
    )
    # Propagates any upstream failure to the DAG-run state (runs last, always).
    dag_status = PythonOperator(
        task_id="dag_status",
        provide_context=True,
        python_callable=check_dag_status,
        trigger_rule=TriggerRule.ALL_DONE,
    )
    read_vars_task >> retrieve_run_id_task >> control_external_run_start >> m_MOPDB_LM_T_BALANCE_SHEET_OU_LM_BALANCE_SHEET >> control_external_run_end >> dag_status
# Register the DAG object under the file-derived name at module level.
globals()[DAG_NAME] = run_dag()

View File

@@ -0,0 +1,147 @@
import os
from airflow.decorators import dag
from airflow.operators.bash import BashOperator
from airflow.operators.python import PythonOperator
from airflow.utils.dates import days_ago
from airflow.utils.trigger_rule import TriggerRule
from cosmos import DbtTaskGroup, ProfileConfig, ProjectConfig, RenderConfig
# Derive the DAG id from this file's name so the filename is the single source of truth.
DAG_NAME = os.path.splitext(os.path.basename(__file__))[0]
# Deployment environment (e.g. "dev", "test"); empty string when MRDS_ENV is unset.
ENV_NAME = os.getenv("MRDS_ENV", "").lower()
# Logical database name per environment; anything unknown falls back to "MOPDB".
DATABASE_NAME_MAP = {
    "dev": "MOPDB",
    "test": "MOPDB_TEST",
}
DATABASE_NAME = DATABASE_NAME_MAP.get(ENV_NAME, "MOPDB")
# check cron 2
# Filesystem layout of the dbt project baked into the Airflow image.
dbt_root_path = "/opt/dbt"
dbt_profiles_dir = "/opt/dbt/profiles.yml"
dbt_profiles_dir_parent = "/opt/dbt"
# Environment passed to every dbt invocation (BashOperator and cosmos tasks).
dbt_env = {
    "DBT_PROFILES_DIR": dbt_profiles_dir_parent,
    "DBT_TARGET": ENV_NAME,
    # Oracle connection settings come from the runtime environment.
    "MRDS_LOADER_DB_USER": os.getenv("MRDS_LOADER_DB_USER"),
    "MRDS_LOADER_DB_PASS": os.getenv("MRDS_LOADER_DB_PASS"),
    "MRDS_LOADER_DB_TNS": os.getenv("MRDS_LOADER_DB_TNS", "XE"),
    "MRDS_SCHEMA": os.getenv("MRDS_SCHEMA", "CT_MRDS"),
    "MRDS_PROTOCOL": os.getenv("MRDS_PROTOCOL", "tcps"),
    "MRDS_THREADS": os.getenv("MRDS_THREADS", "4"),
    "DBT_LOG_PATH": "/opt/dbt/logs",
    "DBT_TARGET_PATH": "/opt/dbt/target",
    "PYTHONUNBUFFERED": "1",
}
def retrieve_run_id(**kwargs):
    """Push this DAG run's run_id to XCom (key 'run_id') and return it."""
    task_instance, current_run_id = kwargs["ti"], kwargs["run_id"]
    task_instance.xcom_push(key="run_id", value=current_run_id)
    return current_run_id
def check_dag_status(**kwargs):
    """Fail this task if any OTHER task in the current DAG run failed."""
    own_task_id = kwargs["task_instance"].task_id
    for task_instance in kwargs["dag_run"].get_task_instances():
        if task_instance.task_id == own_task_id:
            continue  # never fail on our own state
        if task_instance.state == "failed":
            raise Exception(f"Task {task_instance.task_id} failed. Failing this DAG run")
# DAG factory: runs the tag-selected dbt models wrapped with
# control_external_run start/end bookkeeping.
@dag(
    dag_id=DAG_NAME,
    schedule_interval=None,  # externally triggered only
    start_date=days_ago(2),
    catchup=False,
)
def run_dag():
    def read_vars(**context):
        # Logs bucket-related environment for debugging; returns a dummy value.
        BUCKET = os.getenv("INBOX_BUCKET")
        BUCKET_NAMESPACE = os.getenv("BUCKET_NAMESPACE")
        print("========= DBT ENV =========")
        print(f"BUCKET_NAMESPACE: {BUCKET_NAMESPACE}, BUCKET : {BUCKET}")
        return 1
    read_vars_task = PythonOperator(
        task_id="read_vars",
        python_callable=read_vars,
        provide_context=True,  # NOTE(review): deprecated no-op on Airflow 2.x
    )
    # Capture the run_id early so dbt operations can tag rows with it.
    retrieve_run_id_task = PythonOperator(
        task_id="retrieve_run_id",
        python_callable=retrieve_run_id,
        provide_context=True,
    )
    # Opens the external-run bookkeeping record. The '{{ ... }}' block is
    # rendered by Airflow's Jinja templating before bash runs; tojson turns
    # the mapping into the dbt --vars payload.
    control_external_run_start = BashOperator(
        task_id="control_external_run_start",
        params={"db": DATABASE_NAME, "wf": DAG_NAME},
        env=dbt_env,
        bash_command="""
        set -euxo pipefail
        cd /opt/dbt
        dbt --log-format json --log-level debug --debug --log-path /opt/dbt/logs \
            run-operation control_external_run_start \
            --vars '{{ {
                "orchestration_run_id": ti.xcom_pull(task_ids="retrieve_run_id", key="run_id"),
                "input_service_name": params.db,
                "workflow_name": params.wf
            } | tojson }}'
        """,
    )
    # Shared cosmos configuration for the dbt task group.
    common_profile = ProfileConfig(
        profiles_yml_filepath=dbt_profiles_dir,
        profile_name="mrds",
        target_name=ENV_NAME,
    )
    common_project = ProjectConfig(dbt_project_path=dbt_root_path)
    common_vars = {
        "orchestration_run_id": "{{ ti.xcom_pull(task_ids='retrieve_run_id', key='run_id') }}",
        "input_service_name": DATABASE_NAME,
        "workflow_name": DAG_NAME,
    }
    common_operator_args = {
        "vars": common_vars,
        "env": dbt_env,
    }
    # dbt models selected by tag for this mapping.
    m_MOPDB_LM_T_FC_CSM_ADJUSTMENTS_OU_LM_CSM_ADJUSTMENT = DbtTaskGroup(
        group_id="m_MOPDB_LM_T_FC_CSM_ADJUSTMENTS_OU_LM_CSM_ADJUSTMENT",
        project_config=common_project,
        profile_config=common_profile,
        render_config=RenderConfig(select=[
            "tag:m_MOPDB_LM_T_FC_CSM_ADJUSTMENTS_OU_LM_CSM_ADJUSTMENT",
        ]),
        operator_args=common_operator_args,
    )
    # Closes the bookkeeping record; ALL_DONE so it runs even after failures.
    control_external_run_end = BashOperator(
        task_id="control_external_run_end",
        params={"db": DATABASE_NAME, "wf": DAG_NAME},
        env=dbt_env,
        bash_command="""
        set -euxo pipefail
        cd /opt/dbt
        dbt --log-format json --log-level debug --debug --log-path /opt/dbt/logs \
            run-operation control_external_run_end \
            --vars '{{ {
                "orchestration_run_id": ti.xcom_pull(task_ids="retrieve_run_id", key="run_id"),
                "input_service_name": params.db,
                "workflow_name": params.wf
            } | tojson }}'
        """,
        trigger_rule=TriggerRule.ALL_DONE,
    )
    # Propagates any upstream failure to the DAG-run state (runs last, always).
    dag_status = PythonOperator(
        task_id="dag_status",
        provide_context=True,
        python_callable=check_dag_status,
        trigger_rule=TriggerRule.ALL_DONE,
    )
    read_vars_task >> retrieve_run_id_task >> control_external_run_start >> m_MOPDB_LM_T_FC_CSM_ADJUSTMENTS_OU_LM_CSM_ADJUSTMENT >> control_external_run_end >> dag_status
# Register the DAG object under the file-derived name at module level.
globals()[DAG_NAME] = run_dag()

View File

@@ -0,0 +1,157 @@
import os
from airflow.decorators import dag
from airflow.operators.bash import BashOperator
from airflow.operators.python import PythonOperator
from airflow.utils.dates import days_ago
from airflow.utils.trigger_rule import TriggerRule
from cosmos import DbtTaskGroup, ProfileConfig, ProjectConfig, RenderConfig
# Derive the DAG id from this file's name so the filename is the single source of truth.
DAG_NAME = os.path.splitext(os.path.basename(__file__))[0]
# Deployment environment (e.g. "dev", "test"); empty string when MRDS_ENV is unset.
ENV_NAME = os.getenv("MRDS_ENV", "").lower()
# Logical database name per environment; anything unknown falls back to "MOPDB".
DATABASE_NAME_MAP = {
    "dev": "MOPDB",
    "test": "MOPDB_TEST",
}
DATABASE_NAME = DATABASE_NAME_MAP.get(ENV_NAME, "MOPDB")
# check cron 2
# Filesystem layout of the dbt project baked into the Airflow image.
dbt_root_path = "/opt/dbt"
dbt_profiles_dir = "/opt/dbt/profiles.yml"
dbt_profiles_dir_parent = "/opt/dbt"
# Environment passed to every dbt invocation (BashOperator and cosmos tasks).
dbt_env = {
    "DBT_PROFILES_DIR": dbt_profiles_dir_parent,
    "DBT_TARGET": ENV_NAME,
    # Oracle connection settings come from the runtime environment.
    "MRDS_LOADER_DB_USER": os.getenv("MRDS_LOADER_DB_USER"),
    "MRDS_LOADER_DB_PASS": os.getenv("MRDS_LOADER_DB_PASS"),
    "MRDS_LOADER_DB_TNS": os.getenv("MRDS_LOADER_DB_TNS", "XE"),
    "MRDS_SCHEMA": os.getenv("MRDS_SCHEMA", "CT_MRDS"),
    "MRDS_PROTOCOL": os.getenv("MRDS_PROTOCOL", "tcps"),
    "MRDS_THREADS": os.getenv("MRDS_THREADS", "4"),
    "DBT_LOG_PATH": "/opt/dbt/logs",
    "DBT_TARGET_PATH": "/opt/dbt/target",
    "PYTHONUNBUFFERED": "1",
}
def retrieve_run_id(**kwargs):
    """Push this DAG run's run_id to XCom (key 'run_id') and return it."""
    task_instance, current_run_id = kwargs["ti"], kwargs["run_id"]
    task_instance.xcom_push(key="run_id", value=current_run_id)
    return current_run_id
def check_dag_status(**kwargs):
    """Fail this task if any OTHER task in the current DAG run failed."""
    own_task_id = kwargs["task_instance"].task_id
    for task_instance in kwargs["dag_run"].get_task_instances():
        if task_instance.task_id == own_task_id:
            continue  # never fail on our own state
        if task_instance.state == "failed":
            raise Exception(f"Task {task_instance.task_id} failed. Failing this DAG run")
# DAG factory: runs the tag-selected dbt models wrapped with
# control_external_run start/end bookkeeping.
@dag(
    dag_id=DAG_NAME,
    schedule_interval=None,  # externally triggered only
    start_date=days_ago(2),
    catchup=False,
)
def run_dag():
    def read_vars(**context):
        # Logs bucket-related environment for debugging; returns a dummy value.
        BUCKET = os.getenv("INBOX_BUCKET")
        BUCKET_NAMESPACE = os.getenv("BUCKET_NAMESPACE")
        print("========= DBT ENV =========")
        print(f"BUCKET_NAMESPACE: {BUCKET_NAMESPACE}, BUCKET : {BUCKET}")
        return 1
    read_vars_task = PythonOperator(
        task_id="read_vars",
        python_callable=read_vars,
        provide_context=True,  # NOTE(review): deprecated no-op on Airflow 2.x
    )
    # Capture the run_id early so dbt operations can tag rows with it.
    retrieve_run_id_task = PythonOperator(
        task_id="retrieve_run_id",
        python_callable=retrieve_run_id,
        provide_context=True,
    )
    # Opens the external-run bookkeeping record. The '{{ ... }}' block is
    # rendered by Airflow's Jinja templating before bash runs; tojson turns
    # the mapping into the dbt --vars payload.
    control_external_run_start = BashOperator(
        task_id="control_external_run_start",
        params={"db": DATABASE_NAME, "wf": DAG_NAME},
        env=dbt_env,
        bash_command="""
        set -euxo pipefail
        cd /opt/dbt
        dbt --log-format json --log-level debug --debug --log-path /opt/dbt/logs \
            run-operation control_external_run_start \
            --vars '{{ {
                "orchestration_run_id": ti.xcom_pull(task_ids="retrieve_run_id", key="run_id"),
                "input_service_name": params.db,
                "workflow_name": params.wf
            } | tojson }}'
        """,
    )
    # Shared cosmos configuration for the dbt task group.
    common_profile = ProfileConfig(
        profiles_yml_filepath=dbt_profiles_dir,
        profile_name="mrds",
        target_name=ENV_NAME,
    )
    common_project = ProjectConfig(dbt_project_path=dbt_root_path)
    common_vars = {
        "orchestration_run_id": "{{ ti.xcom_pull(task_ids='retrieve_run_id', key='run_id') }}",
        "input_service_name": DATABASE_NAME,
        "workflow_name": DAG_NAME,
    }
    common_operator_args = {
        "vars": common_vars,
        "env": dbt_env,
    }
    # dbt models selected by tag for this mapping.
    m_MOPDB_LM_T_CURRENT_ACCOUNTS_OU_MRR_CURRENT_ACCOUNTS = DbtTaskGroup(
        group_id="m_MOPDB_LM_T_CURRENT_ACCOUNTS_OU_MRR_CURRENT_ACCOUNTS",
        project_config=common_project,
        profile_config=common_profile,
        render_config=RenderConfig(select=[
            "tag:m_MOPDB_LM_T_CURRENT_ACCOUNTS_OU_MRR_CURRENT_ACCOUNTS",
        ]),
        operator_args=common_operator_args,
    )
    # Closes the bookkeeping record; ALL_DONE so it runs even after failures.
    control_external_run_end = BashOperator(
        task_id="control_external_run_end",
        params={"db": DATABASE_NAME, "wf": DAG_NAME},
        env=dbt_env,
        bash_command="""
        set -euxo pipefail
        cd /opt/dbt
        dbt --log-format json --log-level debug --debug --log-path /opt/dbt/logs \
            run-operation control_external_run_end \
            --vars '{{ {
                "orchestration_run_id": ti.xcom_pull(task_ids="retrieve_run_id", key="run_id"),
                "input_service_name": params.db,
                "workflow_name": params.wf
            } | tojson }}'
        """,
        trigger_rule=TriggerRule.ALL_DONE,
    )
    # Propagates any upstream failure to the DAG-run state (runs last, always).
    dag_status = PythonOperator(
        task_id="dag_status",
        provide_context=True,
        python_callable=check_dag_status,
        trigger_rule=TriggerRule.ALL_DONE,
    )
    read_vars_task >> retrieve_run_id_task >> control_external_run_start >> m_MOPDB_LM_T_CURRENT_ACCOUNTS_OU_MRR_CURRENT_ACCOUNTS >> control_external_run_end >> dag_status
# Register the DAG object under the file-derived name at module level.
globals()[DAG_NAME] = run_dag()

View File

@@ -0,0 +1,149 @@
import os
from airflow.decorators import dag
from airflow.operators.bash import BashOperator
from airflow.operators.python import PythonOperator
from airflow.utils.dates import days_ago
from airflow.utils.trigger_rule import TriggerRule
from cosmos import DbtTaskGroup, ProfileConfig, ProjectConfig, RenderConfig
# Derive the DAG id from this file's name so the filename is the single source of truth.
DAG_NAME = os.path.splitext(os.path.basename(__file__))[0]
# Deployment environment (e.g. "dev", "test"); empty string when MRDS_ENV is unset.
ENV_NAME = os.getenv("MRDS_ENV", "").lower()
# Logical database name per environment; anything unknown falls back to "MOPDB".
DATABASE_NAME_MAP = {
    "dev": "MOPDB",
    "test": "MOPDB_TEST",
}
DATABASE_NAME = DATABASE_NAME_MAP.get(ENV_NAME, "MOPDB")
# check cron 2
# Filesystem layout of the dbt project baked into the Airflow image.
dbt_root_path = "/opt/dbt"
dbt_profiles_dir = "/opt/dbt/profiles.yml"
dbt_profiles_dir_parent = "/opt/dbt"
# Environment passed to every dbt invocation (BashOperator and cosmos tasks).
dbt_env = {
    "DBT_PROFILES_DIR": dbt_profiles_dir_parent,
    "DBT_TARGET": ENV_NAME,
    # Oracle connection settings come from the runtime environment.
    "MRDS_LOADER_DB_USER": os.getenv("MRDS_LOADER_DB_USER"),
    "MRDS_LOADER_DB_PASS": os.getenv("MRDS_LOADER_DB_PASS"),
    "MRDS_LOADER_DB_TNS": os.getenv("MRDS_LOADER_DB_TNS", "XE"),
    "MRDS_SCHEMA": os.getenv("MRDS_SCHEMA", "CT_MRDS"),
    "MRDS_PROTOCOL": os.getenv("MRDS_PROTOCOL", "tcps"),
    "MRDS_THREADS": os.getenv("MRDS_THREADS", "4"),
    "DBT_LOG_PATH": "/opt/dbt/logs",
    "DBT_TARGET_PATH": "/opt/dbt/target",
    "PYTHONUNBUFFERED": "1",
}
def retrieve_run_id(**kwargs):
    """Push this DAG run's run_id to XCom (key 'run_id') and return it."""
    task_instance, current_run_id = kwargs["ti"], kwargs["run_id"]
    task_instance.xcom_push(key="run_id", value=current_run_id)
    return current_run_id
def check_dag_status(**kwargs):
    """Fail this task if any OTHER task in the current DAG run failed."""
    own_task_id = kwargs["task_instance"].task_id
    for task_instance in kwargs["dag_run"].get_task_instances():
        if task_instance.task_id == own_task_id:
            continue  # never fail on our own state
        if task_instance.state == "failed":
            raise Exception(f"Task {task_instance.task_id} failed. Failing this DAG run")
@dag(
    dag_id=DAG_NAME,
    schedule_interval=None,  # externally triggered only
    start_date=days_ago(2),
    catchup=False,
)
def run_dag():
    """Run the m_MOPDB_LM_T_FORECAST_OU_LM_FORECAST dbt models, bracketed by
    control_external_run_start/_end run-operations, then gate the run status."""

    def read_vars(**context):
        # Diagnostic-only task: log bucket configuration; return value is unused.
        BUCKET = os.getenv("INBOX_BUCKET")
        BUCKET_NAMESPACE = os.getenv("BUCKET_NAMESPACE")
        print("========= DBT ENV =========")
        print(f"BUCKET_NAMESPACE: {BUCKET_NAMESPACE}, BUCKET : {BUCKET}")
        return 1

    read_vars_task = PythonOperator(
        task_id="read_vars",
        python_callable=read_vars,
        provide_context=True,
    )
    # Stores the Airflow run_id in XCom so the dbt --vars templates below can
    # reference it.
    retrieve_run_id_task = PythonOperator(
        task_id="retrieve_run_id",
        python_callable=retrieve_run_id,
        provide_context=True,
    )
    # Registers the start of this external run via a dbt run-operation.
    # The --vars payload is a Jinja template rendered by Airflow at runtime.
    control_external_run_start = BashOperator(
        task_id="control_external_run_start",
        params={"db": DATABASE_NAME, "wf": DAG_NAME},
        env=dbt_env,
        bash_command="""
set -euxo pipefail
cd /opt/dbt
dbt --log-format json --log-level debug --debug --log-path /opt/dbt/logs \
run-operation control_external_run_start \
--vars '{{ {
"orchestration_run_id": ti.xcom_pull(task_ids="retrieve_run_id", key="run_id"),
"input_service_name": params.db,
"workflow_name": params.wf
} | tojson }}'
""",
    )
    # Shared cosmos configuration for the dbt task group below.
    common_profile = ProfileConfig(
        profiles_yml_filepath=dbt_profiles_dir,
        profile_name="mrds",
        target_name=ENV_NAME,
    )
    common_project = ProjectConfig(dbt_project_path=dbt_root_path)
    common_vars = {
        "orchestration_run_id": "{{ ti.xcom_pull(task_ids='retrieve_run_id', key='run_id') }}",
        "input_service_name": DATABASE_NAME,
        "workflow_name": DAG_NAME,
    }
    common_operator_args = {
        "vars": common_vars,
        "env": dbt_env,
    }
    # cosmos expands the dbt models carrying this tag into Airflow tasks.
    m_MOPDB_LM_T_FORECAST_OU_LM_FORECAST = DbtTaskGroup(
        group_id="m_MOPDB_LM_T_FORECAST_OU_LM_FORECAST",
        project_config=common_project,
        profile_config=common_profile,
        render_config=RenderConfig(select=[
            "tag:m_MOPDB_LM_T_FORECAST_OU_LM_FORECAST",
        ]),
        operator_args=common_operator_args,
    )
    # Closes the external run record; ALL_DONE so the record is closed even
    # when upstream tasks fail.
    control_external_run_end = BashOperator(
        task_id="control_external_run_end",
        params={"db": DATABASE_NAME, "wf": DAG_NAME},
        env=dbt_env,
        bash_command="""
set -euxo pipefail
cd /opt/dbt
dbt --log-format json --log-level debug --debug --log-path /opt/dbt/logs \
run-operation control_external_run_end \
--vars '{{ {
"orchestration_run_id": ti.xcom_pull(task_ids="retrieve_run_id", key="run_id"),
"input_service_name": params.db,
"workflow_name": params.wf
} | tojson }}'
""",
        trigger_rule=TriggerRule.ALL_DONE,
    )
    # Final gate: marks the DAG run failed if any earlier task failed.
    dag_status = PythonOperator(
        task_id="dag_status",
        provide_context=True,
        python_callable=check_dag_status,
        trigger_rule=TriggerRule.ALL_DONE,
    )
    read_vars_task >> retrieve_run_id_task >> control_external_run_start >> m_MOPDB_LM_T_FORECAST_OU_LM_FORECAST >> control_external_run_end >> dag_status


# Instantiate and register the DAG at module level so Airflow discovers it.
globals()[DAG_NAME] = run_dag()

View File

@@ -0,0 +1,155 @@
import os
from airflow.decorators import dag
from airflow.operators.bash import BashOperator
from airflow.operators.python import PythonOperator
from airflow.utils.dates import days_ago
from airflow.utils.trigger_rule import TriggerRule
from cosmos import DbtTaskGroup, ProfileConfig, ProjectConfig, RenderConfig
# DAG id is derived from this file's basename so the DAG id always matches
# the deployed filename.
DAG_NAME = os.path.splitext(os.path.basename(__file__))[0]
# Deployment environment ("dev", "test", ...); empty string when MRDS_ENV is unset.
ENV_NAME = os.getenv("MRDS_ENV", "").lower()
# Logical source database name per environment.
DATABASE_NAME_MAP = {
    "dev": "MOPDB",
    "test": "MOPDB_TEST",
}
# Unknown/unset environments fall back to "MOPDB".
DATABASE_NAME = DATABASE_NAME_MAP.get(ENV_NAME, "MOPDB")
# check cron 2
# Filesystem layout of the dbt project inside the worker image.
dbt_root_path = "/opt/dbt"
dbt_profiles_dir = "/opt/dbt/profiles.yml"
dbt_profiles_dir_parent = "/opt/dbt"
# Environment variables passed to every dbt invocation (control BashOperators
# and cosmos-rendered tasks). Credentials are read from the worker environment
# at DAG-parse time.
dbt_env = {
    "DBT_PROFILES_DIR": dbt_profiles_dir_parent,
    "DBT_TARGET": ENV_NAME,
    "MRDS_LOADER_DB_USER": os.getenv("MRDS_LOADER_DB_USER"),
    "MRDS_LOADER_DB_PASS": os.getenv("MRDS_LOADER_DB_PASS"),
    "MRDS_LOADER_DB_TNS": os.getenv("MRDS_LOADER_DB_TNS", "XE"),
    "MRDS_SCHEMA": os.getenv("MRDS_SCHEMA", "CT_MRDS"),
    "MRDS_PROTOCOL": os.getenv("MRDS_PROTOCOL", "tcps"),
    "MRDS_THREADS": os.getenv("MRDS_THREADS", "4"),
    "DBT_LOG_PATH": "/opt/dbt/logs",
    "DBT_TARGET_PATH": "/opt/dbt/target",
    "PYTHONUNBUFFERED": "1",
}
def retrieve_run_id(**kwargs):
    """Expose this DAG run's run_id to downstream tasks via XCom and return it."""
    task_instance = kwargs["ti"]
    current_run_id = kwargs["run_id"]
    task_instance.xcom_push(key="run_id", value=current_run_id)
    return current_run_id
def check_dag_status(**kwargs):
    """Raise if any other task instance in this DAG run ended in "failed"."""
    own_id = kwargs["task_instance"].task_id
    first_failed = next(
        (inst.task_id
         for inst in kwargs["dag_run"].get_task_instances()
         if inst.state == "failed" and inst.task_id != own_id),
        None,
    )
    if first_failed is not None:
        raise Exception(f"Task {first_failed} failed. Failing this DAG run")
@dag(
    dag_id=DAG_NAME,
    schedule_interval=None,  # externally triggered only
    start_date=days_ago(2),
    catchup=False,
)
def run_dag():
    """Run the m_MOPDB_LM_T_FC_QRE_ADJUSTMENTS_OU_LM_QRE_ADJUSTMENT dbt models,
    bracketed by control_external_run_start/_end, then gate the run status."""

    def read_vars(**context):
        # Diagnostic-only task: log bucket configuration; return value is unused.
        BUCKET = os.getenv("INBOX_BUCKET")
        BUCKET_NAMESPACE = os.getenv("BUCKET_NAMESPACE")
        print("========= DBT ENV =========")
        print(f"BUCKET_NAMESPACE: {BUCKET_NAMESPACE}, BUCKET : {BUCKET}")
        return 1

    read_vars_task = PythonOperator(
        task_id="read_vars",
        python_callable=read_vars,
        provide_context=True,
    )
    # Stores the Airflow run_id in XCom so the dbt --vars templates below can
    # reference it.
    retrieve_run_id_task = PythonOperator(
        task_id="retrieve_run_id",
        python_callable=retrieve_run_id,
        provide_context=True,
    )
    # Registers the start of this external run via a dbt run-operation.
    # The --vars payload is a Jinja template rendered by Airflow at runtime.
    control_external_run_start = BashOperator(
        task_id="control_external_run_start",
        params={"db": DATABASE_NAME, "wf": DAG_NAME},
        env=dbt_env,
        bash_command="""
set -euxo pipefail
cd /opt/dbt
dbt --log-format json --log-level debug --debug --log-path /opt/dbt/logs \
run-operation control_external_run_start \
--vars '{{ {
"orchestration_run_id": ti.xcom_pull(task_ids="retrieve_run_id", key="run_id"),
"input_service_name": params.db,
"workflow_name": params.wf
} | tojson }}'
""",
    )
    # Shared cosmos configuration for the dbt task group below.
    common_profile = ProfileConfig(
        profiles_yml_filepath=dbt_profiles_dir,
        profile_name="mrds",
        target_name=ENV_NAME,
    )
    common_project = ProjectConfig(dbt_project_path=dbt_root_path)
    common_vars = {
        "orchestration_run_id": "{{ ti.xcom_pull(task_ids='retrieve_run_id', key='run_id') }}",
        "input_service_name": DATABASE_NAME,
        "workflow_name": DAG_NAME,
    }
    common_operator_args = {
        "vars": common_vars,
        "env": dbt_env,
    }
    # cosmos expands the dbt models carrying this tag into Airflow tasks.
    m_MOPDB_LM_T_FC_QRE_ADJUSTMENTS_OU_LM_QRE_ADJUSTMENT = DbtTaskGroup(
        group_id="m_MOPDB_LM_T_FC_QRE_ADJUSTMENTS_OU_LM_QRE_ADJUSTMENT",
        project_config=common_project,
        profile_config=common_profile,
        render_config=RenderConfig(select=[
            "tag:m_MOPDB_LM_T_FC_QRE_ADJUSTMENTS_OU_LM_QRE_ADJUSTMENT",
        ]),
        operator_args=common_operator_args,
    )
    # Closes the external run record; ALL_DONE so the record is closed even
    # when upstream tasks fail.
    control_external_run_end = BashOperator(
        task_id="control_external_run_end",
        params={"db": DATABASE_NAME, "wf": DAG_NAME},
        env=dbt_env,
        bash_command="""
set -euxo pipefail
cd /opt/dbt
dbt --log-format json --log-level debug --debug --log-path /opt/dbt/logs \
run-operation control_external_run_end \
--vars '{{ {
"orchestration_run_id": ti.xcom_pull(task_ids="retrieve_run_id", key="run_id"),
"input_service_name": params.db,
"workflow_name": params.wf
} | tojson }}'
""",
        trigger_rule=TriggerRule.ALL_DONE,
    )
    # Final gate: marks the DAG run failed if any earlier task failed.
    dag_status = PythonOperator(
        task_id="dag_status",
        provide_context=True,
        python_callable=check_dag_status,
        trigger_rule=TriggerRule.ALL_DONE,
    )
    read_vars_task >> retrieve_run_id_task >> control_external_run_start >> m_MOPDB_LM_T_FC_QRE_ADJUSTMENTS_OU_LM_QRE_ADJUSTMENT >> control_external_run_end >> dag_status


# Instantiate and register the DAG at module level so Airflow discovers it.
globals()[DAG_NAME] = run_dag()

View File

@@ -0,0 +1,175 @@
import os
from airflow.decorators import dag
from airflow.operators.bash import BashOperator
from airflow.operators.python import PythonOperator
from airflow.utils.dates import days_ago
from airflow.utils.trigger_rule import TriggerRule
from cosmos import DbtTaskGroup, ProfileConfig, ProjectConfig, RenderConfig
# DAG id is derived from this file's basename so the DAG id always matches
# the deployed filename.
DAG_NAME = os.path.splitext(os.path.basename(__file__))[0]
# Deployment environment ("dev", "test", ...); empty string when MRDS_ENV is unset.
ENV_NAME = os.getenv("MRDS_ENV", "").lower()
# Logical source database name per environment.
DATABASE_NAME_MAP = {
    "dev": "MOPDB",
    "test": "MOPDB_TEST",
}
# Unknown/unset environments fall back to "MOPDB".
DATABASE_NAME = DATABASE_NAME_MAP.get(ENV_NAME, "MOPDB")
# check cron 2
# Filesystem layout of the dbt project inside the worker image.
dbt_root_path = "/opt/dbt"
dbt_profiles_dir = "/opt/dbt/profiles.yml"
dbt_profiles_dir_parent = "/opt/dbt"
# Environment variables passed to every dbt invocation (control BashOperators
# and cosmos-rendered tasks). Credentials are read from the worker environment
# at DAG-parse time.
dbt_env = {
    "DBT_PROFILES_DIR": dbt_profiles_dir_parent,
    "DBT_TARGET": ENV_NAME,
    "MRDS_LOADER_DB_USER": os.getenv("MRDS_LOADER_DB_USER"),
    "MRDS_LOADER_DB_PASS": os.getenv("MRDS_LOADER_DB_PASS"),
    "MRDS_LOADER_DB_TNS": os.getenv("MRDS_LOADER_DB_TNS", "XE"),
    "MRDS_SCHEMA": os.getenv("MRDS_SCHEMA", "CT_MRDS"),
    "MRDS_PROTOCOL": os.getenv("MRDS_PROTOCOL", "tcps"),
    "MRDS_THREADS": os.getenv("MRDS_THREADS", "4"),
    "DBT_LOG_PATH": "/opt/dbt/logs",
    "DBT_TARGET_PATH": "/opt/dbt/target",
    "PYTHONUNBUFFERED": "1",
}
def retrieve_run_id(**kwargs):
    """Expose this DAG run's run_id to downstream tasks via XCom and return it."""
    task_instance = kwargs["ti"]
    current_run_id = kwargs["run_id"]
    task_instance.xcom_push(key="run_id", value=current_run_id)
    return current_run_id
def check_dag_status(**kwargs):
    """Raise if any other task instance in this DAG run ended in "failed"."""
    own_id = kwargs["task_instance"].task_id
    first_failed = next(
        (inst.task_id
         for inst in kwargs["dag_run"].get_task_instances()
         if inst.state == "failed" and inst.task_id != own_id),
        None,
    )
    if first_failed is not None:
        raise Exception(f"Task {first_failed} failed. Failing this DAG run")
@dag(
    dag_id=DAG_NAME,
    schedule_interval=None,  # externally triggered only
    start_date=days_ago(2),
    catchup=False,
)
def run_dag():
    """Run the LM.T_STANDING_FACILITIES dbt models bracketed by control
    run-operations, trigger the MOPDB devo replicator, then gate the run
    status on upstream failures."""

    def read_vars(**context):
        # Diagnostic-only task: log bucket configuration; return value is unused.
        BUCKET = os.getenv("INBOX_BUCKET")
        BUCKET_NAMESPACE = os.getenv("BUCKET_NAMESPACE")
        print("========= DBT ENV =========")
        print(f"BUCKET_NAMESPACE: {BUCKET_NAMESPACE}, BUCKET : {BUCKET}")
        return 1

    read_vars_task = PythonOperator(
        task_id="read_vars",
        python_callable=read_vars,
        provide_context=True,
    )
    # Stores the Airflow run_id in XCom so the dbt --vars templates below can
    # reference it.
    retrieve_run_id_task = PythonOperator(
        task_id="retrieve_run_id",
        python_callable=retrieve_run_id,
        provide_context=True,
    )
    # Registers the start of this external run via a dbt run-operation.
    # The --vars payload is a Jinja template rendered by Airflow at runtime.
    control_external_run_start = BashOperator(
        task_id="control_external_run_start",
        params={"db": DATABASE_NAME, "wf": DAG_NAME},
        env=dbt_env,
        bash_command="""
set -euxo pipefail
cd /opt/dbt
dbt --log-format json --log-level debug --debug --log-path /opt/dbt/logs \
run-operation control_external_run_start \
--vars '{{ {
"orchestration_run_id": ti.xcom_pull(task_ids="retrieve_run_id", key="run_id"),
"input_service_name": params.db,
"workflow_name": params.wf
} | tojson }}'
""",
    )
    # Shared cosmos configuration for the dbt task group below.
    common_profile = ProfileConfig(
        profiles_yml_filepath=dbt_profiles_dir,
        profile_name="mrds",
        target_name=ENV_NAME,
    )
    common_project = ProjectConfig(dbt_project_path=dbt_root_path)
    common_vars = {
        "orchestration_run_id": "{{ ti.xcom_pull(task_ids='retrieve_run_id', key='run_id') }}",
        "input_service_name": DATABASE_NAME,
        "workflow_name": DAG_NAME,
    }
    common_operator_args = {
        "vars": common_vars,
        "env": dbt_env,
    }
    # cosmos expands the tagged dbt models into tasks; the exclude keeps the
    # CSDB debt-daily staging query out of this workflow.
    m_MOPDB_LM_T_STANDING_FACILITIES_OU_LM_STANDING_FACILITIES = DbtTaskGroup(
        group_id="m_MOPDB_LM_T_STANDING_FACILITIES_OU_LM_STANDING_FACILITIES",
        project_config=common_project,
        profile_config=common_profile,
        render_config=RenderConfig(
            select=["tag:m_MOPDB_LM_T_STANDING_FACILITIES_OU_LM_STANDING_FACILITIES"],
            exclude=["m_MOPDB_CSDB_DEBT_DAILY_OU_CSDB_DEBT_DAILY_SQ"],
        ),
        operator_args=common_operator_args,
    )
    # Closes the external run record; ALL_DONE so the record is closed even
    # when upstream tasks fail.
    control_external_run_end = BashOperator(
        task_id="control_external_run_end",
        params={"db": DATABASE_NAME, "wf": DAG_NAME},
        env=dbt_env,
        bash_command="""
set -euxo pipefail
cd /opt/dbt
dbt --log-format json --log-level debug --debug --log-path /opt/dbt/logs \
run-operation control_external_run_end \
--vars '{{ {
"orchestration_run_id": ti.xcom_pull(task_ids="retrieve_run_id", key="run_id"),
"input_service_name": params.db,
"workflow_name": params.wf
} | tojson }}'
""",
        trigger_rule=TriggerRule.ALL_DONE,
    )

    def trigger_devo_replicator(**context):
        """Trigger the devo replicator DAG for LM.T_STANDING_FACILITIES and
        return the created run's run_id."""
        from airflow.api.common.trigger_dag import trigger_dag
        target_dag_id = 'devo_replicator_trigger_mopdb'
        # Tell the replicator which owner.table to replicate.
        trigger_conf = {
            "owner_table": "LM.T_STANDING_FACILITIES"
        }
        # No try/except wrapper: the previous `except Exception: raise` was a
        # no-op, so failures simply propagate and fail this task as before.
        dag_run = trigger_dag(
            dag_id=target_dag_id,
            conf=trigger_conf,
            execution_date=None,
            replace_microseconds=False,
        )
        return dag_run.run_id

    # ALL_DONE: replication is attempted even when the dbt group failed.
    trigger_replicator = PythonOperator(
        task_id='trigger_devo_replicator',
        python_callable=trigger_devo_replicator,
        trigger_rule=TriggerRule.ALL_DONE,
    )
    # Final gate: marks the DAG run failed if any earlier task failed.
    dag_status = PythonOperator(
        task_id="dag_status",
        provide_context=True,
        python_callable=check_dag_status,
        trigger_rule=TriggerRule.ALL_DONE,
    )
    read_vars_task >> retrieve_run_id_task >> control_external_run_start >> m_MOPDB_LM_T_STANDING_FACILITIES_OU_LM_STANDING_FACILITIES >> control_external_run_end >> trigger_replicator >> dag_status


# Instantiate and register the DAG at module level so Airflow discovers it.
globals()[DAG_NAME] = run_dag()

View File

View File

View File

@@ -0,0 +1,316 @@
import os
######
from airflow.decorators import dag
from airflow.operators.bash import BashOperator
from airflow.operators.python import PythonOperator
from airflow.utils.dates import days_ago
from airflow.utils.trigger_rule import TriggerRule
from airflow.api.common.trigger_dag import trigger_dag
from cosmos import DbtTaskGroup, ProfileConfig, ProjectConfig, RenderConfig
from mrds.utils.security_utils import get_verified_run_id, verify_run_id
from mrds.utils import oraconn
# DAG id is derived from this file's basename so the DAG id always matches
# the deployed filename.
DAG_NAME = os.path.splitext(os.path.basename(__file__))[0]
# Deployment environment ("dev", "test", ...); empty string when MRDS_ENV is unset.
ENV_NAME = os.getenv("MRDS_ENV", "").lower()
# Logical source database name per environment.
DATABASE_NAME_MAP = {
    "dev": "MOPDB",
    "test": "MOPDB_TEST",
}
# Unknown/unset environments fall back to "MOPDB".
DATABASE_NAME = DATABASE_NAME_MAP.get(ENV_NAME, "MOPDB")
# check cron 2
# Filesystem layout of the dbt project inside the worker image.
dbt_root_path = "/opt/dbt"
dbt_profiles_dir = "/opt/dbt/profiles.yml"
dbt_profiles_dir_parent = "/opt/dbt"
# Environment variables passed to every dbt invocation (control BashOperators
# and cosmos-rendered tasks). Credentials are read from the worker environment
# at DAG-parse time.
dbt_env = {
    "DBT_PROFILES_DIR": dbt_profiles_dir_parent,
    "DBT_TARGET": ENV_NAME,
    "MRDS_LOADER_DB_USER": os.getenv("MRDS_LOADER_DB_USER"),
    "MRDS_LOADER_DB_PASS": os.getenv("MRDS_LOADER_DB_PASS"),
    "MRDS_LOADER_DB_TNS": os.getenv("MRDS_LOADER_DB_TNS", "XE"),
    "MRDS_SCHEMA": os.getenv("MRDS_SCHEMA", "CT_MRDS"),
    "MRDS_PROTOCOL": os.getenv("MRDS_PROTOCOL", "tcps"),
    "MRDS_THREADS": os.getenv("MRDS_THREADS", "4"),
    "DBT_LOG_PATH": "/opt/dbt/logs",
    "DBT_TARGET_PATH": "/opt/dbt/target",
    "PYTHONUNBUFFERED": "1",
}
def retrieve_run_id(**kwargs):
    """Verify this DAG run's run_id, publish it to XCom, and return it."""
    # Verification is delegated to the project's security utilities.
    verified_run_id = get_verified_run_id(kwargs)
    kwargs["ti"].xcom_push(key="run_id", value=verified_run_id)
    return verified_run_id
def check_dag_status(**kwargs):
    """Raise if any other task instance in this DAG run ended in "failed"."""
    own_id = kwargs["task_instance"].task_id
    first_failed = next(
        (inst.task_id
         for inst in kwargs["dag_run"].get_task_instances()
         if inst.state == "failed" and inst.task_id != own_id),
        None,
    )
    if first_failed is not None:
        raise Exception(f"Task {first_failed} failed. Failing this DAG run")
def get_rqsd_tables_to_replicate(**kwargs):
    """
    Get the list of RQSD tables from a_devo_replica_mgmt_rqsd, excluding
    tables whose name ends with the literal suffix "_COPY".

    Pushes the list (as "OWNER.TABLE_NAME" strings) to XCom under key
    "rqsd_tables" and returns it.

    Raises: re-raises any database error after logging it; the connection is
    always closed and the cursor always released.
    """
    import logging
    oracle_conn = None
    try:
        oracle_conn = oraconn.connect('MRDS_LOADER')
        cursor = oracle_conn.cursor()
        try:
            # Escape the underscore: in LIKE, a bare "_" matches ANY single
            # character, so '%_COPY' would also exclude names such as "XCOPY".
            # ESCAPE makes "_COPY" match only the literal suffix.
            sql = r"""
                SELECT OWNER, TABLE_NAME
                FROM CT_MRDS.a_devo_replica_mgmt_rqsd
                WHERE TABLE_NAME NOT LIKE '%\_COPY' ESCAPE '\'
                ORDER BY OWNER, TABLE_NAME
            """
            cursor.execute(sql)
            tables = cursor.fetchall()
        finally:
            # Release the cursor even when the query raises.
            cursor.close()
        logging.info(f"Found {len(tables)} RQSD tables to replicate (excluding _COPY versions)")
        # Convert to list of owner.table_name format
        table_list = [f"{owner}.{table_name}" for owner, table_name in tables]
        # Push to XCom for next task
        kwargs["ti"].xcom_push(key="rqsd_tables", value=table_list)
        return table_list
    except Exception as e:
        logging.error(f"Error getting RQSD tables: {e}")
        raise
    finally:
        if oracle_conn:
            oracle_conn.close()
def trigger_rqsd_replication(**kwargs):
    """
    Trigger devo_replicator_trigger_rqsd once for each table produced by the
    upstream get_rqsd_tables task.

    Pushes "triggered_count" and "failed_triggers" to XCom and returns a
    summary dict; returns None when there are no tables to replicate.
    Individual trigger failures are logged and collected, not raised.
    """
    import logging
    # Removed the unused `from datetime import datetime` import.
    ti = kwargs["ti"]
    table_list = ti.xcom_pull(task_ids="get_rqsd_tables", key="rqsd_tables")
    if not table_list:
        logging.warning("No RQSD tables found to replicate")
        return
    logging.info(f"Triggering replication for {len(table_list)} tables")
    triggered_count = 0
    failed_triggers = []
    for owner_table in table_list:
        try:
            conf = {
                "owner_table": owner_table
            }
            trigger_dag(
                dag_id='devo_replicator_trigger_rqsd',
                conf=conf,
                execution_date=None,
                replace_microseconds=False
            )
            triggered_count += 1
            logging.info(f"Successfully triggered replication for {owner_table}")
        except Exception as e:
            # Best-effort: record the failure and keep triggering the rest.
            logging.error(f"Failed to trigger replication for {owner_table}: {e}")
            failed_triggers.append(owner_table)
    logging.info(f"Replication triggered for {triggered_count}/{len(table_list)} tables")
    if failed_triggers:
        logging.warning(f"Failed to trigger replication for: {', '.join(failed_triggers)}")
    # Push results to XCom
    ti.xcom_push(key="triggered_count", value=triggered_count)
    ti.xcom_push(key="failed_triggers", value=failed_triggers)
    return {
        "triggered_count": triggered_count,
        "total_tables": len(table_list),
        "failed_triggers": failed_triggers
    }
@dag(
    dag_id=DAG_NAME,
    schedule_interval=None,  # externally triggered only
    start_date=days_ago(2),
    catchup=False,
)
def run_dag():
    """Run the RQSD ANNEX dbt model groups in parallel, then the NCB SUBA
    output group, bracketed by control run-operations; afterwards trigger
    replication for every RQSD table and gate the run status."""

    def read_vars(**context):
        # Diagnostic-only task: log bucket configuration; return value is unused.
        BUCKET = os.getenv("INBOX_BUCKET")
        BUCKET_NAMESPACE = os.getenv("BUCKET_NAMESPACE")
        print("========= DBT ENV =========")
        print(f"BUCKET_NAMESPACE: {BUCKET_NAMESPACE}, BUCKET : {BUCKET}")
        return 1

    read_vars_task = PythonOperator(
        task_id="read_vars",
        python_callable=read_vars,
        provide_context=True,
    )
    # Stores the verified run_id in XCom so the dbt --vars templates below
    # can reference it.
    retrieve_run_id_task = PythonOperator(
        task_id="retrieve_run_id",
        python_callable=retrieve_run_id,
        provide_context=True,
    )
    # Registers the start of this external run via a dbt run-operation.
    # The --vars payload is a Jinja template rendered by Airflow at runtime.
    control_external_run_start = BashOperator(
        task_id="control_external_run_start",
        params={"db": DATABASE_NAME, "wf": DAG_NAME},
        env=dbt_env,
        bash_command="""
set -euxo pipefail
cd /opt/dbt
dbt --log-format json --log-level debug --debug --log-path /opt/dbt/logs \
run-operation control_external_run_start \
--vars '{{ {
"orchestration_run_id": ti.xcom_pull(task_ids="retrieve_run_id", key="run_id"),
"input_service_name": params.db,
"workflow_name": params.wf
} | tojson }}'
""",
    )
    # Shared cosmos configuration for all dbt task groups below.
    common_profile = ProfileConfig(
        profiles_yml_filepath=dbt_profiles_dir,
        profile_name="mrds",
        target_name=ENV_NAME,
    )
    common_project = ProjectConfig(dbt_project_path=dbt_root_path)
    common_vars = {
        "orchestration_run_id": "{{ ti.xcom_pull(task_ids='retrieve_run_id', key='run_id') }}",
        "input_service_name": DATABASE_NAME,
        "workflow_name": DAG_NAME,
    }
    common_operator_args = {
        "vars": common_vars,
        "env": dbt_env,
    }
    # One cosmos task group per ANNEX tag; these five run in parallel.
    m_MOPDB_RQSD_ANNEX_1_1_ALL_ODS_RQSD_OBSERVATIONS = DbtTaskGroup(
        group_id="m_MOPDB_RQSD_ANNEX_1_1_ALL_ODS_RQSD_OBSERVATIONS",
        project_config=common_project,
        profile_config=common_profile,
        render_config=RenderConfig(select=[
            "tag:m_MOPDB_RQSD_ANNEX_1_1_ALL_ODS_RQSD_OBSERVATIONS",
        ]),
        operator_args=common_operator_args,
    )
    m_MOPDB_RQSD_ANNEX_1_2_ALL_ODS_RQSD_OBSERVATIONS = DbtTaskGroup(
        group_id="m_MOPDB_RQSD_ANNEX_1_2_ALL_ODS_RQSD_OBSERVATIONS",
        project_config=common_project,
        profile_config=common_profile,
        render_config=RenderConfig(select=[
            "tag:m_MOPDB_RQSD_ANNEX_1_2_ALL_ODS_RQSD_OBSERVATIONS",
        ]),
        operator_args=common_operator_args,
    )
    m_MOPDB_RQSD_ANNEX_1_1_FIN_ALL_ODS_RQSD_OBSERVATIONS = DbtTaskGroup(
        group_id="m_MOPDB_RQSD_ANNEX_1_1_FIN_ALL_ODS_RQSD_OBSERVATIONS",
        project_config=common_project,
        profile_config=common_profile,
        render_config=RenderConfig(select=[
            "tag:m_MOPDB_RQSD_ANNEX_1_1_FIN_ALL_ODS_RQSD_OBSERVATIONS",
        ]),
        operator_args=common_operator_args,
    )
    m_MOPDB_RQSD_ANNEX_1_2_FIN_ALL_ODS_RQSD_OBSERVATIONS = DbtTaskGroup(
        group_id="m_MOPDB_RQSD_ANNEX_1_2_FIN_ALL_ODS_RQSD_OBSERVATIONS",
        project_config=common_project,
        profile_config=common_profile,
        render_config=RenderConfig(select=[
            "tag:m_MOPDB_RQSD_ANNEX_1_2_FIN_ALL_ODS_RQSD_OBSERVATIONS",
        ]),
        operator_args=common_operator_args,
    )
    m_MOPDB_RQSD_ANNEX_2_ALL_ODS_RQSD_OBSERVATIONS = DbtTaskGroup(
        group_id="m_MOPDB_RQSD_ANNEX_2_ALL_ODS_RQSD_OBSERVATIONS",
        project_config=common_project,
        profile_config=common_profile,
        render_config=RenderConfig(select=[
            "tag:m_MOPDB_RQSD_ANNEX_2_ALL_ODS_RQSD_OBSERVATIONS",
        ]),
        operator_args=common_operator_args,
    )
    # Output group: runs only after all five ANNEX groups complete.
    m_MOPDB_RQSD_OUTPUT_CURR_RQSD_NCB_SUBA = DbtTaskGroup(
        group_id="m_MOPDB_RQSD_OUTPUT_CURR_RQSD_NCB_SUBA",
        project_config=common_project,
        profile_config=common_profile,
        render_config=RenderConfig(select=[
            "tag:m_MOPDB_RQSD_OUTPUT_CURR_RQSD_NCB_SUBA",
        ]),
        operator_args=common_operator_args,
    )
    # Closes the external run record; ALL_DONE so the record is closed even
    # when upstream tasks fail.
    control_external_run_end = BashOperator(
        task_id="control_external_run_end",
        params={"db": DATABASE_NAME, "wf": DAG_NAME},
        env=dbt_env,
        bash_command="""
set -euxo pipefail
cd /opt/dbt
dbt --log-format json --log-level debug --debug --log-path /opt/dbt/logs \
run-operation control_external_run_end \
--vars '{{ {
"orchestration_run_id": ti.xcom_pull(task_ids="retrieve_run_id", key="run_id"),
"input_service_name": params.db,
"workflow_name": params.wf
} | tojson }}'
""",
        trigger_rule=TriggerRule.ALL_DONE,
    )
    # Get list of RQSD tables to replicate
    get_rqsd_tables = PythonOperator(
        task_id="get_rqsd_tables",
        python_callable=get_rqsd_tables_to_replicate,
        provide_context=True,
    )
    # Trigger replication for all RQSD tables
    trigger_rqsd_replication_task = PythonOperator(
        task_id="trigger_rqsd_replication",
        python_callable=trigger_rqsd_replication,
        provide_context=True,
    )
    # Final gate: marks the DAG run failed if any earlier task failed.
    dag_status = PythonOperator(
        task_id="dag_status",
        provide_context=True,
        python_callable=check_dag_status,
        trigger_rule=TriggerRule.ALL_DONE,
    )
    # dependency chain: fan out to the five ANNEX groups, join on the output
    # group, then close the run, replicate, and gate the status.
    read_vars_task >> retrieve_run_id_task >> control_external_run_start >> [
        m_MOPDB_RQSD_ANNEX_1_1_ALL_ODS_RQSD_OBSERVATIONS,
        m_MOPDB_RQSD_ANNEX_1_2_ALL_ODS_RQSD_OBSERVATIONS,
        m_MOPDB_RQSD_ANNEX_1_1_FIN_ALL_ODS_RQSD_OBSERVATIONS,
        m_MOPDB_RQSD_ANNEX_1_2_FIN_ALL_ODS_RQSD_OBSERVATIONS,
        m_MOPDB_RQSD_ANNEX_2_ALL_ODS_RQSD_OBSERVATIONS,
    ] >> m_MOPDB_RQSD_OUTPUT_CURR_RQSD_NCB_SUBA >> control_external_run_end >> get_rqsd_tables >> trigger_rqsd_replication_task >> dag_status


# Instantiate and register the DAG at module level so Airflow discovers it.
globals()[DAG_NAME] = run_dag()

View File

@@ -0,0 +1,216 @@
"""
NEW CHANGES FOR MANUAL:
Changed DAG_NAME from dynamic file-based to hardcoded "w_MOPDB_RQSD_PROCESS_MANUAL"
Changed the last task group from m_MOPDB_RQSD_OUTPUT_CURR_RQSD_NCB_SUBA to m_MOPDB_RQSD_OUTPUT_CURR_RQSD_NCB_SUBA_MANUAL
Updated the tag in the RenderConfig to "tag:m_MOPDB_RQSD_OUTPUT_CURR_RQSD_NCB_SUBA_MANUAL"
Added tags=["MOPDB", "RQSD", "DBT", "MANUAL"] to the DAG decorator
KEEP IDENTICAL :
All other task groups remain the same as the original MOPDB DAG
Flow structure remains identical:
read_vars → retrieve_run_id → control_external_run_start
→ 5 parallel ANNEX tasks → MANUAL output task → control_external_run_end → dag_status
"""
import os
from airflow.decorators import dag
from airflow.operators.bash import BashOperator
from airflow.operators.python import PythonOperator
from airflow.utils.dates import days_ago
from airflow.utils.trigger_rule import TriggerRule
from cosmos import DbtTaskGroup, ProfileConfig, ProjectConfig, RenderConfig
from mrds.utils.security_utils import get_verified_run_id, verify_run_id
# DAG id is hardcoded for the MANUAL variant (see the module docstring above);
# it intentionally does not follow the filename-derived pattern.
DAG_NAME = "w_MOPDB_RQSD_PROCESS_MANUAL"
# Deployment environment ("dev", "test", ...); empty string when MRDS_ENV is unset.
ENV_NAME = os.getenv("MRDS_ENV", "").lower()
# Logical source database name per environment.
DATABASE_NAME_MAP = {
    "dev": "MOPDB",
    "test": "MOPDB_TEST",
}
# Unknown/unset environments fall back to "MOPDB".
DATABASE_NAME = DATABASE_NAME_MAP.get(ENV_NAME, "MOPDB")
# Filesystem layout of the dbt project inside the worker image.
dbt_root_path = "/opt/dbt"
dbt_profiles_dir = "/opt/dbt/profiles.yml"
dbt_profiles_dir_parent = "/opt/dbt"
# Environment variables passed to every dbt invocation (control BashOperators
# and cosmos-rendered tasks). Credentials are read from the worker environment
# at DAG-parse time.
dbt_env = {
    "DBT_PROFILES_DIR": dbt_profiles_dir_parent,
    "DBT_TARGET": ENV_NAME,
    "MRDS_LOADER_DB_USER": os.getenv("MRDS_LOADER_DB_USER"),
    "MRDS_LOADER_DB_PASS": os.getenv("MRDS_LOADER_DB_PASS"),
    "MRDS_LOADER_DB_TNS": os.getenv("MRDS_LOADER_DB_TNS", "XE"),
    "MRDS_SCHEMA": os.getenv("MRDS_SCHEMA", "CT_MRDS"),
    "MRDS_PROTOCOL": os.getenv("MRDS_PROTOCOL", "tcps"),
    "MRDS_THREADS": os.getenv("MRDS_THREADS", "4"),
    "DBT_LOG_PATH": "/opt/dbt/logs",
    "DBT_TARGET_PATH": "/opt/dbt/target",
    "PYTHONUNBUFFERED": "1",
}
def retrieve_run_id(**kwargs):
    """Verify this DAG run's run_id, publish it to XCom, and return it."""
    # Verification is delegated to the project's security utilities.
    verified_run_id = get_verified_run_id(kwargs)
    kwargs["ti"].xcom_push(key="run_id", value=verified_run_id)
    return verified_run_id
def check_dag_status(**kwargs):
    """Raise if any other task instance in this DAG run ended in "failed"."""
    own_id = kwargs["task_instance"].task_id
    first_failed = next(
        (inst.task_id
         for inst in kwargs["dag_run"].get_task_instances()
         if inst.state == "failed" and inst.task_id != own_id),
        None,
    )
    if first_failed is not None:
        raise Exception(f"Task {first_failed} failed. Failing this DAG run")
@dag(
    dag_id=DAG_NAME,
    schedule_interval=None,  # externally triggered only
    start_date=days_ago(2),
    catchup=False,
    tags=["MOPDB", "RQSD", "DBT", "MANUAL"]
)
def run_dag():
    """MANUAL variant of the RQSD process: five ANNEX dbt groups in parallel,
    then the MANUAL NCB SUBA output group, bracketed by control
    run-operations and a final status gate."""

    def read_vars(**context):
        # Diagnostic-only task: log bucket configuration; return value is unused.
        BUCKET = os.getenv("INBOX_BUCKET")
        BUCKET_NAMESPACE = os.getenv("BUCKET_NAMESPACE")
        print("========= DBT ENV =========")
        print(f"BUCKET_NAMESPACE: {BUCKET_NAMESPACE}, BUCKET : {BUCKET}")
        return 1

    read_vars_task = PythonOperator(
        task_id="read_vars",
        python_callable=read_vars,
        provide_context=True,
    )
    # Stores the verified run_id in XCom so the dbt --vars templates below
    # can reference it.
    retrieve_run_id_task = PythonOperator(
        task_id="retrieve_run_id",
        python_callable=retrieve_run_id,
        provide_context=True,
    )
    # Registers the start of this external run via a dbt run-operation.
    # The --vars payload is a Jinja template rendered by Airflow at runtime.
    control_external_run_start = BashOperator(
        task_id="control_external_run_start",
        params={"db": DATABASE_NAME, "wf": DAG_NAME},
        env=dbt_env,
        bash_command="""
set -euxo pipefail
cd /opt/dbt
dbt --log-format json --log-level debug --debug --log-path /opt/dbt/logs \
run-operation control_external_run_start \
--vars '{{ {
"orchestration_run_id": ti.xcom_pull(task_ids="retrieve_run_id", key="run_id"),
"input_service_name": params.db,
"workflow_name": params.wf
} | tojson }}'
""",
    )
    # Shared cosmos configuration for all dbt task groups below.
    common_profile = ProfileConfig(
        profiles_yml_filepath=dbt_profiles_dir,
        profile_name="mrds",
        target_name=ENV_NAME,
    )
    common_project = ProjectConfig(dbt_project_path=dbt_root_path)
    common_vars = {
        "orchestration_run_id": "{{ ti.xcom_pull(task_ids='retrieve_run_id', key='run_id') }}",
        "input_service_name": DATABASE_NAME,
        "workflow_name": DAG_NAME,
    }
    common_operator_args = {
        "vars": common_vars,
        "env": dbt_env,
    }
    # One cosmos task group per ANNEX tag; these five run in parallel and are
    # identical to the scheduled (non-MANUAL) RQSD DAG.
    m_MOPDB_RQSD_ANNEX_1_1_ALL_ODS_RQSD_OBSERVATIONS = DbtTaskGroup(
        group_id="m_MOPDB_RQSD_ANNEX_1_1_ALL_ODS_RQSD_OBSERVATIONS",
        project_config=common_project,
        profile_config=common_profile,
        render_config=RenderConfig(select=[
            "tag:m_MOPDB_RQSD_ANNEX_1_1_ALL_ODS_RQSD_OBSERVATIONS",
        ]),
        operator_args=common_operator_args,
    )
    m_MOPDB_RQSD_ANNEX_1_2_ALL_ODS_RQSD_OBSERVATIONS = DbtTaskGroup(
        group_id="m_MOPDB_RQSD_ANNEX_1_2_ALL_ODS_RQSD_OBSERVATIONS",
        project_config=common_project,
        profile_config=common_profile,
        render_config=RenderConfig(select=[
            "tag:m_MOPDB_RQSD_ANNEX_1_2_ALL_ODS_RQSD_OBSERVATIONS",
        ]),
        operator_args=common_operator_args,
    )
    m_MOPDB_RQSD_ANNEX_1_1_FIN_ALL_ODS_RQSD_OBSERVATIONS = DbtTaskGroup(
        group_id="m_MOPDB_RQSD_ANNEX_1_1_FIN_ALL_ODS_RQSD_OBSERVATIONS",
        project_config=common_project,
        profile_config=common_profile,
        render_config=RenderConfig(select=[
            "tag:m_MOPDB_RQSD_ANNEX_1_1_FIN_ALL_ODS_RQSD_OBSERVATIONS",
        ]),
        operator_args=common_operator_args,
    )
    m_MOPDB_RQSD_ANNEX_1_2_FIN_ALL_ODS_RQSD_OBSERVATIONS = DbtTaskGroup(
        group_id="m_MOPDB_RQSD_ANNEX_1_2_FIN_ALL_ODS_RQSD_OBSERVATIONS",
        project_config=common_project,
        profile_config=common_profile,
        render_config=RenderConfig(select=[
            "tag:m_MOPDB_RQSD_ANNEX_1_2_FIN_ALL_ODS_RQSD_OBSERVATIONS",
        ]),
        operator_args=common_operator_args,
    )
    m_MOPDB_RQSD_ANNEX_2_ALL_ODS_RQSD_OBSERVATIONS = DbtTaskGroup(
        group_id="m_MOPDB_RQSD_ANNEX_2_ALL_ODS_RQSD_OBSERVATIONS",
        project_config=common_project,
        profile_config=common_profile,
        render_config=RenderConfig(select=[
            "tag:m_MOPDB_RQSD_ANNEX_2_ALL_ODS_RQSD_OBSERVATIONS",
        ]),
        operator_args=common_operator_args,
    )
    # MANUAL output group: the only task group that differs from the
    # scheduled RQSD DAG (different tag/group id).
    m_MOPDB_RQSD_OUTPUT_CURR_RQSD_NCB_SUBA_MANUAL = DbtTaskGroup(
        group_id="m_MOPDB_RQSD_OUTPUT_CURR_RQSD_NCB_SUBA_MANUAL",
        project_config=common_project,
        profile_config=common_profile,
        render_config=RenderConfig(select=[
            "tag:m_MOPDB_RQSD_OUTPUT_CURR_RQSD_NCB_SUBA_MANUAL",
        ]),
        operator_args=common_operator_args,
    )
    # Closes the external run record; ALL_DONE so the record is closed even
    # when upstream tasks fail.
    control_external_run_end = BashOperator(
        task_id="control_external_run_end",
        params={"db": DATABASE_NAME, "wf": DAG_NAME},
        env=dbt_env,
        bash_command="""
set -euxo pipefail
cd /opt/dbt
dbt --log-format json --log-level debug --debug --log-path /opt/dbt/logs \
run-operation control_external_run_end \
--vars '{{ {
"orchestration_run_id": ti.xcom_pull(task_ids="retrieve_run_id", key="run_id"),
"input_service_name": params.db,
"workflow_name": params.wf
} | tojson }}'
""",
        trigger_rule=TriggerRule.ALL_DONE,
    )
    # Final gate: marks the DAG run failed if any earlier task failed.
    dag_status = PythonOperator(
        task_id="dag_status",
        provide_context=True,
        python_callable=check_dag_status,
        trigger_rule=TriggerRule.ALL_DONE,
    )
    # Fan out to the five ANNEX groups, join on the MANUAL output group,
    # then close the run and gate the status.
    read_vars_task >> retrieve_run_id_task >> control_external_run_start >> [
        m_MOPDB_RQSD_ANNEX_1_1_ALL_ODS_RQSD_OBSERVATIONS,
        m_MOPDB_RQSD_ANNEX_1_2_ALL_ODS_RQSD_OBSERVATIONS,
        m_MOPDB_RQSD_ANNEX_1_1_FIN_ALL_ODS_RQSD_OBSERVATIONS,
        m_MOPDB_RQSD_ANNEX_1_2_FIN_ALL_ODS_RQSD_OBSERVATIONS,
        m_MOPDB_RQSD_ANNEX_2_ALL_ODS_RQSD_OBSERVATIONS,
    ] >> m_MOPDB_RQSD_OUTPUT_CURR_RQSD_NCB_SUBA_MANUAL >> control_external_run_end >> dag_status


# Instantiate and register the DAG at module level so Airflow discovers it.
globals()[DAG_NAME] = run_dag()

View File

View File

@@ -0,0 +1,156 @@
import os
from airflow.decorators import dag
from airflow.operators.bash import BashOperator
from airflow.operators.python import PythonOperator
from airflow.utils.dates import days_ago
from airflow.utils.trigger_rule import TriggerRule
from cosmos import DbtTaskGroup, ProfileConfig, ProjectConfig, RenderConfig
# DAG id is derived from this file's basename so the DAG id always matches
# the deployed filename.
DAG_NAME = os.path.splitext(os.path.basename(__file__))[0]
# Deployment environment ("dev", "test", ...); empty string when MRDS_ENV is unset.
ENV_NAME = os.getenv("MRDS_ENV", "").lower()
# Logical source database name per environment.
DATABASE_NAME_MAP = {
    "dev": "MOPDB",
    "test": "MOPDB_TEST",
}
# Unknown/unset environments fall back to "MOPDB".
DATABASE_NAME = DATABASE_NAME_MAP.get(ENV_NAME, "MOPDB")
# check cron 2
# Filesystem layout of the dbt project inside the worker image.
dbt_root_path = "/opt/dbt"
dbt_profiles_dir = "/opt/dbt/profiles.yml"
dbt_profiles_dir_parent = "/opt/dbt"
# Environment variables passed to every dbt invocation (control BashOperators
# and cosmos-rendered tasks). Credentials are read from the worker environment
# at DAG-parse time.
dbt_env = {
    "DBT_PROFILES_DIR": dbt_profiles_dir_parent,
    "DBT_TARGET": ENV_NAME,
    "MRDS_LOADER_DB_USER": os.getenv("MRDS_LOADER_DB_USER"),
    "MRDS_LOADER_DB_PASS": os.getenv("MRDS_LOADER_DB_PASS"),
    "MRDS_LOADER_DB_TNS": os.getenv("MRDS_LOADER_DB_TNS", "XE"),
    "MRDS_SCHEMA": os.getenv("MRDS_SCHEMA", "CT_MRDS"),
    "MRDS_PROTOCOL": os.getenv("MRDS_PROTOCOL", "tcps"),
    "MRDS_THREADS": os.getenv("MRDS_THREADS", "4"),
    "DBT_LOG_PATH": "/opt/dbt/logs",
    "DBT_TARGET_PATH": "/opt/dbt/target",
    "PYTHONUNBUFFERED": "1",
}
def retrieve_run_id(**kwargs):
    """Expose this DAG run's run_id to downstream tasks via XCom and return it."""
    task_instance = kwargs["ti"]
    current_run_id = kwargs["run_id"]
    task_instance.xcom_push(key="run_id", value=current_run_id)
    return current_run_id
def check_dag_status(**kwargs):
    """Raise if any other task instance in this DAG run ended in "failed"."""
    own_id = kwargs["task_instance"].task_id
    first_failed = next(
        (inst.task_id
         for inst in kwargs["dag_run"].get_task_instances()
         if inst.state == "failed" and inst.task_id != own_id),
        None,
    )
    if first_failed is not None:
        raise Exception(f"Task {first_failed} failed. Failing this DAG run")
@dag(
    dag_id=DAG_NAME,
    schedule_interval=None,  # externally triggered only
    start_date=days_ago(2),
    catchup=False,
)
def run_dag():
    """Run the m_MOPDB_TOP_T_AGGREGATED_ALLOTMENT_OU_TOP_AGGREGATED_ALLOTMENT
    dbt models, bracketed by control_external_run_start/_end run-operations,
    then gate the run status."""

    def read_vars(**context):
        # Diagnostic-only task: log bucket configuration; return value is unused.
        BUCKET = os.getenv("INBOX_BUCKET")
        BUCKET_NAMESPACE = os.getenv("BUCKET_NAMESPACE")
        print("========= DBT ENV =========")
        print(f"BUCKET_NAMESPACE: {BUCKET_NAMESPACE}, BUCKET : {BUCKET}")
        return 1

    read_vars_task = PythonOperator(
        task_id="read_vars",
        python_callable=read_vars,
        provide_context=True,
    )
    # Stores the Airflow run_id in XCom so the dbt --vars templates below can
    # reference it.
    retrieve_run_id_task = PythonOperator(
        task_id="retrieve_run_id",
        python_callable=retrieve_run_id,
        provide_context=True,
    )
    # Registers the start of this external run via a dbt run-operation.
    # The --vars payload is a Jinja template rendered by Airflow at runtime.
    control_external_run_start = BashOperator(
        task_id="control_external_run_start",
        params={"db": DATABASE_NAME, "wf": DAG_NAME},
        env=dbt_env,
        bash_command="""
set -euxo pipefail
cd /opt/dbt
dbt --log-format json --log-level debug --debug --log-path /opt/dbt/logs \
run-operation control_external_run_start \
--vars '{{ {
"orchestration_run_id": ti.xcom_pull(task_ids="retrieve_run_id", key="run_id"),
"input_service_name": params.db,
"workflow_name": params.wf
} | tojson }}'
""",
    )
    # Shared cosmos configuration for the dbt task group below.
    common_profile = ProfileConfig(
        profiles_yml_filepath=dbt_profiles_dir,
        profile_name="mrds",
        target_name=ENV_NAME,
    )
    common_project = ProjectConfig(dbt_project_path=dbt_root_path)
    common_vars = {
        "orchestration_run_id": "{{ ti.xcom_pull(task_ids='retrieve_run_id', key='run_id') }}",
        "input_service_name": DATABASE_NAME,
        "workflow_name": DAG_NAME,
    }
    common_operator_args = {
        "vars": common_vars,
        "env": dbt_env,
    }
    # cosmos expands the dbt models carrying this tag into Airflow tasks.
    m_MOPDB_TOP_T_AGGREGATED_ALLOTMENT_OU_TOP_AGGREGATED_ALLOTMENT = DbtTaskGroup(
        group_id="m_MOPDB_TOP_T_AGGREGATED_ALLOTMENT_OU_TOP_AGGREGATED_ALLOTMENT",
        project_config=common_project,
        profile_config=common_profile,
        render_config=RenderConfig(select=[
            "tag:m_MOPDB_TOP_T_AGGREGATED_ALLOTMENT_OU_TOP_AGGREGATED_ALLOTMENT",
        ]),
        operator_args=common_operator_args,
    )
    # Closes the external run record; ALL_DONE so the record is closed even
    # when upstream tasks fail.
    control_external_run_end = BashOperator(
        task_id="control_external_run_end",
        params={"db": DATABASE_NAME, "wf": DAG_NAME},
        env=dbt_env,
        bash_command="""
set -euxo pipefail
cd /opt/dbt
dbt --log-format json --log-level debug --debug --log-path /opt/dbt/logs \
run-operation control_external_run_end \
--vars '{{ {
"orchestration_run_id": ti.xcom_pull(task_ids="retrieve_run_id", key="run_id"),
"input_service_name": params.db,
"workflow_name": params.wf
} | tojson }}'
""",
        trigger_rule=TriggerRule.ALL_DONE,
    )
    # Final gate: marks the DAG run failed if any earlier task failed.
    dag_status = PythonOperator(
        task_id="dag_status",
        provide_context=True,
        python_callable=check_dag_status,
        trigger_rule=TriggerRule.ALL_DONE,
    )
    read_vars_task >> retrieve_run_id_task >> control_external_run_start >> m_MOPDB_TOP_T_AGGREGATED_ALLOTMENT_OU_TOP_AGGREGATED_ALLOTMENT >> control_external_run_end >> dag_status


# Instantiate and register the DAG at module level so Airflow discovers it.
globals()[DAG_NAME] = run_dag()

View File

@@ -0,0 +1,156 @@
import os
from airflow.decorators import dag
from airflow.operators.bash import BashOperator
from airflow.operators.python import PythonOperator
from airflow.utils.dates import days_ago
from airflow.utils.trigger_rule import TriggerRule
from cosmos import DbtTaskGroup, ProfileConfig, ProjectConfig, RenderConfig
# DAG id is derived from this file's name (without extension), so a file
# rename automatically renames the DAG.
DAG_NAME = os.path.splitext(os.path.basename(__file__))[0]
# Deployment environment ("dev", "test", ...); empty string when MRDS_ENV is unset.
ENV_NAME = os.getenv("MRDS_ENV", "").lower()
# Maps deployment environment to the logical source database name.
DATABASE_NAME_MAP = {
    "dev": "MOPDB",
    "test": "MOPDB_TEST",
}
# Unknown/unset environments fall back to the production-style name "MOPDB".
DATABASE_NAME = DATABASE_NAME_MAP.get(ENV_NAME, "MOPDB")
# dbt project layout inside the worker image.
dbt_root_path = "/opt/dbt"
dbt_profiles_dir = "/opt/dbt/profiles.yml"
dbt_profiles_dir_parent = "/opt/dbt"
# Environment passed to every dbt invocation (both BashOperator `env` and the
# Cosmos operator_args below).
# NOTE(review): BashOperator's `env` REPLACES the inherited environment rather
# than extending it — presumably PATH/locale are injected elsewhere so the
# `dbt` binary still resolves; confirm on the worker image.
dbt_env = {
    "DBT_PROFILES_DIR": dbt_profiles_dir_parent,
    "DBT_TARGET": ENV_NAME,
    "MRDS_LOADER_DB_USER": os.getenv("MRDS_LOADER_DB_USER"),
    "MRDS_LOADER_DB_PASS": os.getenv("MRDS_LOADER_DB_PASS"),
    "MRDS_LOADER_DB_TNS": os.getenv("MRDS_LOADER_DB_TNS", "XE"),
    "MRDS_SCHEMA": os.getenv("MRDS_SCHEMA", "CT_MRDS"),
    "MRDS_PROTOCOL": os.getenv("MRDS_PROTOCOL", "tcps"),
    "MRDS_THREADS": os.getenv("MRDS_THREADS", "4"),
    "DBT_LOG_PATH": "/opt/dbt/logs",
    "DBT_TARGET_PATH": "/opt/dbt/target",
    "PYTHONUNBUFFERED": "1",
}
def retrieve_run_id(**kwargs):
    """Publish the current Airflow run_id to XCom and return it.

    Expects the operator context to provide ``run_id`` and ``ti`` (the task
    instance); pushes the value under XCom key ``run_id`` so downstream
    templated tasks can pull it.
    """
    task_instance, current_run_id = kwargs["ti"], kwargs["run_id"]
    task_instance.xcom_push(key="run_id", value=current_run_id)
    return current_run_id
def check_dag_status(**kwargs):
    """Raise if any other task in this DAG run finished in state 'failed'.

    Used as the final task (trigger_rule=ALL_DONE) so the DAG run itself is
    marked failed when an upstream task failed. The checking task's own id is
    excluded from the scan.
    """
    own_task_id = kwargs["task_instance"].task_id
    failed_ids = [
        inst.task_id
        for inst in kwargs["dag_run"].get_task_instances()
        if inst.state == "failed" and inst.task_id != own_task_id
    ]
    if failed_ids:
        raise Exception(f"Task {failed_ids[0]} failed. Failing this DAG run")
@dag(
    dag_id=DAG_NAME,
    schedule_interval=None,  # triggered externally; no cron schedule
    start_date=days_ago(2),
    catchup=False,
)
def run_dag():
    """Build the orchestration DAG: env check -> run-id capture -> dbt
    control-start -> dbt model task group -> dbt control-end -> status gate.
    """

    def read_vars(**context):
        # Logs bucket-related environment for debugging only; the values are
        # not used by any downstream task.
        BUCKET = os.getenv("INBOX_BUCKET")
        BUCKET_NAMESPACE = os.getenv("BUCKET_NAMESPACE")
        print("========= DBT ENV =========")
        print(f"BUCKET_NAMESPACE: {BUCKET_NAMESPACE}, BUCKET : {BUCKET}")
        return 1

    read_vars_task = PythonOperator(
        task_id="read_vars",
        python_callable=read_vars,
        provide_context=True,  # no-op on Airflow 2.x; kept for 1.x compatibility
    )
    # Captures the DAG run_id into XCom so later Jinja-templated commands can
    # pull it via ti.xcom_pull.
    retrieve_run_id_task = PythonOperator(
        task_id="retrieve_run_id",
        python_callable=retrieve_run_id,
        provide_context=True,
    )
    # Registers the external run start in the control tables via a dbt
    # run-operation. The '{{ ... | tojson }}' block is rendered by Airflow's
    # Jinja templating before bash executes.
    control_external_run_start = BashOperator(
        task_id="control_external_run_start",
        params={"db": DATABASE_NAME, "wf": DAG_NAME},
        env=dbt_env,
        bash_command="""
    set -euxo pipefail
    cd /opt/dbt
    dbt --log-format json --log-level debug --debug --log-path /opt/dbt/logs \
    run-operation control_external_run_start \
    --vars '{{ {
    "orchestration_run_id": ti.xcom_pull(task_ids="retrieve_run_id", key="run_id"),
    "input_service_name": params.db,
    "workflow_name": params.wf
    } | tojson }}'
    """,
    )
    # Shared Cosmos configuration: one dbt profile/project reused by the task group.
    common_profile = ProfileConfig(
        profiles_yml_filepath=dbt_profiles_dir,
        profile_name="mrds",
        target_name=ENV_NAME,
    )
    common_project = ProjectConfig(dbt_project_path=dbt_root_path)
    # dbt vars forwarded to every model run; run_id is a template resolved per run.
    common_vars = {
        "orchestration_run_id": "{{ ti.xcom_pull(task_ids='retrieve_run_id', key='run_id') }}",
        "input_service_name": DATABASE_NAME,
        "workflow_name": DAG_NAME,
    }
    common_operator_args = {
        "vars": common_vars,
        "env": dbt_env,
    }
    # Cosmos task group running only the dbt models carrying this mapping's tag.
    m_MOPDB_TOP_T_ALLOTMENT_OU_TOP_ALLOTMENT = DbtTaskGroup(
        group_id="m_MOPDB_TOP_T_ALLOTMENT_OU_TOP_ALLOTMENT",
        project_config=common_project,
        profile_config=common_profile,
        render_config=RenderConfig(select=[
            "tag:m_MOPDB_TOP_T_ALLOTMENT_OU_TOP_ALLOTMENT",
        ]),
        operator_args=common_operator_args,
    )
    # Registers the run end; ALL_DONE ensures the control record is closed even
    # when the model task group failed.
    control_external_run_end = BashOperator(
        task_id="control_external_run_end",
        params={"db": DATABASE_NAME, "wf": DAG_NAME},
        env=dbt_env,
        bash_command="""
    set -euxo pipefail
    cd /opt/dbt
    dbt --log-format json --log-level debug --debug --log-path /opt/dbt/logs \
    run-operation control_external_run_end \
    --vars '{{ {
    "orchestration_run_id": ti.xcom_pull(task_ids="retrieve_run_id", key="run_id"),
    "input_service_name": params.db,
    "workflow_name": params.wf
    } | tojson }}'
    """,
        trigger_rule=TriggerRule.ALL_DONE,
    )
    # Final gate: fails the DAG run if any upstream task failed (runs regardless
    # of upstream outcome).
    dag_status = PythonOperator(
        task_id="dag_status",
        provide_context=True,
        python_callable=check_dag_status,
        trigger_rule=TriggerRule.ALL_DONE,
    )
    read_vars_task >> retrieve_run_id_task >> control_external_run_start >> m_MOPDB_TOP_T_ALLOTMENT_OU_TOP_ALLOTMENT >> control_external_run_end >> dag_status


# Instantiate the DAG and expose it at module level so the Airflow DAG parser
# discovers it under DAG_NAME.
globals()[DAG_NAME] = run_dag()

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,124 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<!-- ***************** Schema for UCDB DisseminationFile ******************* -->
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
           xmlns:c2d="http://c2d.escb.eu/UseOfCollateralMessage"
           targetNamespace="http://c2d.escb.eu/UseOfCollateralMessage" elementFormDefault="qualified" version="3.0">
    <!-- ***************** Include Common types ******************* -->
    <!-- Base asset types and shared simple types come from common_ucdb.xsd -->
    <xs:include schemaLocation="common_ucdb.xsd"/>
    <!-- Definition of the root element and its structure -->
    <xs:element name="DisseminationFile">
        <xs:annotation>
            <xs:documentation>Use of Collateral message. Dissemination files hold all reported usages of a snapshot and a specific NCB.</xs:documentation>
        </xs:annotation>
        <xs:complexType>
            <xs:sequence>
                <xs:element name="MetaInformation" type="c2d:MetaInformationTypeDisseminationFile"/>
                <!-- All three asset sections are optional: a file may contain any subset -->
                <xs:element name="MarketableAssets" type="c2d:DisseminationMarketableAssetsType" minOccurs="0"/>
                <xs:element name="NonMarketableAssets" type="c2d:DisseminationNonMarketableAssetsType" minOccurs="0"/>
                <xs:element name="NonMarketableDECCs" type="c2d:DisseminationNonMarketableDECCsType" minOccurs="0"/>
            </xs:sequence>
            <xs:attribute name="version" type="xs:string" use="required"/>
        </xs:complexType>
    </xs:element>
    <!-- File-level metadata: reporting NCB, snapshot/creation dates, suspect count -->
    <xs:complexType name="MetaInformationTypeDisseminationFile">
        <xs:complexContent>
            <xs:extension base="c2d:AbstractMetaInformationType">
                <xs:sequence>
                    <xs:element name="ReportingNCB" type="c2d:EurosystemISOCodeType">
                        <xs:annotation>
                            <xs:documentation>The two letter code identifying the NCB contained in the dissemination file.</xs:documentation>
                        </xs:annotation>
                    </xs:element>
                    <xs:element name="SnapshotDate" type="xs:date">
                        <xs:annotation>
                            <xs:documentation>The date the snapshot of the initial file data was taken.</xs:documentation>
                        </xs:annotation>
                    </xs:element>
                    <xs:element name="DateCreated" type="xs:dateTime">
                        <xs:annotation>
                            <xs:documentation>The date when the dissemination file has been created.</xs:documentation>
                        </xs:annotation>
                    </xs:element>
                    <xs:element name="NumberOfSuspectRecords" type="xs:unsignedInt">
                        <xs:annotation>
                            <xs:documentation>The number of records in the dissemination file still in status suspect.</xs:documentation>
                        </xs:annotation>
                    </xs:element>
                </xs:sequence>
            </xs:extension>
        </xs:complexContent>
    </xs:complexType>
    <!-- Container types: non-empty lists (maxOccurs="unbounded") of each asset kind -->
    <xs:complexType name="DisseminationMarketableAssetsType">
        <xs:annotation>
            <xs:documentation>Holds all marketable assets.</xs:documentation>
        </xs:annotation>
        <xs:sequence>
            <xs:element name="MarketableAsset" type="c2d:DisseminationMarketableAssetType" maxOccurs="unbounded"/>
        </xs:sequence>
    </xs:complexType>
    <xs:complexType name="DisseminationNonMarketableAssetsType">
        <xs:annotation>
            <xs:documentation>Holds all non marketable assets.</xs:documentation>
        </xs:annotation>
        <xs:sequence>
            <xs:element name="NonMarketableAsset" type="c2d:DisseminationNonMarketableAssetType" maxOccurs="unbounded"/>
        </xs:sequence>
    </xs:complexType>
    <xs:complexType name="DisseminationNonMarketableDECCsType">
        <xs:annotation>
            <xs:documentation>Holds all non marketable DECCs.</xs:documentation>
        </xs:annotation>
        <xs:sequence>
            <xs:element name="NonMarketableDECC" type="c2d:DisseminationNonMarketableDECCType" maxOccurs="unbounded"/>
        </xs:sequence>
    </xs:complexType>
    <!-- Per-asset types: extend the common base types with optional suspect information -->
    <xs:complexType name="DisseminationMarketableAssetType">
        <xs:annotation>
            <xs:documentation>Marketable Assets with suspect addon</xs:documentation>
        </xs:annotation>
        <xs:complexContent>
            <xs:extension base="c2d:MarketableAssetType">
                <xs:sequence>
                    <xs:element name="SuspectInformation" type="c2d:DisseminationSuspectType" minOccurs="0"/>
                </xs:sequence>
            </xs:extension>
        </xs:complexContent>
    </xs:complexType>
    <xs:complexType name="DisseminationNonMarketableAssetType">
        <xs:annotation>
            <xs:documentation>Non Marketable Assets with suspect addon</xs:documentation>
        </xs:annotation>
        <xs:complexContent>
            <xs:extension base="c2d:NonMarketableAssetType">
                <xs:sequence>
                    <xs:element name="SuspectInformation" type="c2d:DisseminationSuspectType" minOccurs="0"/>
                </xs:sequence>
            </xs:extension>
        </xs:complexContent>
    </xs:complexType>
    <xs:complexType name="DisseminationNonMarketableDECCType">
        <xs:annotation>
            <xs:documentation>Non Marketable DECCs with suspect addon</xs:documentation>
        </xs:annotation>
        <xs:complexContent>
            <xs:extension base="c2d:NonMarketableDECCType">
                <xs:sequence>
                    <xs:element name="SuspectInformation" type="c2d:DisseminationSuspectType" minOccurs="0"/>
                </xs:sequence>
            </xs:extension>
        </xs:complexContent>
    </xs:complexType>
    <xs:complexType name="DisseminationSuspectType">
        <xs:annotation>
            <xs:documentation>Holds all suspect information of a reported usage.</xs:documentation>
        </xs:annotation>
        <xs:sequence>
            <xs:element name="SuspectId" type="c2d:IntegerGreaterThanZeroType">
                <xs:annotation>
                    <xs:documentation>An ID created by the UCDB system identifying each single record that is stored in the UCDB system as a suspect record.</xs:documentation>
                </xs:annotation>
            </xs:element>
            <xs:element name="SuspectReasons" type="c2d:SuspectsReasonsType"/>
        </xs:sequence>
    </xs:complexType>
</xs:schema>

View File

@@ -0,0 +1,45 @@
# Global configurations
# Object-storage prefixes for incoming and archived C2D dissemination files.
inbox_prefix: INBOX/C2D/CollateralDataDissemination
archive_prefix: ARCHIVE/C2D/CollateralDataDissemination
workflow_name: w_ODS_C2D_UC_DISSEMI
# XSD used to validate each incoming file before parsing.
validation_schema_path: 'disseminationFile.xsd'
file_type: xml
# List of tasks
tasks:
  # Task 1
  - task_name: m_ODS_C2D_UC_DISSEM_METADATA_PARSE
    ods_prefix: INBOX/C2D/CollateralDataDissemination/C2D_A_UC_DISSEM_METADATA_LOADS
    output_table: C2D_A_UC_DISSEM_METADATA_LOADS
    # Namespace prefixes available to the xpath expressions below.
    namespaces:
      ns: 'http://c2d.escb.eu/UseOfCollateralMessage'
    # Column extraction spec: each entry maps one source (generated key,
    # xpath lookup, or static literal) to an output-table column.
    output_columns:
      - type: 'a_key'
        column_header: 'A_KEY'
      - type: 'workflow_key'
        column_header: 'A_WORKFLOW_HISTORY_KEY'
      - type: 'xpath'
        value: '//ns:DisseminationFile/@version'
        column_header: 'C2D_VERSION'
        is_key: 'N'
      - type: 'xpath'
        value: '//ns:MetaInformation/ns:DateCreated'
        column_header: 'FILE_CREATION_DATE'
        is_key: 'N'
      - type: 'xpath'
        value: '//ns:MetaInformation/ns:NumberOfSuspectRecords'
        column_header: 'NO_OF_SUSPECT_RECORDS'
        is_key: 'N'
      - type: 'xpath'
        value: '//ns:MetaInformation/ns:ReportingNCB'
        column_header: 'REPORTING_NCB'
        is_key: 'N'
      - type: 'xpath'
        value: '//ns:MetaInformation/ns:SnapshotDate'
        column_header: 'SNAPSHOT_DATE'
        is_key: 'N'
      # Placeholder column, filled later by the DWH load.
      # NOTE(review): presumably downstream marks processed rows here — confirm consumer.
      - type: 'static'
        value: ''
        column_header: 'PROCESSED_TO_DWH'
View File

@@ -0,0 +1,968 @@
<?xml version="1.0" encoding="UTF-8"?><schema targetNamespace="http://c2d.escb.eu/UseOfCollateralMessage" elementFormDefault="qualified" attributeFormDefault="qualified" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://www.w3.org/2001/XMLSchema">
<simpleType name="cl_issuer_csd">
<restriction base="string">
<enumeration value="CLAT01"/>
<enumeration value="CLBE01"/>
<enumeration value="CLBE02"/>
<enumeration value="CLBG01"/>
<enumeration value="CLBL01"/>
<enumeration value="CLCY01"/>
<enumeration value="CLCZ01"/>
<enumeration value="CLDE01"/>
<enumeration value="CLDE02"/>
<enumeration value="CLDK01"/>
<enumeration value="CLDL01"/>
<enumeration value="CLEE01"/>
<enumeration value="CLES01"/>
<enumeration value="CLEU01"/>
<enumeration value="CLFI01"/>
<enumeration value="CLFR01"/>
<enumeration value="CLGR01"/>
<enumeration value="CLHR01"/>
<enumeration value="CLIT01"/>
<enumeration value="CLLD01"/>
<enumeration value="CLLT02"/>
<enumeration value="CLLU01"/>
<enumeration value="CLLU03"/>
<enumeration value="CLLV02"/>
<enumeration value="CLMT01"/>
<enumeration value="CLNL01"/>
<enumeration value="CLPT02"/>
<enumeration value="CLSI01"/>
<enumeration value="CLSK01"/>
<enumeration value="CLSK02"/>
</restriction>
</simpleType>
<simpleType name="cl_ccb">
<restriction base="string">
<enumeration value="AT"/>
<enumeration value="BE"/>
<enumeration value="BG"/>
<enumeration value="CY"/>
<enumeration value="DE"/>
<enumeration value="EE"/>
<enumeration value="ES"/>
<enumeration value="FI"/>
<enumeration value="FR"/>
<enumeration value="GR"/>
<enumeration value="HR"/>
<enumeration value="IE"/>
<enumeration value="IT"/>
<enumeration value="LT"/>
<enumeration value="LU"/>
<enumeration value="LV"/>
<enumeration value="MT"/>
<enumeration value="NL"/>
<enumeration value="PT"/>
<enumeration value="SI"/>
<enumeration value="SK"/>
</restriction>
</simpleType>
<simpleType name="cl_mobilisation_channel">
<restriction base="string">
<enumeration value="CCBM mkt"/>
<enumeration value="CCBM mkt with links"/>
<enumeration value="CCBM nonmkt"/>
<enumeration value="Direct access"/>
<enumeration value="Direct access with links"/>
<enumeration value="Local CSD"/>
<enumeration value="Local CSD with links"/>
<enumeration value="Local cb nonmkt"/>
<enumeration value="Local dom nonmkt"/>
</restriction>
</simpleType>
<simpleType name="cl_triparty_agent">
<restriction base="string">
<enumeration value="CLBE02"/>
<enumeration value="CLDE01"/>
<enumeration value="CLFR01"/>
<enumeration value="CLIT01"/>
<enumeration value="CLLU01"/>
<enumeration value="CLNL01"/>
</restriction>
</simpleType>
<simpleType name="cl_la_rating">
<restriction base="string">
<enumeration value="LADB01"/>
<enumeration value="LADB02"/>
<enumeration value="LADB03"/>
<enumeration value="LADB04"/>
<enumeration value="LADB05"/>
<enumeration value="LADB06"/>
<enumeration value="LADB07"/>
<enumeration value="LADB08"/>
<enumeration value="LADB09"/>
<enumeration value="LADB10"/>
<enumeration value="LADB11"/>
<enumeration value="LADB12"/>
<enumeration value="LADB13"/>
<enumeration value="LADB14"/>
<enumeration value="LADB15"/>
<enumeration value="LADB16"/>
<enumeration value="LADB17"/>
<enumeration value="LADB18"/>
<enumeration value="LADB19"/>
<enumeration value="LADB20"/>
<enumeration value="LADB21"/>
<enumeration value="LADB22"/>
<enumeration value="LADB23"/>
<enumeration value="LADB24"/>
<enumeration value="LADB25"/>
<enumeration value="LADB26"/>
<enumeration value="LAIA01"/>
<enumeration value="LAIA02"/>
<enumeration value="LAIA03"/>
<enumeration value="LAIA04"/>
<enumeration value="LAIA05"/>
<enumeration value="LAIA06"/>
<enumeration value="LAIA07"/>
<enumeration value="LAIA08"/>
<enumeration value="LAIA09"/>
<enumeration value="LAIA10"/>
<enumeration value="LAIA11"/>
<enumeration value="LAIA12"/>
<enumeration value="LAIA13"/>
<enumeration value="LAIA14"/>
<enumeration value="LAIA15"/>
<enumeration value="LAIA16"/>
<enumeration value="LAIA17"/>
<enumeration value="LAIA18"/>
<enumeration value="LAIA19"/>
<enumeration value="LAIA20"/>
<enumeration value="LAIA21"/>
<enumeration value="LAMY01"/>
<enumeration value="LAMY02"/>
<enumeration value="LAMY03"/>
<enumeration value="LAMY04"/>
<enumeration value="LAMY05"/>
<enumeration value="LAMY06"/>
<enumeration value="LAMY07"/>
<enumeration value="LAMY08"/>
<enumeration value="LAMY09"/>
<enumeration value="LAMY10"/>
<enumeration value="LAMY11"/>
<enumeration value="LAMY12"/>
<enumeration value="LAMY13"/>
<enumeration value="LAMY14"/>
<enumeration value="LAMY15"/>
<enumeration value="LAMY16"/>
<enumeration value="LAMY17"/>
<enumeration value="LAMY18"/>
<enumeration value="LAMY19"/>
<enumeration value="LAMY20"/>
<enumeration value="LAMY21"/>
<enumeration value="LASC01"/>
<enumeration value="LASC02"/>
<enumeration value="LASC03"/>
<enumeration value="LASC04"/>
<enumeration value="LASC05"/>
<enumeration value="LASC06"/>
<enumeration value="LASC07"/>
<enumeration value="LASC08"/>
<enumeration value="LASC09"/>
<enumeration value="LASC10"/>
<enumeration value="LASC11"/>
<enumeration value="LASC12"/>
<enumeration value="LASC13"/>
<enumeration value="LASC14"/>
<enumeration value="LASC15"/>
<enumeration value="LASC16"/>
<enumeration value="LASC17"/>
<enumeration value="LASC18"/>
<enumeration value="LASC19"/>
<enumeration value="LASC20"/>
<enumeration value="LASC21"/>
<enumeration value="LASP01"/>
<enumeration value="LASP02"/>
<enumeration value="LASP03"/>
<enumeration value="LASP04"/>
<enumeration value="LASP05"/>
<enumeration value="LASP06"/>
<enumeration value="LASP07"/>
<enumeration value="LASP08"/>
<enumeration value="LASP09"/>
<enumeration value="LASP10"/>
<enumeration value="LASP11"/>
<enumeration value="LASP12"/>
<enumeration value="LASP13"/>
<enumeration value="LASP14"/>
<enumeration value="LASP15"/>
<enumeration value="LASP16"/>
<enumeration value="LASP17"/>
<enumeration value="LASP18"/>
<enumeration value="LASP19"/>
<enumeration value="LASP20"/>
<enumeration value="LASP21"/>
<enumeration value="LASP22"/>
<enumeration value="LPDB01"/>
<enumeration value="LPDB02"/>
<enumeration value="LPDB03"/>
<enumeration value="LPDB04"/>
<enumeration value="LPDB05"/>
<enumeration value="LPDB06"/>
<enumeration value="LPDB07"/>
<enumeration value="LPDB08"/>
<enumeration value="LPDB09"/>
<enumeration value="LPDB10"/>
<enumeration value="LPDB11"/>
<enumeration value="LPDB12"/>
<enumeration value="LPDB13"/>
<enumeration value="LPDB14"/>
<enumeration value="LPDB15"/>
<enumeration value="LPDB16"/>
<enumeration value="LPDB17"/>
<enumeration value="LPDB18"/>
<enumeration value="LPDB19"/>
<enumeration value="LPDB20"/>
<enumeration value="LPDB21"/>
<enumeration value="LPDB22"/>
<enumeration value="LPDB23"/>
<enumeration value="LPDB24"/>
<enumeration value="LPDB25"/>
<enumeration value="LPDB26"/>
<enumeration value="LPIA01"/>
<enumeration value="LPIA02"/>
<enumeration value="LPIA03"/>
<enumeration value="LPIA04"/>
<enumeration value="LPIA05"/>
<enumeration value="LPIA06"/>
<enumeration value="LPIA07"/>
<enumeration value="LPIA08"/>
<enumeration value="LPIA09"/>
<enumeration value="LPIA10"/>
<enumeration value="LPIA11"/>
<enumeration value="LPIA12"/>
<enumeration value="LPIA13"/>
<enumeration value="LPIA14"/>
<enumeration value="LPIA15"/>
<enumeration value="LPIA16"/>
<enumeration value="LPIA17"/>
<enumeration value="LPIA18"/>
<enumeration value="LPIA19"/>
<enumeration value="LPIA20"/>
<enumeration value="LPIA21"/>
<enumeration value="LPMY01"/>
<enumeration value="LPMY02"/>
<enumeration value="LPMY03"/>
<enumeration value="LPMY04"/>
<enumeration value="LPMY05"/>
<enumeration value="LPMY06"/>
<enumeration value="LPMY07"/>
<enumeration value="LPMY08"/>
<enumeration value="LPMY09"/>
<enumeration value="LPMY10"/>
<enumeration value="LPMY11"/>
<enumeration value="LPMY12"/>
<enumeration value="LPMY13"/>
<enumeration value="LPMY14"/>
<enumeration value="LPMY15"/>
<enumeration value="LPMY16"/>
<enumeration value="LPMY17"/>
<enumeration value="LPMY18"/>
<enumeration value="LPMY19"/>
<enumeration value="LPMY20"/>
<enumeration value="LPMY21"/>
<enumeration value="LPSC01"/>
<enumeration value="LPSC02"/>
<enumeration value="LPSC03"/>
<enumeration value="LPSC04"/>
<enumeration value="LPSC05"/>
<enumeration value="LPSC06"/>
<enumeration value="LPSC07"/>
<enumeration value="LPSC08"/>
<enumeration value="LPSC09"/>
<enumeration value="LPSC10"/>
<enumeration value="LPSC11"/>
<enumeration value="LPSC12"/>
<enumeration value="LPSC13"/>
<enumeration value="LPSC14"/>
<enumeration value="LPSC15"/>
<enumeration value="LPSC16"/>
<enumeration value="LPSC17"/>
<enumeration value="LPSC18"/>
<enumeration value="LPSC19"/>
<enumeration value="LPSC20"/>
<enumeration value="LPSC21"/>
<enumeration value="LPSP01"/>
<enumeration value="LPSP02"/>
<enumeration value="LPSP03"/>
<enumeration value="LPSP04"/>
<enumeration value="LPSP05"/>
<enumeration value="LPSP06"/>
<enumeration value="LPSP07"/>
<enumeration value="LPSP08"/>
<enumeration value="LPSP09"/>
<enumeration value="LPSP10"/>
<enumeration value="LPSP11"/>
<enumeration value="LPSP12"/>
<enumeration value="LPSP13"/>
<enumeration value="LPSP14"/>
<enumeration value="LPSP15"/>
<enumeration value="LPSP16"/>
<enumeration value="LPSP17"/>
<enumeration value="LPSP18"/>
<enumeration value="LPSP19"/>
<enumeration value="LPSP20"/>
<enumeration value="LPSP21"/>
<enumeration value="LPSP22"/>
</restriction>
</simpleType>
<simpleType name="cl_si_rating">
<restriction base="string">
<enumeration value="SIDB01"/>
<enumeration value="SIDB02"/>
<enumeration value="SIDB03"/>
<enumeration value="SIDB04"/>
<enumeration value="SIDB05"/>
<enumeration value="SIDB06"/>
<enumeration value="SIDB07"/>
<enumeration value="SIDB08"/>
<enumeration value="SIDB09"/>
<enumeration value="SIIA01"/>
<enumeration value="SIIA02"/>
<enumeration value="SIIA03"/>
<enumeration value="SIIA04"/>
<enumeration value="SIIA05"/>
<enumeration value="SIIA06"/>
<enumeration value="SIIA07"/>
<enumeration value="SIMY01"/>
<enumeration value="SIMY02"/>
<enumeration value="SIMY03"/>
<enumeration value="SIMY04"/>
<enumeration value="SISC01"/>
<enumeration value="SISC02"/>
<enumeration value="SISC03"/>
<enumeration value="SISC04"/>
<enumeration value="SISC05"/>
<enumeration value="SISC06"/>
<enumeration value="SISC07"/>
<enumeration value="SISP01"/>
<enumeration value="SISP02"/>
<enumeration value="SISP03"/>
<enumeration value="SISP04"/>
<enumeration value="SISP05"/>
<enumeration value="SISP06"/>
<enumeration value="SISP07"/>
</restriction>
</simpleType>
<simpleType name="cl_li_rating">
<restriction base="string">
<enumeration value="LIDB01"/>
<enumeration value="LIDB02"/>
<enumeration value="LIDB03"/>
<enumeration value="LIDB04"/>
<enumeration value="LIDB05"/>
<enumeration value="LIDB06"/>
<enumeration value="LIDB07"/>
<enumeration value="LIDB08"/>
<enumeration value="LIDB09"/>
<enumeration value="LIDB10"/>
<enumeration value="LIDB11"/>
<enumeration value="LIDB12"/>
<enumeration value="LIDB13"/>
<enumeration value="LIDB14"/>
<enumeration value="LIDB15"/>
<enumeration value="LIDB16"/>
<enumeration value="LIDB17"/>
<enumeration value="LIDB18"/>
<enumeration value="LIDB19"/>
<enumeration value="LIDB20"/>
<enumeration value="LIDB21"/>
<enumeration value="LIDB22"/>
<enumeration value="LIDB23"/>
<enumeration value="LIDB24"/>
<enumeration value="LIDB25"/>
<enumeration value="LIDB26"/>
<enumeration value="LIIA01"/>
<enumeration value="LIIA02"/>
<enumeration value="LIIA03"/>
<enumeration value="LIIA04"/>
<enumeration value="LIIA05"/>
<enumeration value="LIIA06"/>
<enumeration value="LIIA07"/>
<enumeration value="LIIA08"/>
<enumeration value="LIIA09"/>
<enumeration value="LIIA10"/>
<enumeration value="LIIA11"/>
<enumeration value="LIIA12"/>
<enumeration value="LIIA13"/>
<enumeration value="LIIA14"/>
<enumeration value="LIIA15"/>
<enumeration value="LIIA16"/>
<enumeration value="LIIA17"/>
<enumeration value="LIIA18"/>
<enumeration value="LIIA19"/>
<enumeration value="LIIA20"/>
<enumeration value="LIIA21"/>
<enumeration value="LIIA22"/>
<enumeration value="LIMY01"/>
<enumeration value="LIMY02"/>
<enumeration value="LIMY03"/>
<enumeration value="LIMY04"/>
<enumeration value="LIMY05"/>
<enumeration value="LIMY06"/>
<enumeration value="LIMY07"/>
<enumeration value="LIMY08"/>
<enumeration value="LIMY09"/>
<enumeration value="LIMY10"/>
<enumeration value="LIMY11"/>
<enumeration value="LIMY12"/>
<enumeration value="LIMY13"/>
<enumeration value="LIMY14"/>
<enumeration value="LIMY15"/>
<enumeration value="LIMY16"/>
<enumeration value="LIMY17"/>
<enumeration value="LIMY18"/>
<enumeration value="LIMY19"/>
<enumeration value="LIMY20"/>
<enumeration value="LIMY21"/>
<enumeration value="LISC01"/>
<enumeration value="LISC02"/>
<enumeration value="LISC03"/>
<enumeration value="LISC04"/>
<enumeration value="LISC05"/>
<enumeration value="LISC06"/>
<enumeration value="LISC07"/>
<enumeration value="LISC08"/>
<enumeration value="LISC09"/>
<enumeration value="LISC10"/>
<enumeration value="LISC11"/>
<enumeration value="LISC12"/>
<enumeration value="LISC13"/>
<enumeration value="LISC14"/>
<enumeration value="LISC15"/>
<enumeration value="LISC16"/>
<enumeration value="LISC17"/>
<enumeration value="LISC18"/>
<enumeration value="LISC19"/>
<enumeration value="LISC20"/>
<enumeration value="LISC21"/>
<enumeration value="LISP01"/>
<enumeration value="LISP02"/>
<enumeration value="LISP03"/>
<enumeration value="LISP04"/>
<enumeration value="LISP05"/>
<enumeration value="LISP06"/>
<enumeration value="LISP07"/>
<enumeration value="LISP08"/>
<enumeration value="LISP09"/>
<enumeration value="LISP10"/>
<enumeration value="LISP11"/>
<enumeration value="LISP12"/>
<enumeration value="LISP13"/>
<enumeration value="LISP14"/>
<enumeration value="LISP15"/>
<enumeration value="LISP16"/>
<enumeration value="LISP17"/>
<enumeration value="LISP18"/>
<enumeration value="LISP19"/>
<enumeration value="LISP20"/>
<enumeration value="LISP21"/>
<enumeration value="LISP22"/>
</restriction>
</simpleType>
<simpleType name="cl_lg_rating">
<restriction base="string">
<enumeration value="LGDB01"/>
<enumeration value="LGDB02"/>
<enumeration value="LGDB03"/>
<enumeration value="LGDB04"/>
<enumeration value="LGDB05"/>
<enumeration value="LGDB06"/>
<enumeration value="LGDB07"/>
<enumeration value="LGDB08"/>
<enumeration value="LGDB09"/>
<enumeration value="LGDB10"/>
<enumeration value="LGDB11"/>
<enumeration value="LGDB12"/>
<enumeration value="LGDB13"/>
<enumeration value="LGDB14"/>
<enumeration value="LGDB15"/>
<enumeration value="LGDB16"/>
<enumeration value="LGDB17"/>
<enumeration value="LGDB18"/>
<enumeration value="LGDB19"/>
<enumeration value="LGDB20"/>
<enumeration value="LGDB21"/>
<enumeration value="LGDB22"/>
<enumeration value="LGDB23"/>
<enumeration value="LGDB24"/>
<enumeration value="LGDB25"/>
<enumeration value="LGDB26"/>
<enumeration value="LGIA01"/>
<enumeration value="LGIA02"/>
<enumeration value="LGIA03"/>
<enumeration value="LGIA04"/>
<enumeration value="LGIA05"/>
<enumeration value="LGIA06"/>
<enumeration value="LGIA07"/>
<enumeration value="LGIA08"/>
<enumeration value="LGIA09"/>
<enumeration value="LGIA10"/>
<enumeration value="LGIA11"/>
<enumeration value="LGIA12"/>
<enumeration value="LGIA13"/>
<enumeration value="LGIA14"/>
<enumeration value="LGIA15"/>
<enumeration value="LGIA16"/>
<enumeration value="LGIA17"/>
<enumeration value="LGIA18"/>
<enumeration value="LGIA19"/>
<enumeration value="LGIA20"/>
<enumeration value="LGIA21"/>
<enumeration value="LGIA22"/>
<enumeration value="LGMY01"/>
<enumeration value="LGMY02"/>
<enumeration value="LGMY03"/>
<enumeration value="LGMY04"/>
<enumeration value="LGMY05"/>
<enumeration value="LGMY06"/>
<enumeration value="LGMY07"/>
<enumeration value="LGMY08"/>
<enumeration value="LGMY09"/>
<enumeration value="LGMY10"/>
<enumeration value="LGMY11"/>
<enumeration value="LGMY12"/>
<enumeration value="LGMY13"/>
<enumeration value="LGMY14"/>
<enumeration value="LGMY15"/>
<enumeration value="LGMY16"/>
<enumeration value="LGMY17"/>
<enumeration value="LGMY18"/>
<enumeration value="LGMY19"/>
<enumeration value="LGMY20"/>
<enumeration value="LGMY21"/>
<enumeration value="LGSC01"/>
<enumeration value="LGSC02"/>
<enumeration value="LGSC03"/>
<enumeration value="LGSC04"/>
<enumeration value="LGSC05"/>
<enumeration value="LGSC06"/>
<enumeration value="LGSC07"/>
<enumeration value="LGSC08"/>
<enumeration value="LGSC09"/>
<enumeration value="LGSC10"/>
<enumeration value="LGSC11"/>
<enumeration value="LGSC12"/>
<enumeration value="LGSC13"/>
<enumeration value="LGSC14"/>
<enumeration value="LGSC15"/>
<enumeration value="LGSC16"/>
<enumeration value="LGSC17"/>
<enumeration value="LGSC18"/>
<enumeration value="LGSC19"/>
<enumeration value="LGSC20"/>
<enumeration value="LGSC21"/>
<enumeration value="LGSP01"/>
<enumeration value="LGSP02"/>
<enumeration value="LGSP03"/>
<enumeration value="LGSP04"/>
<enumeration value="LGSP05"/>
<enumeration value="LGSP06"/>
<enumeration value="LGSP07"/>
<enumeration value="LGSP08"/>
<enumeration value="LGSP09"/>
<enumeration value="LGSP10"/>
<enumeration value="LGSP11"/>
<enumeration value="LGSP12"/>
<enumeration value="LGSP13"/>
<enumeration value="LGSP14"/>
<enumeration value="LGSP15"/>
<enumeration value="LGSP16"/>
<enumeration value="LGSP17"/>
<enumeration value="LGSP18"/>
<enumeration value="LGSP19"/>
<enumeration value="LGSP20"/>
<enumeration value="LGSP21"/>
<enumeration value="LGSP22"/>
</restriction>
</simpleType>
<simpleType name="cl_sa_rating">
<restriction base="string">
<enumeration value="SADB01"/>
<enumeration value="SADB02"/>
<enumeration value="SADB03"/>
<enumeration value="SADB04"/>
<enumeration value="SADB05"/>
<enumeration value="SADB06"/>
<enumeration value="SADB07"/>
<enumeration value="SADB08"/>
<enumeration value="SADB09"/>
<enumeration value="SAIA01"/>
<enumeration value="SAIA02"/>
<enumeration value="SAIA03"/>
<enumeration value="SAIA04"/>
<enumeration value="SAIA05"/>
<enumeration value="SAIA06"/>
<enumeration value="SAIA07"/>
<enumeration value="SAMY01"/>
<enumeration value="SAMY02"/>
<enumeration value="SAMY03"/>
<enumeration value="SAMY04"/>
<enumeration value="SASC01"/>
<enumeration value="SASC02"/>
<enumeration value="SASC03"/>
<enumeration value="SASC04"/>
<enumeration value="SASC05"/>
<enumeration value="SASC06"/>
<enumeration value="SASC07"/>
<enumeration value="SASP01"/>
<enumeration value="SASP02"/>
<enumeration value="SASP03"/>
<enumeration value="SASP04"/>
<enumeration value="SASP05"/>
<enumeration value="SASP06"/>
<enumeration value="SASP07"/>
<enumeration value="SPDB01"/>
<enumeration value="SPDB02"/>
<enumeration value="SPDB03"/>
<enumeration value="SPDB04"/>
<enumeration value="SPDB05"/>
<enumeration value="SPDB06"/>
<enumeration value="SPDB07"/>
<enumeration value="SPDB08"/>
<enumeration value="SPDB09"/>
<enumeration value="SPIA01"/>
<enumeration value="SPIA02"/>
<enumeration value="SPIA03"/>
<enumeration value="SPIA04"/>
<enumeration value="SPIA05"/>
<enumeration value="SPIA06"/>
<enumeration value="SPIA07"/>
<enumeration value="SPMY01"/>
<enumeration value="SPMY02"/>
<enumeration value="SPMY03"/>
<enumeration value="SPMY04"/>
<enumeration value="SPSC01"/>
<enumeration value="SPSC02"/>
<enumeration value="SPSC03"/>
<enumeration value="SPSC04"/>
<enumeration value="SPSC05"/>
<enumeration value="SPSC06"/>
<enumeration value="SPSC07"/>
<enumeration value="SPSP01"/>
<enumeration value="SPSP02"/>
<enumeration value="SPSP03"/>
<enumeration value="SPSP04"/>
<enumeration value="SPSP05"/>
<enumeration value="SPSP06"/>
<enumeration value="SPSP07"/>
</restriction>
</simpleType>
<simpleType name="cl_cgr_rating">
<restriction base="string">
<enumeration value="10"/>
<enumeration value="109"/>
<enumeration value="20"/>
<enumeration value="30"/>
<enumeration value="40"/>
<enumeration value="99"/>
</restriction>
</simpleType>
<simpleType name="cl_reference_rate">
<restriction base="string">
<enumeration value="10MEUBOR"/>
<enumeration value="10YEUIRS"/>
<enumeration value="10YGOTTEX"/>
<enumeration value="10YICAP"/>
<enumeration value="10YICES"/>
<enumeration value="10YOLO"/>
<enumeration value="11MEUBOR"/>
<enumeration value="11YEUIRS"/>
<enumeration value="11YICAP"/>
<enumeration value="11YOLO"/>
<enumeration value="12YEUIRS"/>
<enumeration value="12YGOTTEX"/>
<enumeration value="12YICAP"/>
<enumeration value="12YICES"/>
<enumeration value="12YOLO"/>
<enumeration value="13YEUIRS"/>
<enumeration value="13YICAP"/>
<enumeration value="13YOLO"/>
<enumeration value="14YEUIRS"/>
<enumeration value="14YICAP"/>
<enumeration value="14YOLO"/>
<enumeration value="15YEUIRS"/>
<enumeration value="15YGOTTEX"/>
<enumeration value="15YICAP"/>
<enumeration value="15YICES"/>
<enumeration value="15YOLO"/>
<enumeration value="16YICAP"/>
<enumeration value="16YOLO"/>
<enumeration value="17YICAP"/>
<enumeration value="17YOLO"/>
<enumeration value="18YICAP"/>
<enumeration value="18YOLO"/>
<enumeration value="19YICAP"/>
<enumeration value="19YOLO"/>
<enumeration value="1MEUBOR"/>
<enumeration value="1MEUCMS"/>
<enumeration value="1MLIBOR"/>
<enumeration value="1MLICMS"/>
<enumeration value="1WEUBOR"/>
<enumeration value="1WEUCMS"/>
<enumeration value="1WLIBOR"/>
<enumeration value="1WLICMS"/>
<enumeration value="1YEUBOR"/>
<enumeration value="1YEUCMS"/>
<enumeration value="1YEUIRS"/>
<enumeration value="1YICAP"/>
<enumeration value="1YICES"/>
<enumeration value="1YLIBOR"/>
<enumeration value="1YLICMS"/>
<enumeration value="1YOLO"/>
<enumeration value="20YEUIRS"/>
<enumeration value="20YGOTTEX"/>
<enumeration value="20YICAP"/>
<enumeration value="20YICES"/>
<enumeration value="20YOLO"/>
<enumeration value="21YICAP"/>
<enumeration value="21YOLO"/>
<enumeration value="22YICAP"/>
<enumeration value="22YOLO"/>
<enumeration value="23YICAP"/>
<enumeration value="23YOLO"/>
<enumeration value="24YICAP"/>
<enumeration value="24YOLO"/>
<enumeration value="25YEUIRS"/>
<enumeration value="25YICAP"/>
<enumeration value="25YICES"/>
<enumeration value="25YOLO"/>
<enumeration value="26YICAP"/>
<enumeration value="26YOLO"/>
<enumeration value="27YICAP"/>
<enumeration value="27YOLO"/>
<enumeration value="28YICAP"/>
<enumeration value="28YOLO"/>
<enumeration value="29YICAP"/>
<enumeration value="29YOLO"/>
<enumeration value="2MEUBOR"/>
<enumeration value="2MEUCMS"/>
<enumeration value="2MLIBOR"/>
<enumeration value="2MLICMS"/>
<enumeration value="2WEUBOR"/>
<enumeration value="2WEUCMS"/>
<enumeration value="2WLIBOR"/>
<enumeration value="2WLICMS"/>
<enumeration value="2YEUIRS"/>
<enumeration value="2YGOTTEX"/>
<enumeration value="2YICAP"/>
<enumeration value="2YICES"/>
<enumeration value="2YOLO"/>
<enumeration value="30YEUIRS"/>
<enumeration value="30YGOTTEX"/>
<enumeration value="30YICAP"/>
<enumeration value="30YICES"/>
<enumeration value="30YOLO"/>
<enumeration value="35YICAP"/>
<enumeration value="3MEUBOR"/>
<enumeration value="3MEUCMS"/>
<enumeration value="3MLIBOR"/>
<enumeration value="3MLICMS"/>
<enumeration value="3WEUBOR"/>
<enumeration value="3YEUIRS"/>
<enumeration value="3YGOTTEX"/>
<enumeration value="3YICAP"/>
<enumeration value="3YICES"/>
<enumeration value="3YOLO"/>
<enumeration value="40YICAP"/>
<enumeration value="4MEUBOR"/>
<enumeration value="4YEUIRS"/>
<enumeration value="4YGOTTEX"/>
<enumeration value="4YICAP"/>
<enumeration value="4YICES"/>
<enumeration value="4YOLO"/>
<enumeration value="50YICAP"/>
<enumeration value="5MEUBOR"/>
<enumeration value="5YEUIRS"/>
<enumeration value="5YGOTTEX"/>
<enumeration value="5YICAP"/>
<enumeration value="5YICES"/>
<enumeration value="5YOLO"/>
<enumeration value="6MEUBOR"/>
<enumeration value="6MEUCMS"/>
<enumeration value="6MLIBOR"/>
<enumeration value="6MLICMS"/>
<enumeration value="6YEUIRS"/>
<enumeration value="6YGOTTEX"/>
<enumeration value="6YICAP"/>
<enumeration value="6YICES"/>
<enumeration value="6YOLO"/>
<enumeration value="7MEUBOR"/>
<enumeration value="7YEUIRS"/>
<enumeration value="7YGOTTEX"/>
<enumeration value="7YICAP"/>
<enumeration value="7YICES"/>
<enumeration value="7YOLO"/>
<enumeration value="8MEUBOR"/>
<enumeration value="8YEUIRS"/>
<enumeration value="8YGOTTEX"/>
<enumeration value="8YICAP"/>
<enumeration value="8YICES"/>
<enumeration value="8YOLO"/>
<enumeration value="9MEUBOR"/>
<enumeration value="9MEUCMS"/>
<enumeration value="9MLIBOR"/>
<enumeration value="9MLICMS"/>
<enumeration value="9YEUIRS"/>
<enumeration value="9YGOTTEX"/>
<enumeration value="9YICAP"/>
<enumeration value="9YICES"/>
<enumeration value="9YOLO"/>
<enumeration value="A10YEUIRS"/>
<enumeration value="A11YEUIRS"/>
<enumeration value="A12YEUIRS"/>
<enumeration value="A13YEUIRS"/>
<enumeration value="A14YEUIRS"/>
<enumeration value="A15YEUIRS"/>
<enumeration value="A1MEUBOR"/>
<enumeration value="A1MEUCMS"/>
<enumeration value="A1MLIBOR"/>
<enumeration value="A1MLICMS"/>
<enumeration value="A1WEUBOR"/>
<enumeration value="A1WEUCMS"/>
<enumeration value="A1WLIBOR"/>
<enumeration value="A1WLICMS"/>
<enumeration value="A1YEUBOR"/>
<enumeration value="A1YEUCMS"/>
<enumeration value="A1YEUIRS"/>
<enumeration value="A1YLIBOR"/>
<enumeration value="A1YLICMS"/>
<enumeration value="A20YEUIRS"/>
<enumeration value="A25YEUIRS"/>
<enumeration value="A2MEUBOR"/>
<enumeration value="A2MEUCMS"/>
<enumeration value="A2MLIBOR"/>
<enumeration value="A2MLICMS"/>
<enumeration value="A2WEUBOR"/>
<enumeration value="A2WEUCMS"/>
<enumeration value="A2WLIBOR"/>
<enumeration value="A2WLICMS"/>
<enumeration value="A2YEUIRS"/>
<enumeration value="A30YEUIRS"/>
<enumeration value="A3MEUBOR"/>
<enumeration value="A3MEUCMS"/>
<enumeration value="A3MLIBOR"/>
<enumeration value="A3MLICMS"/>
<enumeration value="A3YEUIRS"/>
<enumeration value="A4YEUIRS"/>
<enumeration value="A5YEUIRS"/>
<enumeration value="A6MEUBOR"/>
<enumeration value="A6MEUCMS"/>
<enumeration value="A6MLIBOR"/>
<enumeration value="A6MLICMS"/>
<enumeration value="A6YEUIRS"/>
<enumeration value="A7YEUIRS"/>
<enumeration value="A8YEUIRS"/>
<enumeration value="A9MEUBOR"/>
<enumeration value="A9MEUCMS"/>
<enumeration value="A9MLIBOR"/>
<enumeration value="A9MLICMS"/>
<enumeration value="A9YEUIRS"/>
<enumeration value="ATG815"/>
<enumeration value="ATG8WBG"/>
<enumeration value="ATGMIN10"/>
<enumeration value="ATGMIN8"/>
<enumeration value="CNOTEC10"/>
<enumeration value="EONIA"/>
<enumeration value="ESBond"/>
<enumeration value="ESTR"/>
<enumeration value="EURR002W"/>
<enumeration value="EUSA10M"/>
<enumeration value="EUSA10Y"/>
<enumeration value="EUSA11M"/>
<enumeration value="EUSA11Y"/>
<enumeration value="EUSA12M"/>
<enumeration value="EUSA12Y"/>
<enumeration value="EUSA13Y"/>
<enumeration value="EUSA14Y"/>
<enumeration value="EUSA15M"/>
<enumeration value="EUSA15Y"/>
<enumeration value="EUSA16Y"/>
<enumeration value="EUSA17Y"/>
<enumeration value="EUSA18M"/>
<enumeration value="EUSA18Y"/>
<enumeration value="EUSA19Y"/>
<enumeration value="EUSA1D"/>
<enumeration value="EUSA1M"/>
<enumeration value="EUSA1W"/>
<enumeration value="EUSA20Y"/>
<enumeration value="EUSA21M"/>
<enumeration value="EUSA27M"/>
<enumeration value="EUSA2M"/>
<enumeration value="EUSA2Y"/>
<enumeration value="EUSA30M"/>
<enumeration value="EUSA33M"/>
<enumeration value="EUSA3M"/>
<enumeration value="EUSA3Y"/>
<enumeration value="EUSA4M"/>
<enumeration value="EUSA4Y"/>
<enumeration value="EUSA5M"/>
<enumeration value="EUSA5Y"/>
<enumeration value="EUSA6M"/>
<enumeration value="EUSA6Y"/>
<enumeration value="EUSA7M"/>
<enumeration value="EUSA7Y"/>
<enumeration value="EUSA8M"/>
<enumeration value="EUSA8Y"/>
<enumeration value="EUSA9M"/>
<enumeration value="EUSA9Y"/>
<enumeration value="MUDRB"/>
<enumeration value="OTHER"/>
<enumeration value="OTHER_NS"/>
<enumeration value="QMUDRB"/>
<enumeration value="QUDRB"/>
<enumeration value="RENDSTATO"/>
<enumeration value="SMUDRB"/>
<enumeration value="SUDRB"/>
<enumeration value="T4M"/>
<enumeration value="TAG"/>
<enumeration value="TAM"/>
<enumeration value="TME"/>
<enumeration value="UDRB"/>
<enumeration value="UDRBQWBG"/>
<enumeration value="YUDRB"/>
</restriction>
</simpleType>
<simpleType name="cl_eurosystem_iso_code">
<restriction base="string">
<enumeration value="AT"/>
<enumeration value="BE"/>
<enumeration value="BG"/>
<enumeration value="CY"/>
<enumeration value="DE"/>
<enumeration value="EE"/>
<enumeration value="ES"/>
<enumeration value="EU"/>
<enumeration value="FI"/>
<enumeration value="FR"/>
<enumeration value="GR"/>
<enumeration value="HR"/>
<enumeration value="IE"/>
<enumeration value="IT"/>
<enumeration value="LT"/>
<enumeration value="LU"/>
<enumeration value="LV"/>
<enumeration value="MT"/>
<enumeration value="NL"/>
<enumeration value="PT"/>
<enumeration value="SI"/>
<enumeration value="SK"/>
</restriction>
</simpleType>
</schema>

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,386 @@
# Global configurations
tmpdir: /tmp
inbox_prefix: INBOX/C2D/UC_DISSEM
workflow_name: w_OU_C2D_UC_DISSEM
validation_schema_path: '/opt/airflow/src/airflow/ods/c2d/uc/config/xsd/UseOfCollateralMessage.xsd'
bucket: mrds_inbox_tst
file_type: xml
# List of tasks
tasks:
- task_name: t_ODS_C2D_UC_DISSEM_create_metadata_file
ods_prefix: INBOX/C2D/UC_DISSEM/A_UC_DISSEM_METADATA_LOADS
output_table: A_UC_DISSEM_METADATA_LOADS
namespaces:
ns: 'http://c2d.escb.eu/UseOfCollateralMessage'
output_columns:
- type: 'a_key'
column_header: 'A_KEY'
- type: 'workflow_key'
column_header: 'A_WORKFLOW_HISTORY_KEY'
- type: 'xpath'
value: '//ns:DisseminationFile/@version'
column_header: 'C2D_VERSION'
is_key: 'N'
- type: 'xpath'
value: '//ns:MetaInformation/ns:DateCreated'
column_header: 'FILE_CREATION_DATE'
is_key: 'N'
- type: 'xpath'
value: '//ns:MetaInformation/ns:NumberOfSuspectRecords'
column_header: 'NO_OF_SUSPECT_RECORDS'
is_key: 'N'
- type: 'xpath'
value: '//ns:MetaInformation/ns:ReportingNCB'
column_header: 'REPORTING_NCB'
is_key: 'N'
- type: 'xpath'
value: '//ns:MetaInformation/ns:SnapshotDate'
column_header: 'SNAPSHOT_DATE'
is_key: 'N'
- type: 'static'
value: 'N'
column_header: 'PROCESSED_TO_DWH'
- task_name: ou_C2D_UC_DISSEM_create_marketable_assets_file
ods_prefix: INBOX/C2D/UC_DISSEM/UC_MA_DISSEM
output_table: UC_MA_DISSEM
namespaces:
ns: 'http://c2d.escb.eu/UseOfCollateralMessage'
output_columns:
- type: 'a_key'
column_header: 'A_KEY'
- type: 'workflow_key'
column_header: 'A_WORKFLOW_HISTORY_KEY'
- type: 'xpath'
value: '//ns:MetaInformation/ns:ReportingNCB'
is_key: 'Y'
column_header: 'REPORTING_NCB'
- type: 'xpath'
value: '//ns:MetaInformation/ns:SnapshotDate'
is_key: 'Y'
column_header: 'SNAPSHOT_DATE'
- type: 'xpath'
value: '//ns:MetaInformation/ns:DateCreated'
column_header: 'FILE_CREATION_DATE'
is_key: 'Y'
- type: 'xpath'
value: '//ns:MarketableAssets/ns:MarketableAsset/ns:MFIId'
column_header: 'MFI_ID'
is_key: 'N'
- type: 'xpath'
value: '//ns:MarketableAssets/ns:MarketableAsset/ns:RegistrationCode/ns:ISINCode'
column_header: 'ISIN_CODE'
is_key: 'N'
- type: 'static'
value: ''
column_header: 'OTHER_REG_NO'
- type: 'xpath'
value: '//ns:MarketableAssets/ns:MarketableAsset/ns:NominalAmountSubmitted'
column_header: 'NOM_AMT_SUBMITTED'
is_key: 'N'
- type: 'xpath'
value: '//ns:MarketableAssets/ns:MarketableAsset/ns:CollateralValueBeforeHaircuts'
column_header: 'COLL_BEFORE_HAIRCUTS'
is_key: 'N'
- type: 'xpath'
value: '//ns:MarketableAssets/ns:MarketableAsset/ns:CollateralValueAfterHaircuts'
column_header: 'COLL_AFTER_HAIRCUTS'
is_key: 'N'
- type: 'xpath'
value: '//ns:MarketableAssets/ns:MarketableAsset/ns:TypeOfSystem'
column_header: 'TYPE_OF_SYSTEM'
is_key: 'N'
- type: 'static'
value: ''
column_header: 'TYPE_OF_OPERATION'
- type: 'xpath'
value: '//ns:MarketableAssets/ns:MarketableAsset/ns:DomesticOrXborder'
column_header: 'DOM_OR_XBORDER'
is_key: 'N'
- type: 'static'
value: ''
column_header: 'ISSUER_CAS'
- type: 'static'
value: ''
column_header: 'ISSUER_CRED_PROVIDER'
- type: 'static'
value: ''
column_header: 'ISSUER_CLASS'
- type: 'static'
value: ''
column_header: 'ISSUER_RATING_ENUM_VALUE'
- type: 'static'
value: ''
column_header: 'ISSUER_RATING_NUMBER_VALUE'
- type: 'static'
value: ''
column_header: 'NCB_COMMENT'
- type: 'xpath'
value: '//ns:MarketableAssets/ns:MarketableAsset/ns:MobilisationChannel'
column_header: 'MOBILISATION_CHANNEL'
is_key: 'N'
- type: 'xpath'
value: '//ns:MarketableAssets/ns:MarketableAsset/ns:CCB'
column_header: 'CCB'
is_key: 'N'
- type: 'xpath'
value: '//ns:MarketableAssets/ns:MarketableAsset/ns:InvestorSSS'
column_header: 'INVESTOR_SSS'
is_key: 'N'
- type: 'static'
value: ''
column_header: 'INTERMEDIARY_SSS'
- type: 'xpath'
value: '//ns:MarketableAssets/ns:MarketableAsset/ns:IssuerSSS'
column_header: 'ISSUER_SSS'
is_key: 'N'
- type: 'static'
value: ''
column_header: 'TRIPARTY_AGENT'
- type: 'static'
value: ''
column_header: 'SUSPECT_ID'
- type: 'static'
value: ''
column_header: 'QUALITY_CHECK_STATUS'
- type: 'static'
value: ''
column_header: 'ERROR_CODE'
- type: 'static'
value: ''
column_header: 'ERROR_MESSAGE'
- type: 'static'
value: ''
column_header: 'ERROR_POSITION_IN_FILE'
- task_name: ou_C2D_UC_DISSEM_create_nonmarketable_assets_file
ods_prefix: INBOX/C2D/UC_DISSEM/UC_NMA_DISSEM
output_table: UC_NMA_DISSEM
namespaces:
ns: 'http://c2d.escb.eu/UseOfCollateralMessage'
xsi: 'http://www.w3.org/2001/XMLSchema-instance'
output_columns:
- type: 'a_key'
column_header: 'A_KEY'
- type: 'workflow_key'
column_header: 'A_WORKFLOW_HISTORY_KEY'
- type: 'xpath'
value: '//ns:MetaInformation/ns:ReportingNCB'
is_key: 'Y'
column_header: 'REPORTING_NCB'
- type: 'xpath'
value: '//ns:MetaInformation/ns:SnapshotDate'
is_key: 'Y'
column_header: 'SNAPSHOT_DATE'
- type: 'xpath'
value: '//ns:MetaInformation/ns:DateCreated'
column_header: 'FILE_CREATION_DATE'
is_key: 'Y'
- type: 'xpath'
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:MFIId'
column_header: 'MFI_ID'
is_key: 'N'
- type: 'xpath'
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:OtherRegistrationNumber'
column_header: 'OTHER_REG_NO'
is_key: 'N'
- type: 'xpath'
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:TypeOfSystem'
column_header: 'TYPE_OF_SYSTEM'
is_key: 'N'
- type: 'static'
value: ''
column_header: 'TYPE_OF_OPERATION'
- type: 'xpath'
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:DomesticOrXborder'
column_header: 'DOM_OR_XBORDER'
is_key: 'N'
- type: 'xpath'
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:NonMktAssetType'
column_header: 'NON_MKT_ASSET_TYPE'
is_key: 'N'
- type: 'xpath'
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:DateOfMaturity'
column_header: 'MATURITY_DATE'
is_key: 'N'
- type: 'xpath'
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:InterestPaymentType'
column_header: 'INTEREST_PAYMENT_TYPE'
is_key: 'N'
- type: 'xpath'
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:Cap'
column_header: 'CAP'
is_key: 'N'
- type: 'xpath'
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:ReferenceRate'
column_header: 'REFERENCE_RATE'
is_key: 'N'
- type: 'static'
value: ''
column_header: 'REFERENCE_RATE_COMMENT'
- type: 'xpath'
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:CollateralValueBeforeHaircuts'
column_header: 'COLL_BEFORE_HAIRCUTS'
is_key: 'N'
- type: 'xpath'
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:CollateralValueAfterHaircuts'
column_header: 'COLL_AFTER_HAIRCUTS'
is_key: 'N'
- type: 'xpath'
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:NumberOfAggregatedDebtors'
column_header: 'NO_AGGR_DEBTORS'
is_key: 'N'
- type: 'xpath'
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:EligibleViaGuarantor'
column_header: 'ELIGIBLE_VIA_GUAR'
is_key: 'N'
- type: 'xpath'
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:Debtor/@xsi:type'
column_header: 'DEBTOR_TYPE'
is_key: 'N'
- type: 'xpath'
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:Debtor/ns:Name'
column_header: 'DEBTOR_NAME'
is_key: 'N'
- type: 'xpath'
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:Debtor/ns:ID/@xsi:type'
column_header: 'DEBTOR_ID_TYPE'
is_key: 'N'
- type: 'xpath'
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:Debtor/ns:ID/ns:value'
column_header: 'DEBTOR_ID'
is_key: 'N'
- type: 'xpath'
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:Debtor/ns:Class'
column_header: 'DEBTOR_CLASS'
is_key: 'N'
- type: 'xpath'
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:Debtor/ns:Residence'
column_header: 'DEBTOR_RESIDENCE'
is_key: 'N'
- type: 'xpath'
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:Debtor/ns:CreditAssessmentSource'
column_header: 'DEBTOR_CAS'
is_key: 'N'
- type: 'xpath'
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:Debtor/ns:CredAssessSysProvider'
column_header: 'DEBTOR_CRED_PROV'
is_key: 'N'
- type: 'static'
value: ''
column_header: 'DEBTOR_RATING_ENUM_VALUE'
- type: 'xpath'
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:Debtor/ns:Rating/ns:NumberValue'
column_header: 'DEBTOR_RATING_NUMBER_VALUE'
is_key: 'N'
- type: 'static'
value: ''
column_header: 'GUAR_TYPE'
- type: 'static'
value: ''
column_header: 'GUAR_NAME'
- type: 'static'
value: ''
column_header: 'GUAR_ID_TYPE'
- type: 'static'
value: ''
column_header: 'GUAR_ID'
- type: 'static'
value: ''
column_header: 'GUAR_CLASS'
- type: 'static'
value: ''
column_header: 'GUAR_RESIDENCE'
- type: 'static'
value: ''
column_header: 'GUAR_CRED_CAS'
- type: 'static'
value: ''
column_header: 'GUAR_CRED_PROV'
- type: 'static'
value: ''
column_header: 'GUAR_RATING_ENUM_VALUE'
- type: 'static'
value: ''
column_header: 'GUAR_RATING_NUMBER_VALUE'
- type: 'xpath'
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:NumberOfAggregatedAssets'
column_header: 'NO_AGGR_ASSETS'
is_key: 'N'
- type: 'xpath'
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:Denomination'
column_header: 'DENOMINATION'
is_key: 'N'
- type: 'xpath'
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:Secured'
column_header: 'SECURED_FLAG'
is_key: 'N'
- type: 'xpath'
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:ResidualMaturity'
column_header: 'RESIDUAL_MATURITY'
is_key: 'N'
- type: 'xpath'
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:BucketSize'
column_header: 'BUCKET_SIZE'
is_key: 'N'
- type: 'xpath'
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:NCBComment'
column_header: 'NCB_COMMENT'
is_key: 'N'
- type: 'xpath'
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:ValuationMethodology'
column_header: 'VALUATION_METHODOLOGY'
is_key: 'N'
- type: 'xpath'
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:NominalAmountSubmitted'
column_header: 'NOM_AMT_SUBMITTED'
is_key: 'N'
- type: 'xpath'
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:ResettingPeriodMoreThanOneYear'
column_header: 'RESET_PERIOD_MORE_ONE_YEAR'
is_key: 'N'
- type: 'xpath'
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:MobilisationChannel'
column_header: 'MOBILISATION_CHANNEL'
is_key: 'N'
- type: 'xpath'
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:CCB'
column_header: 'CCB'
is_key: 'N'
- type: 'static'
value: ''
column_header: 'INVESTOR_SSS'
- type: 'static'
value: ''
column_header: 'INTERMEDIARY_SSS'
- type: 'static'
value: ''
column_header: 'ISSUER_SSS'
- type: 'static'
value: ''
column_header: 'SUSPECT_ID'
- type: 'static'
value: ''
column_header: 'QUALITY_CHECK_STATUS'
- type: 'static'
value: ''
column_header: 'ERROR_CODE'
- type: 'static'
value: ''
column_header: 'ERROR_MESSAGE'
- type: 'static'
value: ''
column_header: 'ERROR_POSITION_IN_FILE'
- type: 'xpath'
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:OaId'
column_header: 'OA_ID'
is_key: 'N'
- type: 'xpath'
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:ContractId'
column_header: 'CONTRACT_ID'
is_key: 'N'
- type: 'xpath'
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:InstrmntId'
column_header: 'INSTRMNT_ID'
is_key: 'N'

View File

@@ -0,0 +1,179 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<DisseminationFile version="R14" xmlns="http://c2d.escb.eu/UseOfCollateralMessage">
<MetaInformation>
<ReportingNCB>FR</ReportingNCB>
<SnapshotDate>2023-02-16</SnapshotDate>
<DateCreated>2023-02-23T11:00:35</DateCreated>
<NumberOfSuspectRecords>0</NumberOfSuspectRecords>
</MetaInformation>
<MarketableAssets>
<MarketableAsset>
<MFIId>FR10107</MFIId>
<RegistrationCode xsi:type="isin" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<ISINCode>BE6302866973</ISINCode>
</RegistrationCode>
<NominalAmountSubmitted>40</NominalAmountSubmitted>
<CollateralValueBeforeHaircuts>41.92566012</CollateralValueBeforeHaircuts>
<CollateralValueAfterHaircuts>40.75174164</CollateralValueAfterHaircuts>
<TypeOfSystem>pool</TypeOfSystem>
<DomesticOrXborder>cross-border</DomesticOrXborder>
<MobilisationChannel>CCBM mkt</MobilisationChannel>
<CCB>BE</CCB>
<InvestorSSS>CLBE01</InvestorSSS>
<IssuerSSS>CLBE01</IssuerSSS>
</MarketableAsset>
<MarketableAsset>
<MFIId>FR10107</MFIId>
<RegistrationCode xsi:type="isin" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<ISINCode>DE000A1RQCP0</ISINCode>
</RegistrationCode>
<NominalAmountSubmitted>10</NominalAmountSubmitted>
<CollateralValueBeforeHaircuts>10.2664863</CollateralValueBeforeHaircuts>
<CollateralValueAfterHaircuts>10.06115657</CollateralValueAfterHaircuts>
<TypeOfSystem>pool</TypeOfSystem>
<DomesticOrXborder>cross-border</DomesticOrXborder>
<MobilisationChannel>CCBM mkt</MobilisationChannel>
<CCB>DE</CCB>
<InvestorSSS>CLDE01</InvestorSSS>
<IssuerSSS>CLDE01</IssuerSSS>
</MarketableAsset>
<MarketableAsset>
<MFIId>FR10107</MFIId>
<RegistrationCode xsi:type="isin" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<ISINCode>ES0305248009</ISINCode>
</RegistrationCode>
<NominalAmountSubmitted>7.5753425</NominalAmountSubmitted>
<CollateralValueBeforeHaircuts>7.31191527</CollateralValueBeforeHaircuts>
<CollateralValueAfterHaircuts>6.78545737</CollateralValueAfterHaircuts>
<TypeOfSystem>pool</TypeOfSystem>
<DomesticOrXborder>cross-border</DomesticOrXborder>
<MobilisationChannel>CCBM mkt</MobilisationChannel>
<CCB>ES</CCB>
<InvestorSSS>CLES01</InvestorSSS>
<IssuerSSS>CLES01</IssuerSSS>
</MarketableAsset>
</MarketableAssets>
<NonMarketableAssets>
<NonMarketableAsset>
<MFIId>FR10107</MFIId>
<OtherRegistrationNumber>FRCPACAGGREGATE</OtherRegistrationNumber>
<TypeOfSystem>pool</TypeOfSystem>
<DomesticOrXborder>domestic</DomesticOrXborder>
<NonMktAssetType>Credit claim</NonMktAssetType>
<DateOfMaturity>2023-07-08</DateOfMaturity>
<InterestPaymentType>Fixed</InterestPaymentType>
<CollateralValueBeforeHaircuts>100</CollateralValueBeforeHaircuts>
<CollateralValueAfterHaircuts>88.5</CollateralValueAfterHaircuts>
<NumberOfAggregatedDebtors>2</NumberOfAggregatedDebtors>
<EligibleViaGuarantor>N</EligibleViaGuarantor>
<Debtor xsi:type="DG3" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<Name>Aggregate</Name>
<ID xsi:type="NCB">
<value>Aggregate</value>
</ID>
<Class>NFC-PSE3</Class>
<Residence>FR</Residence>
<CreditAssessmentSource>ICAS</CreditAssessmentSource>
<CredAssessSysProvider>Banque de France</CredAssessSysProvider>
<Rating>
<NumberValue>0.0003</NumberValue>
</Rating>
</Debtor>
<NumberOfAggregatedAssets>2</NumberOfAggregatedAssets>
<Denomination>EUR</Denomination>
<Secured>N</Secured>
<ResidualMaturity>1-3</ResidualMaturity>
<BucketSize>0_to_100</BucketSize>
<NCBComment>10107</NCBComment>
<ValuationMethodology>Outstanding</ValuationMethodology>
<NominalAmountSubmitted>10</NominalAmountSubmitted>
<MobilisationChannel>Local dom nonmkt</MobilisationChannel>
<CCB>FR</CCB>
<OaId>10107</OaId>
<ContractId>1549493</ContractId>
<InstrmntId>1549493</InstrmntId>
</NonMarketableAsset>
<NonMarketableAsset>
<MFIId>FR11188</MFIId>
<OtherRegistrationNumber>FRC000748968616</OtherRegistrationNumber>
<TypeOfSystem>pool</TypeOfSystem>
<DomesticOrXborder>domestic</DomesticOrXborder>
<NonMktAssetType>ACC</NonMktAssetType>
<DateOfMaturity>2023-09-30</DateOfMaturity>
<InterestPaymentType>Floating</InterestPaymentType>
<Cap>N</Cap>
<ReferenceRate>3MEUBOR</ReferenceRate>
<CollateralValueBeforeHaircuts>200</CollateralValueBeforeHaircuts>
<CollateralValueAfterHaircuts>160</CollateralValueAfterHaircuts>
<NumberOfAggregatedDebtors>1</NumberOfAggregatedDebtors>
<EligibleViaGuarantor>N</EligibleViaGuarantor>
<Debtor xsi:type="DG3" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<Name>SAS UNIP SAINT MALO AUTOMOBILES DISTRIBUTION</Name>
<ID xsi:type="NCB">
<value>FR895780419</value>
</ID>
<Class>Other</Class>
<Residence>FR</Residence>
<CreditAssessmentSource>ICAS</CreditAssessmentSource>
<CredAssessSysProvider>Banque de France</CredAssessSysProvider>
<Rating>
<NumberValue>0.0051</NumberValue>
</Rating>
</Debtor>
<NumberOfAggregatedAssets>1</NumberOfAggregatedAssets>
<Denomination>EUR</Denomination>
<Secured>N</Secured>
<ResidualMaturity>10-15</ResidualMaturity>
<NCBComment>11188</NCBComment>
<ValuationMethodology>Outstanding</ValuationMethodology>
<NominalAmountSubmitted>999999.99999999</NominalAmountSubmitted>
<ResettingPeriodMoreThanOneYear>N</ResettingPeriodMoreThanOneYear>
<MobilisationChannel>Local dom nonmkt</MobilisationChannel>
<CCB>FR</CCB>
<OaId>11188</OaId>
<ContractId>R05020ETC</ContractId>
<InstrmntId>202095459110</InstrmntId>
</NonMarketableAsset>
<NonMarketableAsset>
<MFIId>FR11188</MFIId>
<OtherRegistrationNumber>FRC000748968732</OtherRegistrationNumber>
<TypeOfSystem>pool</TypeOfSystem>
<DomesticOrXborder>domestic</DomesticOrXborder>
<NonMktAssetType>ACC</NonMktAssetType>
<DateOfMaturity>2023-09-30</DateOfMaturity>
<InterestPaymentType>Floating</InterestPaymentType>
<Cap>N</Cap>
<ReferenceRate>3MEUBOR</ReferenceRate>
<CollateralValueBeforeHaircuts>300</CollateralValueBeforeHaircuts>
<CollateralValueAfterHaircuts>201</CollateralValueAfterHaircuts>
<NumberOfAggregatedDebtors>1</NumberOfAggregatedDebtors>
<EligibleViaGuarantor>N</EligibleViaGuarantor>
<Debtor xsi:type="DG3" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<Name>ALLIANCE E.S.D.B</Name>
<ID xsi:type="NCB">
<value>FR347861981</value>
</ID>
<Class>Other</Class>
<Residence>FR</Residence>
<CreditAssessmentSource>ICAS</CreditAssessmentSource>
<CredAssessSysProvider>Banque de France</CredAssessSysProvider>
<Rating>
<NumberValue>0.0051</NumberValue>
</Rating>
</Debtor>
<NumberOfAggregatedAssets>1</NumberOfAggregatedAssets>
<Denomination>EUR</Denomination>
<Secured>N</Secured>
<ResidualMaturity>+30</ResidualMaturity>
<NCBComment>11188</NCBComment>
<ValuationMethodology>Outstanding</ValuationMethodology>
<NominalAmountSubmitted>0</NominalAmountSubmitted>
<ResettingPeriodMoreThanOneYear>N</ResettingPeriodMoreThanOneYear>
<MobilisationChannel>Local dom nonmkt</MobilisationChannel>
<CCB>FR</CCB>
<OaId>11188</OaId>
<ContractId>R05320ETC</ContractId>
<InstrmntId>202095459010</InstrmntId>
</NonMarketableAsset>
</NonMarketableAssets>
</DisseminationFile>

View File

View File

View File

@@ -0,0 +1,398 @@
# App configurations
encoding_type: latin1
# Global configurations
tmpdir: /tmp
inbox_prefix: INBOX/CSDB/CentralizedSecuritiesDissemination
archive_prefix: ARCHIVE/CSDB/CentralizedSecuritiesDissemination
workflow_name: w_ODS_CSDB_DEBT
# NOTE(review): bare `None` is parsed by YAML as the string "None", not as null —
# quoted here to make that explicit. If "no schema" is intended, confirm whether
# the consumer checks for the literal string 'None' or expects a real `null`.
validation_schema_path: 'None'
file_type: csv
tasks:
- task_name: m_ODS_CSDB_DEBT_PARSE
ods_prefix: INBOX/CSDB/CentralizedSecuritiesDissemination/CSDB_DEBT
output_table: CSDB_DEBT
output_columns:
- type: 'a_key'
column_header: 'A_KEY'
- type: 'workflow_key'
column_header: 'A_WORKFLOW_HISTORY_KEY'
- type: 'csv_header'
value: 'Date last modified'
column_header: 'NEWUPDATED'
- type: 'csv_header'
value: 'Extraction date'
column_header: 'IDLOADDATE_DIM'
- type: 'csv_header'
value: 'ISIN code'
column_header: 'EXTERNALCODE_ISIN'
- type: 'csv_header'
value: 'National instrument code type'
column_header: 'EXTERNALCODETYPE_NC'
- type: 'csv_header'
value: 'National instrument code'
column_header: 'EXTERNALCODE_NATIONAL'
- type: 'csv_header'
value: 'Internal instrument code'
column_header: 'IDIRINSTRUMENT'
- type: 'csv_header'
value: 'Short name'
column_header: 'SHORTNAME'
- type: 'csv_header'
value: 'Bond duration'
column_header: 'VA_BONDDURATION'
- type: 'csv_header'
value: 'Debt type'
column_header: 'IDIRDEBTTYPE'
- type: 'csv_header'
value: 'Asset securitisation type'
column_header: 'IDIRASSETSECTYPE'
- type: 'csv_header'
value: 'CFI classification'
column_header: 'IDIRCLASSIFICATIONCODE_CFI'
- type: 'csv_header'
value: 'Instrument ESA 95 class'
column_header: 'IDIRCLASSIFICATIONCODE_ESAI'
- type: 'csv_header'
value: 'Instrument ESA 95 class - value type'
column_header: 'IDIRCLASSIFICATIONCODE_ESAI_DM'
- type: 'csv_header'
value: 'Nominal currency'
column_header: 'IDIRCURRENCY_NOMINAL'
- type: 'csv_header'
value: 'Amount issued'
column_header: 'AMOUNTISSUED'
- type: 'csv_header'
value: 'Amount outstanding'
column_header: 'AMOUNTOUTSTANDING'
- type: 'csv_header'
value: 'Amount outstanding in EUR'
column_header: 'AMOUNTOUTSTANDING_EUR'
- type: 'csv_header'
value: 'Pool factor'
column_header: 'POOLFACTOR'
- type: 'csv_header'
value: 'Issue price'
column_header: 'ISSUEPRICE'
- type: 'csv_header'
value: 'Issue date'
column_header: 'IDISSUEDATE'
- type: 'csv_header'
value: 'Coupon type'
column_header: 'IDIRCOUPONTYPE'
- type: 'csv_header'
value: 'Last Coupon frequency'
column_header: 'IDIRCOUPONFREQUENCY'
- type: 'csv_header'
value: 'Coupon currency'
column_header: 'IDIRCURRENCY_COUPON'
- type: 'csv_header'
value: 'Last Coupon rate'
column_header: 'COUPONRATE'
- type: 'csv_header'
value: 'Last Coupon date'
column_header: 'COUPONDATE'
- type: 'csv_header'
value: 'Redemption type'
column_header: 'IDIRREDEMPTIONTYPE'
- type: 'csv_header'
value: 'Redemption frequency'
column_header: 'IDIRREDEMPTIONFREQUENCY'
- type: 'csv_header'
value: 'Redemption currency'
column_header: 'IDIRCURRENCY_REDEMPTION'
- type: 'csv_header'
value: 'Redemption price'
column_header: 'REDEMPTIONPRICE'
- type: 'csv_header'
value: 'Maturity date'
column_header: 'IDMATURITYDATE'
- type: 'csv_header'
value: 'Issuer organisation alias type'
column_header: 'IDIRORGANISATIONALIASTYPE_IS'
- type: 'csv_header'
value: 'Issuer organisation alias code'
column_header: 'ISSUERSOURCECODE'
- type: 'csv_header'
value: 'Issuer MFI code'
column_header: 'ISSUEREXTERNALCODE_MFI'
- type: 'csv_header'
value: 'Issuer BIC code'
column_header: 'ISSUEREXTERNALCODE_BIC'
- type: 'csv_header'
value: 'Issuer BEI code'
column_header: 'ISSUEREXTERNALCODE_BEI'
- type: 'csv_header'
value: 'Internal organisation code'
column_header: 'IDIRORGANISATION_ISSUER'
- type: 'csv_header'
value: 'Issuer name'
column_header: 'ISSUERNAME'
- type: 'csv_header'
value: 'Issuer domicile country'
column_header: 'IDIRCOUNTRY'
- type: 'csv_header'
value: 'Issuer domicile country - value type'
column_header: 'IDIRCOUNTRY_DM'
- type: 'csv_header'
value: 'Issuer ESA 95 sector'
column_header: 'IDIRCLASSIFICATIONCODE_ESAO'
- type: 'csv_header'
value: 'Issuer ESA 95 sector - value type'
column_header: 'IDIRCLASSIFICATIONCODE_ESAO_DM'
- type: 'csv_header'
value: 'Issuer NACE sector'
column_header: 'IDIRCLASSIFICATIONCODE_NACE'
- type: 'csv_header'
value: 'Price date'
column_header: 'PUBLICATIONPRICEDATE'
- type: 'csv_header'
value: 'Price value'
column_header: 'PUBLICATIONPRICE'
- type: 'csv_header'
value: 'Price value - type'
column_header: 'PUBLICATIONPRICETYPE'
- type: 'csv_header'
value: 'Quotation basis'
column_header: 'PUBLICATIONPRICEQUOTATIONBASIS'
- type: 'csv_header'
value: 'Monthly average price'
column_header: 'MONTHLYAVERAGEPRICE'
- type: 'csv_header'
value: 'Accrual start date'
column_header: 'ACCRUALSTARTDATE'
- type: 'csv_header'
value: 'Accrued income factor'
column_header: 'DEBTACCRUALDEBTOR'
- type: 'csv_header'
value: 'Accrued income factor - value type'
column_header: 'DEBTACCRUALDEBTOR_DM'
- type: 'csv_header'
value: 'Accrued income (Creditor)'
column_header: 'DEBTACCRUALCREDITOR'
- type: 'csv_header'
value: 'Accrued income (Creditor) - value type'
column_header: 'DEBTACCRUALCREDITOR_TYP'
- type: 'csv_header'
value: 'Accrued interest'
column_header: 'ACCRUEDINTEREST'
- type: 'csv_header'
value: 'Yield to maturity'
column_header: 'YTMNONOPTIONADJUSTED'
- type: 'csv_header'
value: 'ESCB issuer identifier'
column_header: 'ESCB_ISSUER_IDENT'
- type: 'csv_header'
value: 'ESCB issuer identifier type'
column_header: 'VA_ESCBCODETYPE'
- type: 'csv_header'
value: 'Issuer compound ID'
column_header: 'IDUDCMPPARTY'
  - type: 'csv_header'
    # NOTE(review): 'Oustanding' [sic] — presumably mirrors the source CSV
    # header's exact spelling; do not correct without verifying against the feed
    value: 'Amount Oustanding type'
    column_header: 'AMOUNTOUTSTANDINGTYPE'
- type: 'csv_header'
value: 'Market Capitalisation'
column_header: 'MARKETCAPITALISATION'
- type: 'csv_header'
value: 'Market Capitalisation in euro'
column_header: 'MARKETCAPITALISATION_EUR'
- type: 'csv_header'
value: 'Security Status'
column_header: 'VA_SECURITYSTATUS'
- type: 'csv_header'
value: 'Instrument suppl class'
column_header: 'VA_INSTRSUPPLEMENTARYCLASS'
- type: 'csv_header'
value: 'Residual maturity class'
column_header: 'VA_RESIDUALMATURITYCLASS'
- type: 'csv_header'
value: 'Is In SEC'
column_header: 'VA_ISINSEC'
- type: 'csv_header'
value: 'Is In EADB'
column_header: 'VA_ISELIGIBLEFOREADB'
- type: 'csv_header'
value: 'Instrument ESA 2010 class'
column_header: 'IDIRCLASSIFICATIONCODE_ESAI10'
- type: 'csv_header'
value: 'Issuer ESA 2010 sector'
column_header: 'IDIRCLASSIFICATIONCODE_ESAO10'
- type: 'csv_header'
value: 'Primary asset classification 2'
column_header: 'IDIRDEBTTYPE_N'
- type: 'csv_header'
value: 'Instruments seniority type'
column_header: 'SENIORITY'
- type: 'csv_header'
value: 'Issuer LEI code'
column_header: 'ISSUEREXTERNALCODE_LEI'
- type: 'csv_header'
value: 'Instrument ESA 2010 class - value type'
column_header: 'INSTR_ESA2010_CLASS_VALUETYPE'
- type: 'csv_header'
value: 'Issuer ESA 2010 class - value type'
column_header: 'ISS_ESA2010_CLASS_VALUETYPE'
- type: 'csv_header'
value: 'Security status date'
column_header: 'VA_SECURITYSTATUSDATE'
- type: 'csv_header'
value: 'Group type'
column_header: 'GROUP_TYPE'
- type: 'csv_header'
value: 'Has embedded option'
column_header: 'HASEMBEDDEDOPTION'
- type: 'csv_header'
value: 'Volume traded'
column_header: 'VOLUMETRADED'
- type: 'csv_header'
value: 'Primary listing name'
column_header: 'PRIMARYLISTINGNAME'
- type: 'csv_header'
value: 'Primary listing residency country'
column_header: 'PRIMARYLISTINGCOUNTRY'
- type: 'csv_header'
value: 'Instrument portfolio flags'
column_header: 'VA_INSTRPORTFLAGS'
- type: 'csv_header'
value: 'Residual maturity'
column_header: 'RESIDUALMATURITY'
- type: 'csv_header'
value: 'Original maturity'
column_header: 'ORIGINAL_MATURITY'
- type: 'csv_header'
value: 'CFIN classification'
column_header: 'IDIRCLASSIFICATIONCODE_CFIN'
- type: 'csv_header'
value: 'First scheduled Coupon date'
column_header: 'COUPONFIRSTPAYMENTDATE'
- type: 'csv_header'
value: 'Last scheduled Coupon date'
column_header: 'COUPONLASTPAYMENTDATE'
- type: 'csv_header'
value: 'Coupon rate underlying ISIN'
column_header: 'COUPONRATEUNDERLYINGCODE_ISIN'
- type: 'csv_header'
value: 'Coupon rate spread'
column_header: 'COUPONRATESPREAD'
- type: 'csv_header'
value: 'Coupon rate multiplier'
column_header: 'COUPONRATEMULTIPLIER'
- type: 'csv_header'
value: 'Coupon rate cap'
column_header: 'COUPONRATECAP'
- type: 'csv_header'
value: 'Coupon rate floor'
column_header: 'COUPONRATEFLOOR'
- type: 'csv_header'
value: 'Issue date tranche'
column_header: 'IDISSUEDATE_TRANCHE'
- type: 'csv_header'
value: 'Issue price tranche'
column_header: 'ISSUEPRICE_TRANCHE'
- type: 'csv_header'
value: 'Is private placement'
column_header: 'VA_ISPRIVATEPLACEMENT'
- type: 'csv_header'
value: 'RIAD code'
column_header: 'RIAD_CODE'
- type: 'csv_header'
value: 'RIAD OUID'
column_header: 'RIAD_OUID'
- type: 'csv_header'
value: 'ESG Flag 1'
column_header: 'ESG1'
- type: 'csv_header'
value: 'ESG Flag 2'
column_header: 'ESG2'
- type: 'csv_header'
value: 'ESG Flag 3'
column_header: 'ESG3'
- type: 'csv_header'
value: 'Strip'
column_header: 'STRIP'
- type: 'csv_header'
value: 'Depository receipt'
column_header: 'DEPOSITORY_RECEIPT'
- type: 'csv_header'
value: 'Rule 144A'
column_header: 'RULE_144A'
- type: 'csv_header'
value: 'Reg S'
column_header: 'REG_S'
- type: 'csv_header'
value: 'Warrant'
column_header: 'WARRANT'
- type: 'csv_header'
value: 'CSEC Relevance - stock'
column_header: 'CSEC_RELEVANCE_STOCK'
- type: 'csv_header'
value: 'CSEC relevance - gross issuance'
column_header: 'CSEC_RELEVANCE_GROSS_ISSUANCE'
- type: 'csv_header'
value: 'CSEC relevance - redemption'
column_header: 'CSEC_RELEVANCE_REDEMPTION'
- type: 'csv_header'
value: 'Accruing coupon'
column_header: 'ACCRUING_COUPON'
- type: 'csv_header'
value: 'Accruing discount'
column_header: 'ACCRUING_DISCOUNT'
- type: 'csv_header'
value: 'STEP Id'
column_header: 'STEPID'
- type: 'csv_header'
value: 'Program Name'
column_header: 'PROGRAMNAME'
- type: 'csv_header'
value: 'Program Ceiling'
column_header: 'PROGRAMCEILING'
- type: 'csv_header'
value: 'Program Status'
column_header: 'PROGRAMSTATUS'
- type: 'csv_header'
value: 'Issuer NACE21 sector'
column_header: 'ISSUERNACE21SECTOR'
- type: 'csv_header'
value: 'Instrument quotation basis'
column_header: 'INSTRUMENTQUOTATIONBASIS'
- type: 'csv_header'
value: 'placeholder 38'
column_header: 'PLACEHOLDER38'
- type: 'csv_header'
value: 'placeholder 39'
column_header: 'PLACEHOLDER39'
- type: 'csv_header'
value: 'placeholder 40'
column_header: 'PLACEHOLDER40'
- type: 'csv_header'
value: 'placeholder 41'
column_header: 'PLACEHOLDER41'
- type: 'csv_header'
value: 'placeholder 42'
column_header: 'PLACEHOLDER42'
- type: 'csv_header'
value: 'placeholder 43'
column_header: 'PLACEHOLDER43'
- type: 'csv_header'
value: 'placeholder 44'
column_header: 'PLACEHOLDER44'
- type: 'csv_header'
value: 'placeholder 45'
column_header: 'PLACEHOLDER45'
- type: 'csv_header'
value: 'placeholder 46'
column_header: 'PLACEHOLDER46'
- type: 'csv_header'
value: 'placeholder 47'
column_header: 'PLACEHOLDER47'
- type: 'csv_header'
value: 'placeholder 48'
column_header: 'PLACEHOLDER48'
- type: 'csv_header'
value: 'placeholder 49'
column_header: 'PLACEHOLDER49'
- type: 'csv_header'
value: 'placeholder 50'
column_header: 'PLACEHOLDER50'

View File

@@ -0,0 +1,400 @@
# App configurations
encoding_type: latin1
# Global configurations
tmpdir: /tmp
inbox_prefix: INBOX/CSDB/CentralizedSecuritiesDailyReferenceDataDissemination
archive_prefix: ARCHIVE/CSDB/CentralizedSecuritiesDailyReferenceDataDissemination
workflow_name: w_ODS_CSDB_DEBT_DAILY
validation_schema_path: None
file_type: csv
tasks:
- task_name: m_ODS_CSDB_DEBT_DAILY_PARSE
ods_prefix: INBOX/CSDB/CentralizedSecuritiesDailyReferenceDataDissemination/CSDB_DEBT_DAILY
output_table: CSDB_DEBT_DAILY
output_columns:
- type: 'a_key'
column_header: 'A_KEY'
- type: 'workflow_key'
column_header: 'A_WORKFLOW_HISTORY_KEY'
- type: 'csv_header'
value: 'Date last modified'
column_header: 'Date last modified'
- type: 'csv_header'
value: 'Extraction date'
column_header: 'Extraction date'
- type: 'csv_header'
value: 'ISIN code'
column_header: 'ISIN code'
- type: 'csv_header'
value: 'National instrument code type'
column_header: 'National instrument code type'
- type: 'csv_header'
value: 'National instrument code'
column_header: 'National instrument code'
- type: 'csv_header'
value: 'Internal instrument code'
column_header: 'Internal instrument code'
- type: 'csv_header'
value: 'Short name'
column_header: 'Short name'
- type: 'csv_header'
value: 'Bond duration'
column_header: 'Bond duration'
- type: 'csv_header'
value: 'Debt type'
column_header: 'Debt type'
- type: 'csv_header'
value: 'Asset securitisation type'
column_header: 'Asset securitisation type'
- type: 'csv_header'
value: 'CFI classification'
column_header: 'CFI classification'
- type: 'csv_header'
value: 'Instrument ESA 95 class'
column_header: 'Instrument ESA 95 class'
- type: 'csv_header'
value: 'Instrument ESA 95 class - value type'
column_header: 'Instrument ESA 95 class - value type'
- type: 'csv_header'
value: 'Nominal currency'
column_header: 'Nominal currency'
- type: 'csv_header'
value: 'Amount issued'
column_header: 'Amount issued'
- type: 'csv_header'
value: 'Amount outstanding'
column_header: 'Amount outstanding'
- type: 'csv_header'
value: 'Amount outstanding in EUR'
column_header: 'Amount outstanding in EUR'
- type: 'csv_header'
value: 'Pool factor'
column_header: 'Pool factor'
- type: 'csv_header'
value: 'Issue price'
column_header: 'Issue price'
- type: 'csv_header'
value: 'Issue date'
column_header: 'Issue date'
- type: 'csv_header'
value: 'Coupon type'
column_header: 'Coupon type'
- type: 'csv_header'
value: 'Last Coupon frequency'
column_header: 'Last Coupon frequency'
- type: 'csv_header'
value: 'Coupon currency'
column_header: 'Coupon currency'
- type: 'csv_header'
value: 'Last Coupon rate'
column_header: 'Last Coupon rate'
- type: 'csv_header'
value: 'Last Coupon date'
column_header: 'Last Coupon date'
- type: 'csv_header'
value: 'Redemption type'
column_header: 'Redemption type'
- type: 'csv_header'
value: 'Redemption frequency'
column_header: 'Redemption frequency'
- type: 'csv_header'
value: 'Redemption currency'
column_header: 'Redemption currency'
- type: 'csv_header'
value: 'Redemption price'
column_header: 'Redemption price'
- type: 'csv_header'
value: 'Maturity date'
column_header: 'Maturity date'
- type: 'csv_header'
value: 'Issuer organisation alias type'
column_header: 'Issuer organisation alias type'
- type: 'csv_header'
value: 'Issuer organisation alias code'
column_header: 'Issuer organisation alias code'
- type: 'csv_header'
value: 'Issuer MFI code'
column_header: 'Issuer MFI code'
- type: 'csv_header'
value: 'Issuer BIC code'
column_header: 'Issuer BIC code'
- type: 'csv_header'
value: 'Issuer BEI code'
column_header: 'Issuer BEI code'
- type: 'csv_header'
value: 'Internal organisation code'
column_header: 'Internal organisation code'
- type: 'csv_header'
value: 'Issuer name'
column_header: 'Issuer name'
- type: 'csv_header'
value: 'Issuer domicile country'
column_header: 'Issuer domicile country'
- type: 'csv_header'
value: 'Issuer domicile country - value type'
column_header: 'Issuer domicile country - value type'
- type: 'csv_header'
value: 'Issuer ESA 95 sector'
column_header: 'Issuer ESA 95 sector'
- type: 'csv_header'
value: 'Issuer ESA 95 sector - value type'
column_header: 'Issuer ESA 95 sector - value type'
- type: 'csv_header'
value: 'Issuer NACE sector'
column_header: 'Issuer NACE sector'
- type: 'csv_header'
value: 'Price date'
column_header: 'Price date'
- type: 'csv_header'
value: 'Price value'
column_header: 'Price value'
- type: 'csv_header'
value: 'Price value - type'
column_header: 'Price value - type'
- type: 'csv_header'
value: 'Quotation basis'
column_header: 'Quotation basis'
- type: 'csv_header'
value: 'Monthly average price'
column_header: 'Monthly average price'
- type: 'csv_header'
value: 'Accrual start date'
column_header: 'Accrual start date'
- type: 'csv_header'
value: 'Accrued income factor'
column_header: 'Accrued income factor'
- type: 'csv_header'
value: 'Accrued income factor - value type'
column_header: 'Accrued income factor - value type'
- type: 'csv_header'
value: 'Accrued income (Creditor)'
column_header: 'Accrued income (Creditor)'
- type: 'csv_header'
value: 'Accrued income (Creditor) - value type'
column_header: 'Accrued income (Creditor) - value type'
- type: 'csv_header'
value: 'Accrued interest'
column_header: 'Accrued interest'
- type: 'csv_header'
value: 'Yield to maturity'
column_header: 'Yield to maturity'
- type: 'csv_header'
value: 'ESCB issuer identifier'
column_header: 'ESCB issuer identifier'
- type: 'csv_header'
value: 'ESCB issuer identifier type'
column_header: 'ESCB issuer identifier type'
- type: 'csv_header'
value: 'Issuer compound ID'
column_header: 'Issuer compound ID'
- type: 'csv_header'
value: 'Amount Oustanding type'
column_header: 'Amount Oustanding type'
- type: 'csv_header'
value: 'Market Capitalisation'
column_header: 'Market Capitalisation'
- type: 'csv_header'
value: 'Market Capitalisation in euro'
column_header: 'Market Capitalisation in euro'
- type: 'csv_header'
value: 'Security Status'
column_header: 'Security Status'
- type: 'csv_header'
value: 'Instrument suppl class'
column_header: 'Instrument suppl class'
- type: 'csv_header'
value: 'Residual maturity class'
column_header: 'Residual maturity class'
- type: 'csv_header'
value: 'Is In SEC'
column_header: 'Is In SEC'
- type: 'csv_header'
value: 'Is In EADB'
column_header: 'Is In EADB'
- type: 'csv_header'
value: 'Instrument ESA 2010 class'
column_header: 'Instrument ESA 2010 class'
- type: 'csv_header'
value: 'Issuer ESA 2010 sector'
column_header: 'Issuer ESA 2010 sector'
- type: 'csv_header'
value: 'Primary asset classification 2'
column_header: 'Primary asset classification 2'
- type: 'csv_header'
value: 'Instruments seniority type'
column_header: 'Instruments seniority type'
- type: 'csv_header'
value: 'Issuer LEI code'
column_header: 'Issuer LEI code'
- type: 'csv_header'
value: 'Instrument ESA 2010 class - value type'
column_header: 'Instrument ESA 2010 class - value type'
- type: 'csv_header'
value: 'Issuer ESA 2010 class - value type'
column_header: 'Issuer ESA 2010 class - value type'
- type: 'csv_header'
value: 'Security status date'
column_header: 'Security status date'
- type: 'csv_header'
value: 'Group type'
column_header: 'Group type'
- type: 'csv_header'
value: 'Has embedded option'
column_header: 'Has embedded option'
- type: 'csv_header'
value: 'Volume traded'
column_header: 'Volume traded'
- type: 'csv_header'
value: 'Primary listing name'
column_header: 'Primary listing name'
- type: 'csv_header'
value: 'Primary listing residency country'
column_header: 'Primary listing residency country'
- type: 'csv_header'
value: 'Instrument portfolio flags'
column_header: 'Instrument portfolio flags'
- type: 'static'
value: ''
column_header: 'BOND_DURATION'
- type: 'csv_header'
value: 'Residual maturity'
column_header: 'Residual maturity'
- type: 'csv_header'
value: 'Original maturity'
column_header: 'Original maturity'
- type: 'csv_header'
value: 'CFIN classification'
column_header: 'CFIN classification'
- type: 'csv_header'
value: 'First scheduled Coupon date'
column_header: 'First scheduled Coupon date'
- type: 'csv_header'
value: 'Last scheduled Coupon date'
column_header: 'Last scheduled Coupon date'
- type: 'csv_header'
value: 'Coupon rate underlying ISIN'
column_header: 'Coupon rate underlying ISIN'
- type: 'csv_header'
value: 'Coupon rate spread'
column_header: 'Coupon rate spread'
- type: 'csv_header'
value: 'Coupon rate multiplier'
column_header: 'Coupon rate multiplier'
- type: 'csv_header'
value: 'Coupon rate cap'
column_header: 'Coupon rate cap'
- type: 'csv_header'
value: 'Coupon rate floor'
column_header: 'Coupon rate floor'
- type: 'csv_header'
value: 'Issue date tranche'
column_header: 'Issue date tranche'
- type: 'csv_header'
value: 'Issue price tranche'
column_header: 'Issue price tranche'
- type: 'csv_header'
value: 'Is private placement'
column_header: 'Is private placement'
- type: 'csv_header'
value: 'RIAD code'
column_header: 'RIAD code'
- type: 'csv_header'
value: 'RIAD OUID'
column_header: 'RIAD OUID'
- type: 'csv_header'
value: 'ESG Flag 1'
column_header: 'ESG Flag 1'
- type: 'csv_header'
value: 'ESG Flag 2'
column_header: 'ESG Flag 2'
- type: 'csv_header'
value: 'ESG Flag 3'
column_header: 'ESG Flag 3'
- type: 'csv_header'
value: 'Strip'
column_header: 'Strip'
- type: 'csv_header'
value: 'Depository receipt'
column_header: 'Depository receipt'
- type: 'csv_header'
value: 'Rule 144A'
column_header: 'Rule 144A'
- type: 'csv_header'
value: 'Reg S'
column_header: 'Reg S'
- type: 'csv_header'
value: 'Warrant'
column_header: 'Warrant'
- type: 'csv_header'
value: 'CSEC Relevance - stock'
column_header: 'CSEC Relevance - stock'
- type: 'csv_header'
value: 'CSEC relevance - gross issuance'
column_header: 'CSEC relevance - gross issuance'
- type: 'csv_header'
value: 'CSEC relevance - redemption'
column_header: 'CSEC relevance - redemption'
- type: 'csv_header'
value: 'Accruing coupon'
column_header: 'Accruing coupon'
- type: 'csv_header'
value: 'Accruing discount'
column_header: 'Accruing discount'
- type: 'csv_header'
value: 'STEP Id'
column_header: 'STEP Id'
- type: 'csv_header'
value: 'Program Name'
column_header: 'Program Name'
- type: 'csv_header'
value: 'Program Ceiling'
column_header: 'Program Ceiling'
- type: 'csv_header'
value: 'Program Status'
column_header: 'Program Status'
- type: 'csv_header'
value: 'Issuer NACE21 sector'
column_header: 'Issuer NACE21 sector'
- type: 'csv_header'
value: 'Instrument quotation basis'
column_header: 'Instrument quotation basis'
- type: 'csv_header'
value: 'placeholder 38'
column_header: 'placeholder 38'
- type: 'csv_header'
value: 'placeholder 39'
column_header: 'placeholder 39'
- type: 'csv_header'
value: 'placeholder 40'
column_header: 'placeholder 40'
- type: 'csv_header'
value: 'placeholder 41'
column_header: 'placeholder 41'
- type: 'csv_header'
value: 'placeholder 42'
column_header: 'placeholder 42'
- type: 'csv_header'
value: 'placeholder 43'
column_header: 'placeholder 43'
- type: 'csv_header'
value: 'placeholder 44'
column_header: 'placeholder 44'
- type: 'csv_header'
value: 'placeholder 45'
column_header: 'placeholder 45'
- type: 'csv_header'
value: 'placeholder 46'
column_header: 'placeholder 46'
- type: 'csv_header'
value: 'placeholder 47'
column_header: 'placeholder 47'
- type: 'csv_header'
value: 'placeholder 48'
column_header: 'placeholder 48'
- type: 'csv_header'
value: 'placeholder 49'
column_header: 'placeholder 49'
- type: 'csv_header'
value: 'placeholder 50'
column_header: 'placeholder 50'

View File

View File

@@ -0,0 +1,103 @@
encoding_type: latin1
tmpdir: /tmp
inbox_prefix: INBOX/CSDB/FullRatingsDissemination
archive_prefix: ARCHIVE/CSDB/FullRatingsDissemination
workflow_name: w_ODS_CSDB_RATINGS_FULL
# File format
validation_schema_path: None
file_type: csv
tasks:
- task_name: m_ODS_CSDB_INSTR_DESC_FULL_PARSE
ods_prefix: INBOX/CSDB/FullRatingsDissemination/CSDB_INSTR_DESC_FULL
output_table: CSDB_INSTR_DESC_FULL
output_columns:
- type: 'a_key'
column_header: 'A_KEY'
- type: 'workflow_key'
column_header: 'A_WORKFLOW_HISTORY_KEY'
- type: 'csv_header'
value: 'RDB_INSTR_ID'
column_header: 'IDIRINSTRUMENT'
- type: 'csv_header'
value: 'ISIN'
column_header: 'ISIN'
- type: 'csv_header'
value: 'MOO_INSTR_ID'
column_header: 'MOO_INSTR_ID'
- type: 'csv_header'
value: 'SNP_INSTR_ID'
column_header: 'SNP_INSTR_ID'
- type: 'csv_header'
value: 'FTC_INSTR_ID'
column_header: 'FITCH_IDENTIFIER'
- type: 'csv_header'
value: 'DBR_INSTR_ID'
column_header: 'DBRS_IDENTIFIER'
- type: 'csv_header'
value: 'EA_STATUS'
column_header: 'EA_STATUS'
- type: 'csv_header'
value: 'IS_TMS'
column_header: 'IS_TMS'
- type: 'csv_header'
value: 'DBRS_COVERED_BOND_PROGRAM_ID'
column_header: 'DBRS_COVERED_BOND_PROGRAM'
- type: 'csv_header'
value: 'FITCH_PROGRAM_ID'
column_header: 'FITCH_PRG_IDENTIFIER'
- type: 'csv_header'
value: 'MOO_DEAL_NUMBER'
column_header: 'MOO_DEAL_NUMBER'
- type: 'csv_header'
value: 'SNP_PROGRAM_ID'
column_header: 'SNP_PROGRAM_ID'
- type: 'csv_header'
value: 'DBRS_DEBT_TYPE'
column_header: 'IDIRDEBTTYPE'
- type: 'csv_header'
value: 'SNP_DEBT_TYPE'
column_header: 'SNP_DEBT_TYPE'
- type: 'csv_header'
value: 'MOODY_SENIORITY'
column_header: 'MOODY_SENIORITY'
- type: 'csv_header'
value: 'FITCH_DEBT_LEVEL_CODE'
column_header: 'FITCH_DEBT_LEVEL_CODE'
- type: 'csv_header'
value: 'DBRS_RANK_TYPE'
column_header: 'DBRS_RANK_TYPE'
- type: 'csv_header'
value: 'DBRS_SECURITY_TYPE'
column_header: 'DBRS_SECURITY_TYPE'
- type: 'csv_header'
value: 'SCO_DEBT_TYPE'
column_header: 'SCO_DEBT_TYPE'
- type: 'csv_header'
value: 'SCO_INSTR_ID'
column_header: 'SCO_INSTR_ID'
- type: 'csv_header'
value: 'SCO_COVERED_BOND_PROGRAM'
column_header: 'SCO_COVERED_BOND_PROGRAM'
- type: 'csv_header'
value: 'SCO_CATEGORY'
column_header: 'SCO_CATEGORY'
- type: 'csv_header'
value: 'PLACEHOLDER15'
column_header: 'PLACEHOLDER15'
- type: 'csv_header'
value: 'PLACEHOLDER16'
column_header: 'PLACEHOLDER16'
- type: 'csv_header'
value: 'PLACEHOLDER17'
column_header: 'PLACEHOLDER17'
- type: 'csv_header'
value: 'PLACEHOLDER18'
column_header: 'PLACEHOLDER18'
- type: 'csv_header'
value: 'PLACEHOLDER19'
column_header: 'PLACEHOLDER19'
- type: 'csv_header'
value: 'PLACEHOLDER20'
column_header: 'PLACEHOLDER20'

View File

@@ -0,0 +1,130 @@
encoding_type: latin1
# Global configurations
tmpdir: /tmp
inbox_prefix: INBOX/CSDB/FullRatingsDissemination
archive_prefix: ARCHIVE/CSDB/FullRatingsDissemination
workflow_name: w_ODS_CSDB_RATINGS_FULL
validation_schema_path: None
file_type: csv
tasks:
- task_name: m_ODS_CSDB_INSTR_RAT_FULL_PARSE
ods_prefix: INBOX/CSDB/FullRatingsDissemination/CSDB_INSTR_RAT_FULL
output_table: CSDB_INSTR_RAT_FULL
output_columns:
- type: 'a_key'
column_header: 'A_KEY'
- type: 'workflow_key'
column_header: 'A_WORKFLOW_HISTORY_KEY'
- type: 'csv_header'
value: 'RDB_INSTR_ID'
column_header: 'RDB_INSTR_ID'
- type: 'csv_header'
value: 'SOURCE'
column_header: 'SOURCE'
- type: 'csv_header'
value: 'RATING_SCHEME'
column_header: 'RATING_SCHEME'
- type: 'csv_header'
value: 'RATING'
column_header: 'RATING'
- type: 'csv_header'
value: 'RATING_DATE'
column_header: 'RATING_DATE'
- type: 'csv_header'
value: 'TIME_HORIZON'
column_header: 'TIME_HORIZON'
- type: 'csv_header'
value: 'CURRENCY_TYPE'
column_header: 'CURRENCY_TYPE'
- type: 'csv_header'
value: 'NOTES'
column_header: 'NOTES'
- type: 'csv_header'
value: 'VALID_FROM'
column_header: 'VALID_FROM'
- type: 'csv_header'
value: 'VALID_UNTIL'
column_header: 'VALID_UNTIL'
- type: 'csv_header'
value: 'RDB_RATINGS_ID'
column_header: 'RDB_RATINGS_ID'
- type: 'csv_header'
value: 'OUTLOOK'
column_header: 'WATCHLIST'
- type: 'csv_header'
value: 'OUTLOOK_DATE'
column_header: 'WATCHLIST_DATE'
- type: 'csv_header'
value: 'WATCHLIST'
column_header: 'OUTLOOK'
- type: 'csv_header'
value: 'WATCHLIST_DATE'
column_header: 'OUTLOOK_DATE'
- type: 'csv_header'
value: 'RATING_ACTION'
column_header: 'RATING_ACTION'
- type: 'csv_header'
value: 'RATING_ACTION_DATE'
column_header: 'RATING_ACTION_DATE'
- type: 'csv_header'
value: 'IS_PRELIMINARY'
column_header: 'IS_PRELIMINARY'
- type: 'csv_header'
value: 'RATING_RAW'
column_header: 'RATING_RAW'
- type: 'csv_header'
value: 'RATING_TYPE'
column_header: 'RATING_TYPE'
- type: 'csv_header'
value: 'ENDORSEMENT_INDICATOR'
column_header: 'ENDORSEMENT_INDICATOR'
- type: 'csv_header'
value: 'LAST_REVIEW_DATE'
column_header: 'LAST_REVIEW_DATE'
- type: 'csv_header'
value: 'PLACEHOLDER6'
column_header: 'PLACEHOLDER6'
- type: 'csv_header'
value: 'PLACEHOLDER7'
column_header: 'PLACEHOLDER7'
- type: 'csv_header'
value: 'PLACEHOLDER8'
column_header: 'PLACEHOLDER8'
- type: 'csv_header'
value: 'PLACEHOLDER9'
column_header: 'PLACEHOLDER9'
- type: 'csv_header'
value: 'PLACEHOLDER10'
column_header: 'PLACEHOLDER10'
- type: 'csv_header'
value: 'PLACEHOLDER11'
column_header: 'PLACEHOLDER11'
- type: 'csv_header'
value: 'PLACEHOLDER12'
column_header: 'PLACEHOLDER12'
- type: 'csv_header'
value: 'PLACEHOLDER13'
column_header: 'PLACEHOLDER13'
- type: 'csv_header'
value: 'PLACEHOLDER14'
column_header: 'PLACEHOLDER14'
- type: 'csv_header'
value: 'PLACEHOLDER15'
column_header: 'PLACEHOLDER15'
- type: 'csv_header'
value: 'PLACEHOLDER16'
column_header: 'PLACEHOLDER16'
- type: 'csv_header'
value: 'PLACEHOLDER17'
column_header: 'PLACEHOLDER17'
- type: 'csv_header'
value: 'PLACEHOLDER18'
column_header: 'PLACEHOLDER18'
- type: 'csv_header'
value: 'PLACEHOLDER19'
column_header: 'PLACEHOLDER19'
- type: 'csv_header'
value: 'PLACEHOLDER20'
column_header: 'PLACEHOLDER20'

View File

@@ -0,0 +1,106 @@
encoding_type: latin1
# Global configurations
tmpdir: /tmp
inbox_prefix: INBOX/CSDB/FullRatingsDissemination
archive_prefix: ARCHIVE/CSDB/FullRatingsDissemination
workflow_name: w_ODS_CSDB_RATINGS_FULL
validation_schema_path: None
file_type: csv
tasks:
- task_name: m_ODS_CSDB_ISSUER_DESC_FULL_PARSE
ods_prefix: INBOX/CSDB/FullRatingsDissemination/CSDB_ISSUER_DESC_FULL
output_table: CSDB_ISSUER_DESC_FULL
output_columns:
- type: 'a_key'
column_header: 'A_KEY'
- type: 'workflow_key'
column_header: 'A_WORKFLOW_HISTORY_KEY'
- type: 'csv_header'
value: 'RDB_ISSUER_ID'
column_header: 'RDB_ISSUER_ID'
- type: 'csv_header'
value: 'ISSUER_NAME'
column_header: 'ISSUERNAME'
- type: 'csv_header'
value: 'COUNTRY_DOMICILE'
column_header: 'COUNTRY_DOMICILE'
- type: 'csv_header'
value: 'IS_SOVEREIGN'
column_header: 'IS_SOVEREIGN'
- type: 'csv_header'
value: 'MOO_ISSUER_ID'
column_header: 'MOODY_IDENTIFIER'
- type: 'csv_header'
value: 'SNP_ISSUER_ID'
column_header: 'SNP_ISSUER_ID'
- type: 'csv_header'
value: 'FTC_ISSUER_ID'
column_header: 'FITCH_IDENTIFIER'
- type: 'csv_header'
value: 'DBR_ISSUER_ID'
column_header: 'DBRS_IDENTIFIER'
- type: 'csv_header'
value: 'LEI_ISSUER_ID'
column_header: 'LEI_ISSUER_ID'
- type: 'csv_header'
value: 'RIAD_CODE'
column_header: 'RIAD_CODE'
- type: 'csv_header'
value: 'RIAD_OUID'
column_header: 'RIAD_OUID'
- type: 'csv_header'
value: 'CLASH_GROUP_STATUS'
column_header: 'CLASH_GROUP_STATUS'
- type: 'csv_header'
value: 'SCO_ISSUER_ID'
column_header: 'SCO_ISSUER_ID'
- type: 'csv_header'
value: 'PLACEHOLDER5'
column_header: 'PLACEHOLDER5'
- type: 'csv_header'
value: 'PLACEHOLDER6'
column_header: 'PLACEHOLDER6'
- type: 'csv_header'
value: 'PLACEHOLDER7'
column_header: 'PLACEHOLDER7'
- type: 'csv_header'
value: 'PLACEHOLDER8'
column_header: 'PLACEHOLDER8'
- type: 'csv_header'
value: 'PLACEHOLDER9'
column_header: 'PLACEHOLDER9'
- type: 'csv_header'
value: 'PLACEHOLDER10'
column_header: 'PLACEHOLDER10'
- type: 'csv_header'
value: 'PLACEHOLDER11'
column_header: 'PLACEHOLDER11'
- type: 'csv_header'
value: 'PLACEHOLDER12'
column_header: 'PLACEHOLDER12'
- type: 'csv_header'
value: 'PLACEHOLDER13'
column_header: 'PLACEHOLDER13'
- type: 'csv_header'
value: 'PLACEHOLDER14'
column_header: 'PLACEHOLDER14'
- type: 'csv_header'
value: 'PLACEHOLDER15'
column_header: 'PLACEHOLDER15'
- type: 'csv_header'
value: 'PLACEHOLDER16'
column_header: 'PLACEHOLDER16'
- type: 'csv_header'
value: 'PLACEHOLDER17'
column_header: 'PLACEHOLDER17'
- type: 'csv_header'
value: 'PLACEHOLDER18'
column_header: 'PLACEHOLDER18'
- type: 'csv_header'
value: 'PLACEHOLDER19'
column_header: 'PLACEHOLDER19'
- type: 'csv_header'
value: 'PLACEHOLDER20'
column_header: 'PLACEHOLDER20'

View File

@@ -0,0 +1,131 @@
encoding_type: latin1
# Global configurations
tmpdir: /tmp
inbox_prefix: INBOX/CSDB/FullRatingsDissemination
archive_prefix: ARCHIVE/CSDB/FullRatingsDissemination
workflow_name: w_ODS_CSDB_RATINGS_FULL
validation_schema_path: None
file_type: csv
tasks:
- task_name: m_ODS_CSDB_ISSUER_RAT_FULL_PARSE
ods_prefix: INBOX/CSDB/FullRatingsDissemination/CSDB_ISSUER_RAT_FULL
output_table: CSDB_ISSUER_RAT_FULL
output_columns:
- type: 'a_key'
column_header: 'A_KEY'
- type: 'workflow_key'
column_header: 'A_WORKFLOW_HISTORY_KEY'
- type: 'csv_header'
value: 'RDB_ISSUER_ID'
column_header: 'RDB_ISSUER_ID'
- type: 'csv_header'
value: 'SOURCE'
column_header: 'SOURCE'
- type: 'csv_header'
value: 'RATING_SCHEME'
column_header: 'RATING_SCHEME'
- type: 'csv_header'
value: 'RATING'
column_header: 'RATING'
- type: 'csv_header'
value: 'RATING_DATE'
column_header: 'RATING_DATE'
- type: 'csv_header'
value: 'TIME_HORIZON'
column_header: 'TIME_HORIZON'
- type: 'csv_header'
value: 'CURRENCY_TYPE'
column_header: 'CURRENCY_TYPE'
- type: 'csv_header'
value: 'NOTES'
column_header: 'NOTES'
- type: 'csv_header'
value: 'VALID_FROM'
column_header: 'VALID_FROM'
- type: 'csv_header'
value: 'VALID_UNTIL'
column_header: 'VALID_UNTIL'
- type: 'csv_header'
value: 'RDB_RATINGS_ID'
column_header: 'RDB_RATINGS_ID'
- type: 'csv_header'
value: 'OUTLOOK'
column_header: 'OUTLOOK'
- type: 'csv_header'
value: 'OUTLOOK_DATE'
column_header: 'OUTLOOK_DATE'
- type: 'csv_header'
value: 'WATCHLIST'
column_header: 'WATCHLIST'
- type: 'csv_header'
value: 'WATCHLIST_DATE'
column_header: 'WATCHLIST_DATE'
- type: 'csv_header'
value: 'RATING_ACTION'
column_header: 'RATING_ACTION'
- type: 'csv_header'
value: 'RATING_ACTION_DATE'
column_header: 'RATING_ACTION_DATE'
- type: 'csv_header'
value: 'IS_PRELIMINARY'
column_header: 'IS_PRELIMINARY'
- type: 'csv_header'
value: 'RATING_RAW'
column_header: 'RATING_RAW'
- type: 'csv_header'
value: 'RATING_TYPE'
column_header: 'RATING_TYPE'
- type: 'csv_header'
value: 'ENDORSEMENT_INDICATOR'
column_header: 'ENDORSEMENT_INDICATOR'
- type: 'csv_header'
value: 'LAST_REVIEW_DATE'
column_header: 'LAST_REVIEW_DATE'
- type: 'csv_header'
value: 'PLACEHOLDER6'
column_header: 'PLACEHOLDER6'
- type: 'csv_header'
value: 'PLACEHOLDER7'
column_header: 'PLACEHOLDER7'
- type: 'csv_header'
value: 'PLACEHOLDER8'
column_header: 'PLACEHOLDER8'
- type: 'csv_header'
value: 'PLACEHOLDER9'
column_header: 'PLACEHOLDER9'
- type: 'csv_header'
value: 'PLACEHOLDER10'
column_header: 'PLACEHOLDER10'
- type: 'csv_header'
value: 'PLACEHOLDER11'
column_header: 'PLACEHOLDER11'
- type: 'csv_header'
value: 'PLACEHOLDER12'
column_header: 'PLACEHOLDER12'
- type: 'csv_header'
value: 'PLACEHOLDER13'
column_header: 'PLACEHOLDER13'
- type: 'csv_header'
value: 'PLACEHOLDER14'
column_header: 'PLACEHOLDER14'
- type: 'csv_header'
value: 'PLACEHOLDER15'
column_header: 'PLACEHOLDER15'
- type: 'csv_header'
value: 'PLACEHOLDER16'
column_header: 'PLACEHOLDER16'
- type: 'csv_header'
value: 'PLACEHOLDER17'
column_header: 'PLACEHOLDER17'
- type: 'csv_header'
value: 'PLACEHOLDER18'
column_header: 'PLACEHOLDER18'
- type: 'csv_header'
value: 'PLACEHOLDER19'
column_header: 'PLACEHOLDER19'
- type: 'csv_header'
value: 'PLACEHOLDER20'
column_header: 'PLACEHOLDER20'

View File

@@ -0,0 +1,420 @@
import sys
import os
import json
import logging
from pathlib import Path
from datetime import timedelta, datetime, timezone
from email.utils import parsedate_to_datetime
from airflow import DAG
from airflow.models import Variable
from airflow.decorators import task as af_task
from airflow.operators.trigger_dagrun import TriggerDagRunOperator
from airflow.utils.dates import days_ago
from airflow.utils.trigger_rule import TriggerRule
from airflow.operators.python import get_current_context
try:
from airflow.exceptions import AirflowFailException, AirflowSkipException
except Exception:
from airflow.exceptions import AirflowException as AirflowFailException
from airflow.exceptions import AirflowSkipException
# DAG identity — also used below to namespace this coordinator's Airflow Variables.
dag_id = "w_ODS_CSDB_RATINGS_FULL_COORDINATOR"
# Task-level defaults applied to every task in this DAG.
default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': days_ago(1),
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
}
# Object Storage location of the incoming CSDB ratings zip files.
OCI_NAMESPACE = os.getenv("BUCKET_NAMESPACE")
OCI_BUCKET = os.getenv("INBOX_BUCKET")
OBJECT_PREFIX = os.getenv("OBJECT_PREFIX", "csdb/ratings/full/")
# Reprocess flag — truthy values are "1", "true", "yes" (case-insensitive).
REPROCESS = (os.getenv("CSDB_REPROCESS", "false").lower() in ("1", "true", "yes"))
# Airflow Variable keys that persist coordinator state between runs.
LAST_TS_VAR = f"{dag_id}__last_seen_ts"
PROCESSED_TS_VAR = f"{dag_id}__processed_objects_ts"
def _oci_client():
    """Create an OCI Object Storage client.

    Tries Resource Principals authentication first, then Instance
    Principals; raises RuntimeError when neither signer can be built.
    """
    import oci
    region = (
        os.getenv("OCI_REGION")
        or os.getenv("OCI_RESOURCE_PRINCIPAL_REGION")
        or "eu-frankfurt-1"
    )
    cfg = {"region": region} if region else {}
    # Preferred: Resource Principals (e.g. when running in OKE/Functions).
    try:
        signer = oci.auth.signers.get_resource_principals_signer()
        logging.info("Using OCI Resource Principals signer (region=%s).", cfg.get("region"))
        return oci.object_storage.ObjectStorageClient(cfg, signer=signer)
    except Exception as exc:
        logging.info("RP not available: %s", exc)
    # Fallback: Instance Principals (compute-instance identity).
    try:
        signer = oci.auth.signers.InstancePrincipalsSecurityTokenSigner()
        logging.info("Using OCI Instance Principals signer (region=%s).", cfg.get("region"))
        return oci.object_storage.ObjectStorageClient(cfg, signer=signer)
    except Exception as exc:
        logging.info("IP not available: %s", exc)
    logging.error("Neither Resource Principals nor Instance Principals authentication found.")
    raise RuntimeError("Failed to create OCI client")
def _load_processed_map() -> dict[str, float]:
    """Load the processed-objects Airflow Variable as a mapping of
    object name -> unix timestamp of last successful processing.

    Best-effort: any lookup/parse problem yields an empty mapping.
    """
    result: dict[str, float] = {}
    try:
        payload = json.loads(Variable.get(PROCESSED_TS_VAR, default_var="{}")) or {}
        if isinstance(payload, dict):
            result = {name: float(stamp) for name, stamp in payload.items()}
    except Exception:
        # Missing or corrupt state is treated as "nothing processed yet".
        result = {}
    return result
def _list_all_zip_objects(include_processed: bool = False) -> list[dict]:
    """Return every STC-FullRatingsDissemination zip object under
    OBJECT_PREFIX, sorted oldest first.

    Each entry is a dict with keys ``name`` (full object path), ``base``
    (file name only), ``mtime`` (unix timestamp) and ``is_processed``.

    :param include_processed: when True, skip the processed-state lookup so
        every object is reported with ``is_processed`` False.
    :raises AirflowFailException: when the bucket env vars are not set.
    """
    if not OCI_NAMESPACE or not OCI_BUCKET:
        raise AirflowFailException("BUCKET_NAMESPACE and INBOX_BUCKET must be set")
    client = _oci_client()
    # Previously processed object-name -> timestamp map (empty when reprocessing).
    processed_map = _load_processed_map() if not include_processed else {}
    # NOTE(review): list_objects is called once without following
    # next_start_with — if the prefix can hold more objects than one page
    # returns, pagination may be needed; confirm expected volume.
    resp = client.list_objects(OCI_NAMESPACE, OCI_BUCKET, prefix=OBJECT_PREFIX)
    all_items: list[dict] = []
    for o in (resp.data.objects or []):
        name = (o.name or "").strip()
        base = name.rsplit("/", 1)[-1] if name else ""
        # Skip empty names and "directory" placeholder objects.
        if not name or name.endswith('/') or not base:
            continue
        # Only STC-FullRatingsDissemination zip files are of interest.
        if not ("STC-FullRatingsDissemination" in base and base.lower().endswith(".zip")):
            continue
        # Determine the object's timestamp: prefer the listing's time_created,
        # then a HEAD request's Last-Modified header, finally "now" as a
        # last resort so the object is never dropped for lack of a timestamp.
        ts = None
        t = getattr(o, "time_created", None)
        if t:
            try:
                # time_created may be a datetime or epoch milliseconds.
                ts = t.timestamp() if hasattr(t, "timestamp") else float(t) / 1000.0
            except Exception:
                ts = None
        if ts is None:
            try:
                head = client.head_object(OCI_NAMESPACE, OCI_BUCKET, name)
                lm = head.headers.get("last-modified") or head.headers.get("Last-Modified")
                if lm:
                    dt = parsedate_to_datetime(lm)
                    if dt.tzinfo is None:
                        # HTTP Last-Modified is GMT; treat naive values as UTC.
                        dt = dt.replace(tzinfo=timezone.utc)
                    ts = dt.timestamp()
            except Exception as e:
                logging.warning("head_object failed for %s: %s", name, e)
        if ts is None:
            ts = datetime.now(timezone.utc).timestamp()
        # An object counts as processed when its timestamp is not newer than
        # the one recorded at its last successful processing.
        last_proc_ts = float(processed_map.get(name, 0.0))
        is_processed = (ts <= last_proc_ts) if processed_map else False
        item = {
            "name": name,
            "base": base,
            "mtime": ts,
            "is_processed": is_processed
        }
        all_items.append(item)
    # Sort by timestamp (oldest first)
    all_items.sort(key=lambda x: x["mtime"])
    return all_items
def _list_new_zip_objects() -> list[dict]:
    """Return only the zip objects that have not been processed yet,
    oldest first (ordering comes from _list_all_zip_objects)."""
    candidates = _list_all_zip_objects(include_processed=False)
    fresh = [entry for entry in candidates if not entry.get("is_processed", False)]
    logging.info("Found %d new STC-FullRatingsDissemination zip file(s) (sorted oldest to newest)", len(fresh))
    return fresh
def _find_specific_zip(filename_pattern: str) -> dict:
    """Locate one zip entry: exact name first, then case-insensitive substring.

    Raises:
        AirflowFailException: when nothing matches the pattern.
    """
    candidates = _list_all_zip_objects(include_processed=True)
    # Pass 1: exact match on either the bare filename or the full object key.
    exact = next(
        (c for c in candidates if filename_pattern in (c["base"], c["name"])),
        None,
    )
    if exact is not None:
        logging.info("Found exact match: %s", exact["base"])
        return exact
    # Pass 2: case-insensitive substring match on the bare filename.
    needle = filename_pattern.lower()
    for candidate in candidates:
        if needle in candidate["base"].lower():
            logging.info("Found partial match: %s", candidate["base"])
            return candidate
    raise AirflowFailException(f"No zip file found matching pattern: {filename_pattern}")
# Coordinator DAG: runs on a 6-hour schedule, discovers zip files in the OCI
# bucket and fans out one processing-DAG run per file.
with DAG(
    dag_id=dag_id,
    default_args=default_args,
    description='CSDB Ratings Full Coordinator: Lists and triggers processing for zip files',
    schedule_interval="0 */6 * * *",  # Every 6 hours, adjust as needed
    catchup=False,
    max_active_runs=1,  # one coordinator at a time; ordering of triggers matters
    render_template_as_native_obj=True,  # XCom values stay native dicts/lists
    tags=["CSDB", "COORDINATOR", "ODS", "OCI", "RATINGS"],
) as dag:
@af_task(task_id="determine_processing_mode")
def determine_processing_mode(**context):
    """Read dag_run.conf and normalise it into a processing directive.

    Supported configurations:
      * no config / mode='all'            -> process every new zip file
      * mode='specific' + filename        -> one named zip file
      * mode='specific_list' + filenames  -> an explicit list of zip files
      * mode='reprocess_all'              -> everything, processed or not
      * mode='list_only'                  -> inventory log only, no processing
      * limit=N                           -> cap the number of files handled
    """
    conf = context.get('dag_run').conf or {}
    directive = {
        "mode": conf.get('mode', 'all'),
        "filename": conf.get('filename'),
        "filenames": conf.get('filenames', []),
        "force_reprocess": conf.get('force_reprocess', False),
        "limit": conf.get('limit'),
    }
    logging.info("Processing mode: %s", directive["mode"])
    logging.info("Configuration: %s", json.dumps(conf, indent=2))
    return directive
@af_task(task_id="list_zip_files")
def list_zip_files(mode_config: dict):
    """Resolve the upstream mode decision into a concrete list of zip files.

    Modes: list_only (log inventory then skip), specific (one file by name or
    pattern), specific_list (several names, best-effort), reprocess_all,
    date_range (inclusive mtime window), and the default 'all' (new files).

    Returns:
        {"zip_files": [...], "mode": ..., "force_reprocess": ...}

    Raises:
        AirflowFailException: on invalid config or when nothing was found.
        AirflowSkipException: for list_only mode and for 'all' with no new files.
    """
    mode = mode_config.get("mode", "all")
    filename = mode_config.get("filename")
    filenames = mode_config.get("filenames", [])
    force_reprocess = mode_config.get("force_reprocess", False)
    limit = mode_config.get("limit")
    zip_files = []
    if mode == "list_only":
        # Just list all files for information, then skip downstream tasks.
        all_files = _list_all_zip_objects(include_processed=True)
        logging.info("=== Available ZIP Files ===")
        for idx, f in enumerate(all_files, 1):
            status = "PROCESSED" if f.get("is_processed") else "NEW"
            # NOTE(review): fromtimestamp() renders in the worker's local
            # timezone — confirm that is intended for these log lines.
            logging.info("%d. [%s] %s (mtime: %s)",
                         idx, status, f["base"],
                         datetime.fromtimestamp(f["mtime"]).isoformat())
        raise AirflowSkipException("List only mode - no processing triggered")
    elif mode == "specific":
        # Process a single named file.
        if not filename:
            raise AirflowFailException("mode='specific' requires 'filename' parameter")
        zip_file = _find_specific_zip(filename)
        zip_files = [zip_file]
        logging.info("Processing specific file: %s", zip_file["base"])
    elif mode == "specific_list":
        # Process a list of named files; unresolvable names are logged and
        # skipped — we only fail when none of them resolve.
        if not filenames:
            raise AirflowFailException("mode='specific_list' requires 'filenames' parameter")
        for fn in filenames:
            try:
                zip_file = _find_specific_zip(fn)
                zip_files.append(zip_file)
            except Exception as e:
                logging.warning("Could not find file %s: %s", fn, e)
        if not zip_files:
            raise AirflowFailException("None of the specified files were found")
        logging.info("Processing %d specific files", len(zip_files))
    elif mode == "reprocess_all":
        # Reprocess all files (including already processed ones).
        all_files = _list_all_zip_objects(include_processed=True)
        zip_files = all_files
        logging.info("Reprocessing all %d files", len(zip_files))
    elif mode == "date_range":
        # Keep files whose object mtime falls inside [start_date, end_date].
        start_date = mode_config.get("start_date")
        end_date = mode_config.get("end_date")
        if not start_date or not end_date:
            raise AirflowFailException("mode='date_range' requires 'start_date' and 'end_date'")
        # NOTE(review): naive ISO strings are interpreted in the worker's
        # local time by fromisoformat().timestamp() — confirm callers pass
        # tz-aware values if UTC semantics are expected.
        start_ts = datetime.fromisoformat(start_date).timestamp()
        end_ts = datetime.fromisoformat(end_date).timestamp()
        all_files = _list_all_zip_objects(include_processed=True)
        zip_files = [f for f in all_files if start_ts <= f["mtime"] <= end_ts]
        logging.info("Found %d files in date range %s to %s",
                     len(zip_files), start_date, end_date)
    else:  # mode == "all" or default
        # Process all new (unprocessed) files.
        zip_files = _list_new_zip_objects()
        if not zip_files:
            logging.info("No new zip files to process")
            raise AirflowSkipException("No new zip files found")
    # Apply limit if specified. This runs before the final sort, but the
    # listing helpers already return oldest-first, so the oldest N are kept.
    if limit and isinstance(limit, int) and limit > 0:
        original_count = len(zip_files)
        zip_files = zip_files[:limit]
        logging.info("Limited processing from %d to %d files", original_count, len(zip_files))
    # Sort by timestamp (oldest first)
    zip_files.sort(key=lambda x: x["mtime"])
    logging.info("Selected %d zip file(s) for processing:", len(zip_files))
    for idx, f in enumerate(zip_files, 1):
        logging.info("%d. %s (mtime: %s)",
                     idx, f["base"],
                     datetime.fromtimestamp(f["mtime"]).isoformat())
    return {
        "zip_files": zip_files,
        "mode": mode,
        "force_reprocess": force_reprocess
    }
@af_task(task_id="trigger_processing_dags")
def trigger_processing_dags(list_result: dict):
    """Trigger w_ODS_CSDB_RATINGS_FULL_CORE once per zip file, sequentially.

    Trigger failures are recorded per file (status="failed") instead of
    raised, so one bad file does not stop the remaining triggers.

    Returns:
        List of per-file status dicts (run_id/zip_file/sequence/status[/error]).
    """
    from airflow.api.common.trigger_dag import trigger_dag
    from time import sleep
    zip_files = list_result.get("zip_files", [])
    mode = list_result.get("mode", "all")
    force_reprocess = list_result.get("force_reprocess", False)
    if not zip_files:
        logging.info("No zip files to process")
        return []
    triggered_runs = []
    for idx, zip_file in enumerate(zip_files):
        # Everything the core DAG needs to process this one archive.
        conf = {
            "zip_object_name": zip_file["name"],
            "zip_base_name": zip_file["base"],
            "zip_mtime": zip_file["mtime"],
            "sequence_number": idx + 1,
            "total_files": len(zip_files),
            "processing_mode": mode,
            "force_reprocess": force_reprocess,
            "is_processed": zip_file.get("is_processed", False)
        }
        logging.info(f"Triggering processing DAG for file {idx + 1}/{len(zip_files)}: {zip_file['base']}")
        try:
            # NOTE(review): run_id is second-granular plus the index; two
            # coordinator runs starting in the same second could collide —
            # confirm max_active_runs=1 is considered sufficient protection.
            run_id = trigger_dag(
                dag_id="w_ODS_CSDB_RATINGS_FULL_CORE",
                run_id=f"coordinator__{datetime.now().strftime('%Y%m%d_%H%M%S')}__{idx}",
                conf=conf,
                execution_date=None,
                replace_microseconds=False,
            )
            triggered_runs.append({
                "run_id": str(run_id),
                "zip_file": zip_file["base"],
                "sequence": idx + 1,
                "status": "triggered"
            })
            logging.info(f"Successfully triggered run: {run_id}")
        except Exception as e:
            logging.error(f"Failed to trigger processing for {zip_file['base']}: {e}")
            triggered_runs.append({
                "zip_file": zip_file["base"],
                "sequence": idx + 1,
                "status": "failed",
                "error": str(e)
            })
        # Small delay between triggers to avoid overwhelming the system
        sleep(2)
    logging.info(f"Triggered {len([r for r in triggered_runs if r.get('status') == 'triggered'])} processing DAG runs")
    logging.info(f"Failed to trigger {len([r for r in triggered_runs if r.get('status') == 'failed'])} runs")
    return triggered_runs
@af_task(task_id="summary_report")
def summary_report(trigger_result: list):
    """Log a success/failure summary of the triggered core-DAG runs and
    return the aggregate counts (or None when nothing was triggered)."""
    if not trigger_result:
        logging.info("No runs were triggered")
        return
    successful, failed = [], []
    for entry in trigger_result:
        status = entry.get("status")
        if status == "triggered":
            successful.append(entry)
        elif status == "failed":
            failed.append(entry)
    rule = "=" * 80
    logging.info(rule)
    logging.info("PROCESSING SUMMARY")
    logging.info(rule)
    logging.info(f"Total files: {len(trigger_result)}")
    logging.info(f"Successfully triggered: {len(successful)}")
    logging.info(f"Failed to trigger: {len(failed)}")
    if successful:
        logging.info("\nSuccessfully triggered:")
        for entry in successful:
            logging.info(f"  - {entry['zip_file']} (run_id: {entry['run_id']})")
    if failed:
        logging.info("\nFailed to trigger:")
        for entry in failed:
            logging.info(f"  - {entry['zip_file']} (error: {entry.get('error', 'unknown')})")
    logging.info(rule)
    return {
        "total": len(trigger_result),
        "successful": len(successful),
        "failed": len(failed),
    }
# Build DAG structure: decide mode -> list files -> fan out triggers -> report.
mode_task = determine_processing_mode()
list_task = list_zip_files(mode_task)
trigger_task = trigger_processing_dags(list_task)
summary_task = summary_report(trigger_task)
# Explicit chaining is redundant with the TaskFlow data dependencies above,
# but documents the intended order.
mode_task >> list_task >> trigger_task >> summary_task
logging.info("CSDB Ratings Full Coordinator DAG ready")

View File

@@ -0,0 +1,388 @@
import sys
import os
import json
import logging
import zipfile
from pathlib import Path
from datetime import timedelta, datetime, timezone
from airflow import DAG
from airflow.models import Variable
from airflow.decorators import task as af_task
from airflow.operators.python import PythonOperator
from airflow.utils.dates import days_ago
from airflow.utils.trigger_rule import TriggerRule
from airflow.operators.empty import EmptyOperator
from airflow.operators.python import get_current_context
try:
from airflow.exceptions import AirflowFailException, AirflowSkipException
except Exception:
from airflow.exceptions import AirflowException as AirflowFailException
from airflow.exceptions import AirflowSkipException
sys.path.append('/opt/airflow/python/mrds_common')
sys.path.append('/opt/airflow/src/airflow/dags/ods/csdb')
from mrds.utils.manage_runs import init_workflow as mrds_init_workflow, finalise_workflow as mrds_finalise_workflow
from mrds.core import main as mrds_main
# Identity of this DAG; the coordinator triggers it by this id.
dag_id = "w_ODS_CSDB_RATINGS_FULL_CORE"
default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': days_ago(1),
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
}
# Names used when registering runs in the MRDS workflow-history tables.
WORKFLOW_CONFIG = {
    "database_name": "ODS",
    "workflow_name": dag_id,
}
# OCI Object Storage location of the inbox (configured via environment).
OCI_NAMESPACE = os.getenv("BUCKET_NAMESPACE")
OCI_BUCKET = os.getenv("INBOX_BUCKET")
OBJECT_PREFIX = os.getenv("OBJECT_PREFIX", "csdb/ratings/full/")
# Scratch space on the worker for downloads/extraction.
TEMP_DIR = "/tmp/csdb_ratings"
# Airflow Variable (shared with the coordinator DAG) mapping object name ->
# mtime of the last processed version of that object.
PROCESSED_TS_VAR = "w_ODS_CSDB_RATINGS_FULL_COORDINATOR__processed_objects_ts"
# CSV configurations: the archive members we expect and, for each, the MRDS
# parse config to apply and the Airflow task id that will process it.
CSV_CONFIGS = [
    {
        "source_filename": "FULL_INSTRUMENT_DESCRIPTION.csv",
        "config_yaml": "/opt/airflow/src/airflow/dags/ods/csdb/full_ratings/config/m_ODS_CSDB_INSTR_DESC_FULL_PARSE.yaml",
        "task_name": "m_ODS_CSDB_RATINGS_FULL_INSTRUMENT_DESCRIPTION"
    },
    {
        "source_filename": "FULL_INSTRUMENT_RATINGS.csv",
        "config_yaml": "/opt/airflow/src/airflow/dags/ods/csdb/full_ratings/config/m_ODS_CSDB_INSTR_RAT_FULL_PARSE.yaml",
        "task_name": "m_ODS_CSDB_RATINGS_FULL_INSTRUMENT_RATINGS"
    },
    {
        "source_filename": "FULL_ISSUER_DESCRIPTION.csv",
        "config_yaml": "/opt/airflow/src/airflow/dags/ods/csdb/full_ratings/config/m_ODS_CSDB_ISSUER_DESC_FULL_PARSE.yaml",
        "task_name": "m_ODS_CSDB_RATINGS_FULL_ISSUER_DESCRIPTION"
    },
    {
        "source_filename": "FULL_ISSUER_RATINGS.csv",
        "config_yaml": "/opt/airflow/src/airflow/dags/ods/csdb/full_ratings/config/m_ODS_CSDB_ISSUER_RAT_FULL_PARSE.yaml",
        "task_name": "m_ODS_CSDB_RATINGS_FULL_ISSUER_RATINGS"
    }
]
def _oci_client():
    """Build an ObjectStorageClient, preferring Resource Principals and
    falling back to Instance Principals.

    Raises:
        RuntimeError: when neither authentication mechanism is available.
    """
    import oci
    region = os.getenv("OCI_REGION") or os.getenv("OCI_RESOURCE_PRINCIPAL_REGION") or "eu-frankfurt-1"
    cfg = {"region": region} if region else {}
    try:
        signer = oci.auth.signers.get_resource_principals_signer()
        logging.info("Using OCI Resource Principals signer (region=%s).", cfg.get("region"))
        return oci.object_storage.ObjectStorageClient(cfg, signer=signer)
    except Exception as exc:
        logging.info("RP not available: %s", exc)
    try:
        signer = oci.auth.signers.InstancePrincipalsSecurityTokenSigner()
        logging.info("Using OCI Instance Principals signer (region=%s).", cfg.get("region"))
        return oci.object_storage.ObjectStorageClient(cfg, signer=signer)
    except Exception as exc:
        logging.info("IP not available: %s", exc)
    logging.error("Neither Resource Principals nor Instance Principals authentication found.")
    raise RuntimeError("Failed to create OCI client")
def _load_processed_map() -> dict[str, float]:
try:
raw = Variable.get(PROCESSED_TS_VAR, default_var="{}")
m = json.loads(raw) or {}
if isinstance(m, dict):
return {k: float(v) for k, v in m.items()}
except Exception:
pass
return {}
def _save_processed_map(m: dict[str, float]) -> None:
    # Persist the object-name -> last-processed-mtime map as JSON in the
    # Airflow Variable shared with the coordinator DAG (PROCESSED_TS_VAR).
    Variable.set(PROCESSED_TS_VAR, json.dumps(m))
def _mark_processed(zip_key: str, zip_mtime: float):
    """Record zip_key as processed at zip_mtime in the shared Variable map."""
    processed = _load_processed_map()
    processed[zip_key] = float(zip_mtime)
    _save_processed_map(processed)
    logging.info("Marked as processed: %s (mtime=%s)", zip_key, zip_mtime)
# Processor DAG: no schedule — each run is triggered by the coordinator with
# one zip file in dag_run.conf; the CSVs inside are parsed in parallel.
with DAG(
    dag_id=dag_id,
    default_args=default_args,
    description='CSDB Ratings Full Processor: Processes one zip file with 4 CSV files in parallel',
    schedule_interval=None,  # Triggered by coordinator
    catchup=False,
    max_active_runs=3,  # Allow some parallelism but controlled
    render_template_as_native_obj=True,  # keep XCom/templated values native
    tags=["CSDB", "PROCESSOR", "MRDS", "ODS", "OCI", "RATINGS"],
) as dag:
@af_task(task_id="get_zip_config")
def get_zip_config(**context):
    """Read this run's zip-file assignment from dag_run.conf.

    Required keys (set by the coordinator): zip_object_name, zip_base_name,
    zip_mtime. Optional: sequence_number, total_files (default 0).

    Returns:
        dict echoing the validated configuration.

    Raises:
        AirflowFailException: when a required key is missing.
    """
    conf = context['dag_run'].conf or {}
    zip_object_name = conf.get('zip_object_name')
    zip_base_name = conf.get('zip_base_name')
    zip_mtime = conf.get('zip_mtime')
    sequence_number = conf.get('sequence_number', 0)
    total_files = conf.get('total_files', 0)
    # zip_mtime is checked with `is None` rather than truthiness: an epoch
    # mtime of 0/0.0 is unusual but valid and must not be rejected as missing
    # (the previous `not all([...])` check did exactly that).
    if not zip_object_name or not zip_base_name or zip_mtime is None:
        raise AirflowFailException("Missing required configuration: zip_object_name, zip_base_name, or zip_mtime")
    logging.info(f"Processing zip file {sequence_number}/{total_files}: {zip_base_name}")
    return {
        "zip_object_name": zip_object_name,
        "zip_base_name": zip_base_name,
        "zip_mtime": zip_mtime,
        "sequence_number": sequence_number,
        "total_files": total_files
    }
@af_task(task_id="download_and_unzip")
def download_and_unzip(config: dict):
    """Download the assigned zip from OCI Object Storage and extract its CSVs.

    Returns:
        {"extracted_files": [{"filename", "path"}...], "zip_config": config,
         "temp_dir": per-run scratch directory}
    """
    zip_key = config["zip_object_name"]
    zip_base = config["zip_base_name"]
    client = _oci_client()
    os.makedirs(TEMP_DIR, exist_ok=True)
    # Create unique temp directory for this run.
    # NOTE(review): the name is second-granular; with max_active_runs=3 two
    # runs starting in the same second would share a directory — confirm, or
    # derive the name from the run_id instead.
    run_temp_dir = os.path.join(TEMP_DIR, f"run_{datetime.now().strftime('%Y%m%d_%H%M%S')}")
    os.makedirs(run_temp_dir, exist_ok=True)
    local_zip = os.path.join(run_temp_dir, zip_base)
    logging.info("Downloading %s to %s", zip_key, local_zip)
    get_obj = client.get_object(OCI_NAMESPACE, OCI_BUCKET, zip_key)
    # Stream in 1 MiB chunks so large archives do not load fully into memory.
    with open(local_zip, 'wb') as f:
        for chunk in get_obj.data.raw.stream(1024 * 1024, decode_content=False):
            f.write(chunk)
    logging.info("Unzipping %s", local_zip)
    # NOTE(review): extractall() trusts member paths; acceptable for a
    # controlled upstream bucket, but unsafe for untrusted archives (zip-slip).
    with zipfile.ZipFile(local_zip, 'r') as zip_ref:
        zip_ref.extractall(run_temp_dir)
    extracted_files = []
    # Walk recursively: the archive may nest its CSVs inside folders.
    for root, dirs, files in os.walk(run_temp_dir):
        for file in files:
            if file.endswith('.csv'):
                full_path = os.path.join(root, file)
                extracted_files.append({"filename": file, "path": full_path})
                logging.info("Extracted CSV: %s", file)
    logging.info("Total CSV files extracted: %d", len(extracted_files))
    return {
        "extracted_files": extracted_files,
        "zip_config": config,
        "temp_dir": run_temp_dir
    }
@af_task(task_id="init_workflow")
def init_workflow(unzipped: dict):
    """Open an MRDS workflow and map each expected CSV to its parse config.

    CSVs listed in CSV_CONFIGS but missing from the archive are logged as
    warnings and omitted from task_configs.

    Returns:
        {"workflow_history_key", "task_configs", "zip_config", "temp_dir"}
    """
    database_name = WORKFLOW_CONFIG["database_name"]
    workflow_name = WORKFLOW_CONFIG["workflow_name"]
    ctx = get_current_context()
    run_id = str(ctx['ti'].run_id)
    # Register this run in MRDS; the key ties all task activity together.
    a_workflow_history_key = mrds_init_workflow(database_name, workflow_name, run_id)
    extracted_files = unzipped.get("extracted_files", [])
    zip_config = unzipped.get("zip_config", {})
    temp_dir = unzipped.get("temp_dir")
    task_configs = []
    for csv_config in CSV_CONFIGS:
        # First extracted file whose basename matches the expected CSV name.
        matching_file = next(
            (ef for ef in extracted_files if ef["filename"] == csv_config["source_filename"]),
            None
        )
        if matching_file:
            task_configs.append({
                "task_name": csv_config["task_name"],
                "source_filename": csv_config["source_filename"],
                "source_path": matching_file["path"],
                "config_file": csv_config["config_yaml"],
            })
            logging.info("Prepared task config for %s", csv_config["source_filename"])
        else:
            logging.warning("CSV file %s not found in extracted files", csv_config["source_filename"])
    return {
        "workflow_history_key": a_workflow_history_key,
        "task_configs": task_configs,
        "zip_config": zip_config,
        "temp_dir": temp_dir
    }
def run_mrds_task(task_config: dict, **context):
    """Run MRDS processing for one CSV file; returns "SUCCESS" or raises.

    Raises:
        FileNotFoundError: when the parse config or the CSV is missing.
        AirflowFailException: when init_workflow produced no workflow key.
    """
    task_name = task_config["task_name"]
    source_path = task_config["source_path"]
    config_file = task_config["config_file"]
    # Validate both inputs up front, config first, so the error names the
    # exact missing file.
    for description, path in (
        ("Config file", config_file),
        ("Source CSV file", source_path),
    ):
        if not os.path.exists(path):
            raise FileNotFoundError(f"{description} not found: {path}")
    init_bundle = context['ti'].xcom_pull(task_ids='init_workflow') or {}
    workflow_history_key = init_bundle.get('workflow_history_key')
    if not workflow_history_key:
        raise AirflowFailException("No workflow_history_key from init_workflow")
    try:
        logging.info(f"{task_name}: Starting MRDS processing for {source_path}")
        mrds_main(workflow_history_key, source_path, config_file, generate_workflow_context=False)
        logging.info(f"{task_name}: MRDS processing completed successfully")
    except Exception:
        # Record the traceback, then let Airflow fail the task.
        logging.exception(f"{task_name}: MRDS failed on {source_path}")
        raise
    return "SUCCESS"
def finalise_workflow_task(**context):
    """Close the MRDS workflow and, on success, mark the zip as processed.

    Scheduled with trigger_rule=ALL_DONE, so it runs even when CSV tasks
    failed; it inspects their task-instance states to decide the outcome.

    Raises:
        AirflowFailException: when no workflow key is available, or (after
            recording the workflow as failed in MRDS) when any CSV parse task
            ended FAILED/UPSTREAM_FAILED.
    """
    ti = context['ti']
    init_bundle = ti.xcom_pull(task_ids='init_workflow') or {}
    a_workflow_history_key = init_bundle.get('workflow_history_key')
    zip_config = init_bundle.get('zip_config', {})
    if a_workflow_history_key is None:
        raise AirflowFailException("No workflow history key; cannot finalise workflow")
    # Check if any CSV task failed.
    csv_task_ids = [cfg["task_name"] for cfg in CSV_CONFIGS]
    dag_run = context['dag_run']
    tis = [t for t in dag_run.get_task_instances() if t.task_id in csv_task_ids]
    from airflow.utils.state import State
    any_failed = any(ti_i.state in {State.FAILED, State.UPSTREAM_FAILED} for ti_i in tis)
    if not any_failed:
        # Mark the zip as processed so the coordinator skips it next time.
        zip_key = zip_config.get("zip_object_name")
        zip_mtime = zip_config.get("zip_mtime")
        # NOTE(review): truthiness check skips marking when zip_mtime is 0 —
        # confirm whether `zip_mtime is not None` was intended.
        if zip_key and zip_mtime:
            _mark_processed(zip_key, zip_mtime)
        mrds_finalise_workflow(a_workflow_history_key, "Y")
        logging.info("Finalised workflow %s as SUCCESS", a_workflow_history_key)
    else:
        failed_tasks = [ti_i.task_id for ti_i in tis if ti_i.state in {State.FAILED, State.UPSTREAM_FAILED}]
        # Record the failure in MRDS before failing this task.
        mrds_finalise_workflow(a_workflow_history_key, "N")
        logging.error("Finalised workflow %s as FAILED (failed tasks=%s)",
                      a_workflow_history_key, failed_tasks)
        raise AirflowFailException(f"Workflow failed for tasks: {failed_tasks}")
@af_task(task_id="cleanup_temp_files")
def cleanup_temp_files(**context):
    """Remove this run's scratch directory created by download_and_unzip."""
    import shutil
    bundle = context['ti'].xcom_pull(task_ids='init_workflow') or {}
    scratch = bundle.get('temp_dir')
    if scratch and os.path.exists(scratch):
        shutil.rmtree(scratch)
        logging.info("Cleaned up temp directory: %s", scratch)
@af_task(task_id="move_zip_to_archive")
def move_zip_to_archive(**context):
    """Copy the processed zip under <prefix>archive/ and delete the original.

    Any OCI failure is logged and re-raised so the archive step is visible
    as a failed task in the DAG.
    """
    ti = context['ti']
    init_bundle = ti.xcom_pull(task_ids='init_workflow') or {}
    zip_config = init_bundle.get('zip_config', {})
    zip_key = zip_config.get("zip_object_name")
    if not zip_key:
        logging.warning("No zip key found, skipping archive")
        return
    client = _oci_client()
    # Rewrite only the first occurrence of the inbox prefix into archive/.
    archive_key = zip_key.replace(OBJECT_PREFIX, f"{OBJECT_PREFIX}archive/", 1)
    try:
        # NOTE(review): OCI copy_object is asynchronous (it starts a work
        # request); deleting the source immediately afterwards can race the
        # copy — confirm, or wait on the work request before delete_object.
        client.copy_object(
            OCI_NAMESPACE,
            OCI_BUCKET,
            {
                "sourceObjectName": zip_key,
                "destinationRegion": os.getenv("OCI_REGION", "eu-frankfurt-1"),
                "destinationNamespace": OCI_NAMESPACE,
                "destinationBucket": OCI_BUCKET,
                "destinationObjectName": archive_key
            }
        )
        logging.info("Copied to archive: %s -> %s", zip_key, archive_key)
        client.delete_object(OCI_NAMESPACE, OCI_BUCKET, zip_key)
        logging.info("Deleted from inbox: %s", zip_key)
    except Exception as e:
        logging.error("Failed to archive zip file %s: %s", zip_key, e)
        raise
# Build the DAG structure.
config_task = get_zip_config()
unzip_task = download_and_unzip(config_task)
init_task = init_workflow(unzip_task)
# Create CSV processing tasks dynamically — one PythonOperator per expected
# CSV. source_path is resolved at runtime from init_workflow's XCom via a
# Jinja template (render_template_as_native_obj=True keeps it a native str).
csv_tasks = []
for csv_config in CSV_CONFIGS:
    task = PythonOperator(
        task_id=csv_config["task_name"],
        python_callable=run_mrds_task,
        op_kwargs={
            "task_config": {
                "task_name": csv_config["task_name"],
                "source_filename": csv_config["source_filename"],
                "source_path": "{{ ti.xcom_pull(task_ids='init_workflow')['task_configs'] | selectattr('task_name', 'equalto', '" + csv_config["task_name"] + "') | map(attribute='source_path') | first }}",
                "config_file": csv_config["config_yaml"],
            }
        },
        # NOTE(review): provide_context is deprecated/no-op on Airflow 2 —
        # confirm target version; context is passed automatically there.
        provide_context=True,
    )
    csv_tasks.append(task)
# Runs regardless of CSV task outcome (ALL_DONE) and decides success/failure.
finalize_task = PythonOperator(
    task_id='finalize_workflow',
    python_callable=finalise_workflow_task,
    provide_context=True,
    trigger_rule=TriggerRule.ALL_DONE,
    retries=0,
)
cleanup_task = cleanup_temp_files()
archive_task = move_zip_to_archive()
# Sentinel that only succeeds when the whole chain succeeded.
all_good = EmptyOperator(
    task_id="All_went_well",
    trigger_rule=TriggerRule.ALL_SUCCESS,
)
# Define task dependencies: download -> init -> parallel parses -> finalize
# -> cleanup + archive -> sentinel.
config_task >> unzip_task >> init_task >> csv_tasks >> finalize_task >> [cleanup_task, archive_task] >> all_good
logging.info("CSDB Ratings Full Processor DAG ready")

View File

@@ -0,0 +1,29 @@
# App configurations
encoding_type: latin1
# Global configurations
tmpdir: /tmp
inbox_prefix: INBOX/FXCD/BRANCH
archive_prefix: ARCHIVE/FXCD/BRANCH
workflow_name: w_ODS_FXCD_F_BRANCH
# NOTE(review): plain `None` parses as the string "None", not YAML null —
# confirm the loader expects this literal (use `null` otherwise).
validation_schema_path: None
file_type: csv
tasks:
  - task_name: m_ODS_FXCD_F_BRANCH_PARSE
    ods_prefix: INBOX/FXCD/BRANCH/FXCD_F_BRANCH
    output_table: FXCD_F_BRANCH
    # a_key / workflow_key columns are generated; csv_header entries
    # presumably copy the CSV column named in 'value' into the output column
    # named in 'column_header' — TODO confirm against the MRDS parser.
    output_columns:
      - type: 'a_key'
        column_header: 'A_KEY'
      - type: 'workflow_key'
        column_header: 'A_WORKFLOW_HISTORY_KEY'
      - type: 'csv_header'
        value: 'BRANCH_ID'
        column_header: 'BRANCH_ID'
      - type: 'csv_header'
        value: 'ENTITY_ID'
        column_header: 'ENTITY_ID'
      - type: 'csv_header'
        value: 'CTP_ID'
        column_header: 'CTP_ID'

View File

@@ -0,0 +1,53 @@
# App configurations
encoding_type: latin1
# Global configurations
tmpdir: /tmp
inbox_prefix: INBOX/FXCD/CLEARER
archive_prefix: ARCHIVE/FXCD/CLEARER
workflow_name: w_ODS_FXCD_F_CLEARER
# NOTE(review): plain `None` parses as the string "None", not YAML null —
# confirm the loader expects this literal (use `null` otherwise).
validation_schema_path: None
file_type: csv
tasks:
  - task_name: m_ODS_FXCD_F_CLEARER_PARSE
    ods_prefix: INBOX/FXCD/CLEARER/FXCD_F_CLEARER
    output_table: FXCD_F_CLEARER
    # a_key / workflow_key columns are generated; csv_header entries map the
    # CSV column in 'value' to the output column in 'column_header'.
    output_columns:
      - type: 'a_key'
        column_header: 'A_KEY'
      - type: 'workflow_key'
        column_header: 'A_WORKFLOW_HISTORY_KEY'
      - type: 'csv_header'
        value: 'CLEARER_ID'
        column_header: 'CLEARER_ID'
      - type: 'csv_header'
        value: 'ENTITY_ID'
        column_header: 'ENTITY_ID'
      - type: 'csv_header'
        value: 'ELIGIBILITY_OF_FLAG'
        column_header: 'ELIGIBILITY_OF_FLAG'
      - type: 'csv_header'
        value: 'ELIGIBILITY_FR_FLAG'
        column_header: 'ELIGIBILITY_FR_FLAG'
      - type: 'csv_header'
        value: 'ACTIVE_FLAG'
        column_header: 'ACTIVE_FLAG'
      - type: 'csv_header'
        value: 'OVERALL_OF_LIMIT_AMT'
        column_header: 'OVERALL_OF_LIMIT_AMT'
      - type: 'csv_header'
        value: 'CASH_OF_LIMIT_AMT'
        column_header: 'CASH_OF_LIMIT_AMT'
      - type: 'csv_header'
        value: 'SECURITIES_OF_LIMIT_AMT'
        column_header: 'SECURITIES_OF_LIMIT_AMT'
      - type: 'csv_header'
        value: 'OVERALL_FR_LIMIT_AMT'
        column_header: 'OVERALL_FR_LIMIT_AMT'
      - type: 'csv_header'
        value: 'CASH_FR_LIMIT_AMT'
        column_header: 'CASH_FR_LIMIT_AMT'
      - type: 'csv_header'
        value: 'SECURITIES_FR_LIMIT_AMT'
        column_header: 'SECURITIES_FR_LIMIT_AMT'

View File

@@ -0,0 +1,35 @@
# App configurations
encoding_type: latin1
# Global configurations
tmpdir: /tmp
inbox_prefix: INBOX/FXCD/CLEARER_NCB_LIMIT
archive_prefix: ARCHIVE/FXCD/CLEARER_NCB_LIMIT
workflow_name: w_ODS_FXCD_F_CLEARER_NCB_LIMIT
# NOTE(review): plain `None` parses as the string "None", not YAML null —
# confirm the loader expects this literal (use `null` otherwise).
validation_schema_path: None
file_type: csv
tasks:
  - task_name: m_ODS_FXCD_F_CLEARER_NCB_LIMIT_PARSE
    ods_prefix: INBOX/FXCD/CLEARER_NCB_LIMIT/FXCD_F_CLEARER_NCB_LIMIT
    output_table: FXCD_F_CLEARER_NCB_LIMIT
    # a_key / workflow_key columns are generated; csv_header entries map the
    # CSV column in 'value' to the output column in 'column_header'.
    output_columns:
      - type: 'a_key'
        column_header: 'A_KEY'
      - type: 'workflow_key'
        column_header: 'A_WORKFLOW_HISTORY_KEY'
      - type: 'csv_header'
        value: 'CLEARER_ID'
        column_header: 'CLEARER_ID'
      - type: 'csv_header'
        value: 'COUNTRY_ID'
        column_header: 'COUNTRY_ID'
      - type: 'csv_header'
        value: 'OVERALL_LIMIT_AMT'
        column_header: 'OVERALL_LIMIT_AMT'
      - type: 'csv_header'
        value: 'CASH_LIMIT_AMT'
        column_header: 'CASH_LIMIT_AMT'
      - type: 'csv_header'
        value: 'SECURITIES_LIMIT_AMT'
        column_header: 'SECURITIES_LIMIT_AMT'

View File

@@ -0,0 +1,29 @@
# App configurations
encoding_type: latin1
# Global configurations
tmpdir: /tmp
inbox_prefix: INBOX/FXCD/CONSTANT
archive_prefix: ARCHIVE/FXCD/CONSTANT
workflow_name: w_ODS_FXCD_F_CONSTANT
# NOTE(review): plain `None` parses as the string "None", not YAML null —
# confirm the loader expects this literal (use `null` otherwise).
validation_schema_path: None
file_type: csv
tasks:
  - task_name: m_ODS_FXCD_F_CONSTANT_PARSE
    ods_prefix: INBOX/FXCD/CONSTANT/FXCD_F_CONSTANT
    output_table: FXCD_F_CONSTANT
    # a_key / workflow_key columns are generated; csv_header entries map the
    # CSV column in 'value' to the output column in 'column_header'.
    output_columns:
      - type: 'a_key'
        column_header: 'A_KEY'
      - type: 'workflow_key'
        column_header: 'A_WORKFLOW_HISTORY_KEY'
      - type: 'csv_header'
        value: 'CONSTANT_NAME'
        column_header: 'CONSTANT_NAME'
      - type: 'csv_header'
        value: 'CONSTANT_VALUE'
        column_header: 'CONSTANT_VALUE'
      - type: 'csv_header'
        value: 'CONSTANT_TYPE'
        column_header: 'CONSTANT_TYPE'

View File

@@ -0,0 +1,70 @@
# App configurations
encoding_type: latin1
# Global configurations
tmpdir: /tmp
inbox_prefix: INBOX/FXCD/COUNTERPARTY
archive_prefix: ARCHIVE/FXCD/COUNTERPARTY
workflow_name: w_ODS_FXCD_F_COUNTERPARTY
# NOTE(review): plain `None` parses as the string "None", not YAML null —
# confirm the loader expects this literal (use `null` otherwise).
validation_schema_path: None
file_type: csv
tasks:
  - task_name: m_ODS_FXCD_F_COUNTERPARTY_PARSE
    ods_prefix: INBOX/FXCD/COUNTERPARTY/FXCD_F_COUNTERPARTY
    output_table: FXCD_F_COUNTERPARTY
    # a_key / workflow_key columns are generated; csv_header entries map the
    # CSV column in 'value' to the output column in 'column_header'.
    output_columns:
      - type: 'a_key'
        column_header: 'A_KEY'
      - type: 'workflow_key'
        column_header: 'A_WORKFLOW_HISTORY_KEY'
      - type: 'csv_header'
        value: 'CTP_ID'
        column_header: 'CTP_ID'
      - type: 'csv_header'
        value: 'ENTITY_ID'
        column_header: 'ENTITY_ID'
      - type: 'csv_header'
        value: 'CTP_GROUP_FKIT_CODE'
        column_header: 'CTP_GROUP_FKIT_CODE'
      - type: 'csv_header'
        value: 'ACTIVE_FLAG'
        column_header: 'ACTIVE_FLAG'
      - type: 'csv_header'
        value: 'CTP_COMMENT'
        column_header: 'CTP_COMMENT'
      - type: 'csv_header'
        value: 'GUARANTOR_ID'
        column_header: 'GUARANTOR_ID'
      - type: 'csv_header'
        value: 'OF_OVERALL_LMT_AMT'
        column_header: 'OF_OVERALL_LMT_AMT'
      - type: 'csv_header'
        value: 'OF_MANUAL_FLAG'
        column_header: 'OF_MANUAL_FLAG'
      - type: 'csv_header'
        value: 'FR_OVERALL_LMT_AMT'
        column_header: 'FR_OVERALL_LMT_AMT'
      - type: 'csv_header'
        value: 'FR_MANUAL_FLAG'
        column_header: 'FR_MANUAL_FLAG'
      - type: 'csv_header'
        value: 'MP_OVERALL_LMT_AMT'
        column_header: 'MP_OVERALL_LMT_AMT'
      - type: 'csv_header'
        value: 'MP_MANUAL_FLAG'
        column_header: 'MP_MANUAL_FLAG'
      - type: 'csv_header'
        value: 'EOI_CTP_GROUP'
        column_header: 'EOI_CTP_GROUP'
      # NOTE(review): 'ART_101_FLA' looks like a truncated 'ART_101_FLAG'
      # (cf. the ELIGIBILITY config) — confirm against the source CSV header
      # before changing; it must match the feed exactly.
      - type: 'csv_header'
        value: 'ART_101_FLA'
        column_header: 'ART_101_FLA'
      - type: 'csv_header'
        value: 'MEDIAN_CAPITAL_FLAG'
        column_header: 'MEDIAN_CAPITAL_FLAG'
      - type: 'csv_header'
        value: 'CHANGE_DESCRIPTION'
        column_header: 'CHANGE_DESCRIPTION'

View File

@@ -0,0 +1,85 @@
# App configurations
encoding_type: latin1
# Global configurations
tmpdir: /tmp
inbox_prefix: INBOX/FXCD/COUNTRY
archive_prefix: ARCHIVE/FXCD/COUNTRY
workflow_name: w_ODS_FXCD_F_COUNTRY
# NOTE(review): plain `None` parses as the string "None", not YAML null —
# confirm the loader expects this literal (use `null` otherwise).
validation_schema_path: None
file_type: csv
tasks:
  - task_name: m_ODS_FXCD_F_COUNTRY_PARSE
    ods_prefix: INBOX/FXCD/COUNTRY/FXCD_F_COUNTRY
    output_table: FXCD_F_COUNTRY
    # a_key / workflow_key columns are generated; csv_header entries map the
    # CSV column in 'value' to the output column in 'column_header'.
    output_columns:
      - type: 'a_key'
        column_header: 'A_KEY'
      - type: 'workflow_key'
        column_header: 'A_WORKFLOW_HISTORY_KEY'
      - type: 'csv_header'
        value: 'COUNTRY_ID'
        column_header: 'COUNTRY_ID'
      - type: 'csv_header'
        value: 'ENTITY_ID'
        column_header: 'ENTITY_ID'
      - type: 'csv_header'
        value: 'COUNTRY_GDP'
        column_header: 'COUNTRY_GDP'
      - type: 'csv_header'
        value: 'NCB_USD_LAMBDA'
        column_header: 'NCB_USD_LAMBDA'
      - type: 'csv_header'
        value: 'OF_FLAG'
        column_header: 'OF_FLAG'
      - type: 'csv_header'
        value: 'FR_FLAG'
        column_header: 'FR_FLAG'
      - type: 'csv_header'
        value: 'EU_FLAG'
        column_header: 'EU_FLAG'
      - type: 'csv_header'
        value: 'EUROSYSTEM_FLAG'
        column_header: 'EUROSYSTEM_FLAG'
      - type: 'csv_header'
        value: 'FR_PORTF_SHARE_OPTOUT'
        column_header: 'FR_PORTF_SHARE_OPTOUT'
      - type: 'csv_header'
        value: 'FR_LIMIT_CALC_OPTOUT'
        column_header: 'FR_LIMIT_CALC_OPTOUT'
      - type: 'csv_header'
        value: 'COUNTRY_COMMENT'
        column_header: 'COUNTRY_COMMENT'
      - type: 'csv_header'
        value: 'OF_LMT_AMNT'
        column_header: 'OF_LMT_AMNT'
      - type: 'csv_header'
        value: 'OF_MANUAL_FLAG'
        column_header: 'OF_MANUAL_FLAG'
      - type: 'csv_header'
        value: 'FR_LMT_AMNT'
        column_header: 'FR_LMT_AMNT'
      - type: 'csv_header'
        value: 'FR_MANUAL_FLAG'
        column_header: 'FR_MANUAL_FLAG'
      - type: 'csv_header'
        value: 'USD_PORTFOLIO_EUR_SIZE'
        column_header: 'USD_PORTFOLIO_EUR_SIZE'
      - type: 'csv_header'
        value: 'JPY_PORTFOLIO_EUR_SIZE'
        column_header: 'JPY_PORTFOLIO_EUR_SIZE'
      - type: 'csv_header'
        value: 'CAPITAL_KEY_AMNT'
        column_header: 'CAPITAL_KEY_AMNT'
      - type: 'csv_header'
        value: 'LAMBDA_MANUAL_FLAG'
        column_header: 'LAMBDA_MANUAL_FLAG'
      - type: 'csv_header'
        value: 'CNY_PORTFOLIO_EUR_SIZE'
        column_header: 'CNY_PORTFOLIO_EUR_SIZE'
      - type: 'csv_header'
        value: 'CHANGE_DESCRIPTION'
        column_header: 'CHANGE_DESCRIPTION'

View File

@@ -0,0 +1,35 @@
# App configurations
encoding_type: latin1
# Global configurations
tmpdir: /tmp
inbox_prefix: INBOX/FXCD/COUNTRY_NCB_LIMIT
archive_prefix: ARCHIVE/FXCD/COUNTRY_NCB_LIMIT
workflow_name: w_ODS_FXCD_F_COUNTRY_NCB_LIMIT
# NOTE(review): plain `None` parses as the string "None", not YAML null —
# confirm the loader expects this literal (use `null` otherwise).
validation_schema_path: None
file_type: csv
tasks:
  - task_name: m_ODS_FXCD_F_COUNTRY_NCB_LIMIT_PARSE
    ods_prefix: INBOX/FXCD/COUNTRY_NCB_LIMIT/FXCD_F_COUNTRY_NCB_LIMIT
    output_table: FXCD_F_COUNTRY_NCB_LIMIT
    # a_key / workflow_key columns are generated; csv_header entries map the
    # CSV column in 'value' to the output column in 'column_header'.
    output_columns:
      - type: 'a_key'
        column_header: 'A_KEY'
      - type: 'workflow_key'
        column_header: 'A_WORKFLOW_HISTORY_KEY'
      - type: 'csv_header'
        value: 'COUNTRY_ID'
        column_header: 'COUNTRY_ID'
      - type: 'csv_header'
        value: 'NCB_COUNTRY_ID'
        column_header: 'NCB_COUNTRY_ID'
      - type: 'csv_header'
        value: 'USD_LIMIT_AMT'
        column_header: 'USD_LIMIT_AMT'
      - type: 'csv_header'
        value: 'JPY_LIMIT_AMT'
        column_header: 'JPY_LIMIT_AMT'
      - type: 'csv_header'
        value: 'TOTAL_LIMIT_AMT'
        column_header: 'TOTAL_LIMIT_AMT'

View File

@@ -0,0 +1,35 @@
# App configurations
encoding_type: latin1
# Global configurations
tmpdir: /tmp
inbox_prefix: INBOX/FXCD/CTP_GROUP
archive_prefix: ARCHIVE/FXCD/CTP_GROUP
workflow_name: w_ODS_FXCD_F_CTP_GROUP
# NOTE(review): plain `None` parses as the string "None", not YAML null —
# confirm the loader expects this literal (use `null` otherwise).
validation_schema_path: None
file_type: csv
tasks:
  - task_name: m_ODS_FXCD_F_CTP_GROUP_PARSE
    ods_prefix: INBOX/FXCD/CTP_GROUP/FXCD_F_CTP_GROUP
    output_table: FXCD_F_CTP_GROUP
    # a_key / workflow_key columns are generated; csv_header entries map the
    # CSV column in 'value' to the output column in 'column_header'.
    output_columns:
      - type: 'a_key'
        column_header: 'A_KEY'
      - type: 'workflow_key'
        column_header: 'A_WORKFLOW_HISTORY_KEY'
      - type: 'csv_header'
        value: 'CTP_GROUP_FKIT_CODE'
        column_header: 'CTP_GROUP_FKIT_CODE'
      - type: 'csv_header'
        value: 'CTP_GROUP_NAME'
        column_header: 'CTP_GROUP_NAME'
      - type: 'csv_header'
        value: 'OF_MAX_LIMIT_AMT'
        column_header: 'OF_MAX_LIMIT_AMT'
      - type: 'csv_header'
        value: 'FR_MP_MAX_LIMIT_AMT'
        column_header: 'FR_MP_MAX_LIMIT_AMT'
      - type: 'csv_header'
        value: 'ACTIVE'
        column_header: 'ACTIVE'

View File

@@ -0,0 +1,32 @@
# App configurations
encoding_type: latin1
# Global configurations
tmpdir: /tmp
inbox_prefix: INBOX/FXCD/CTP_GROUP_NCB_LIMIT
archive_prefix: ARCHIVE/FXCD/CTP_GROUP_NCB_LIMIT
workflow_name: w_ODS_FXCD_F_CTP_GROUP_NCB_LIMIT
# NOTE(review): plain `None` parses as the string "None", not YAML null —
# confirm the loader expects this literal (use `null` otherwise).
validation_schema_path: None
file_type: csv
tasks:
  - task_name: m_ODS_FXCD_F_CTP_GROUP_NCB_LIMIT_PARSE
    ods_prefix: INBOX/FXCD/CTP_GROUP_NCB_LIMIT/FXCD_F_CTP_GROUP_NCB_LIMIT
    output_table: FXCD_F_CTP_GROUP_NCB_LIMIT
    # a_key / workflow_key columns are generated; csv_header entries map the
    # CSV column in 'value' to the output column in 'column_header'.
    output_columns:
      - type: 'a_key'
        column_header: 'A_KEY'
      - type: 'workflow_key'
        column_header: 'A_WORKFLOW_HISTORY_KEY'
      - type: 'csv_header'
        value: 'COUNTRY_ID'
        column_header: 'COUNTRY_ID'
      - type: 'csv_header'
        value: 'CTP_GROUP_FKIT_CODE'
        column_header: 'CTP_GROUP_FKIT_CODE'
      - type: 'csv_header'
        value: 'ELIGIBILITY_ID'
        column_header: 'ELIGIBILITY_ID'
      - type: 'csv_header'
        value: 'LIMIT_AMT'
        column_header: 'LIMIT_AMT'

View File

@@ -0,0 +1,32 @@
# App configurations
encoding_type: latin1
# Global configurations
tmpdir: /tmp
inbox_prefix: INBOX/FXCD/CTP_NCB_LIMIT
archive_prefix: ARCHIVE/FXCD/CTP_NCB_LIMIT
workflow_name: w_ODS_FXCD_F_CTP_NCB_LIMIT
# NOTE(review): plain `None` parses as the string "None", not YAML null —
# confirm the loader expects this literal (use `null` otherwise).
validation_schema_path: None
file_type: csv
tasks:
  - task_name: m_ODS_FXCD_F_CTP_NCB_LIMIT_PARSE
    ods_prefix: INBOX/FXCD/CTP_NCB_LIMIT/FXCD_F_CTP_NCB_LIMIT
    output_table: FXCD_F_CTP_NCB_LIMIT
    # a_key / workflow_key columns are generated; csv_header entries map the
    # CSV column in 'value' to the output column in 'column_header'.
    output_columns:
      - type: 'a_key'
        column_header: 'A_KEY'
      - type: 'workflow_key'
        column_header: 'A_WORKFLOW_HISTORY_KEY'
      - type: 'csv_header'
        value: 'COUNTRY_ID'
        column_header: 'COUNTRY_ID'
      - type: 'csv_header'
        value: 'ELIGIBILITY_ID'
        column_header: 'ELIGIBILITY_ID'
      - type: 'csv_header'
        value: 'CTP_ID'
        column_header: 'CTP_ID'
      - type: 'csv_header'
        value: 'LIMIT_AMT'
        column_header: 'LIMIT_AMT'

View File

@@ -0,0 +1,78 @@
# App configurations
encoding_type: latin1
# Global configurations
tmpdir: /tmp
inbox_prefix: INBOX/FXCD/ELIGIBILITY
archive_prefix: ARCHIVE/FXCD/ELIGIBILITY
workflow_name: w_ODS_FXCD_F_ELIGIBILITY
validation_schema_path: None
file_type: csv
tasks:
- task_name: m_ODS_FXCD_F_ELIGIBILITY_PARSE
ods_prefix: INBOX/FXCD/ELIGIBILITY/FXCD_F_ELIGIBILITY
output_table: FXCD_F_ELIGIBILITY
output_columns:
- type: 'a_key'
column_header: 'A_KEY'
- type: 'workflow_key'
column_header: 'A_WORKFLOW_HISTORY_KEY'
- type: 'csv_header'
value: 'ELIGIBILITY_ID'
column_header: 'ELIGIBILITY_ID'
- type: 'csv_header'
value: 'ELIGIBILITY_TYPE'
column_header: 'ELIGIBILITY_TYPE'
- type: 'csv_header'
value: 'ELIGIBILITY_NAME'
column_header: 'ELIGIBILITY_NAME'
- type: 'csv_header'
value: 'LIMIT_PERCENTAGE'
column_header: 'LIMIT_PERCENTAGE'
- type: 'csv_header'
value: 'LIMIT_AMT_MANUAL_FLAG'
column_header: 'LIMIT_AMT_MANUAL_FLAG'
- type: 'csv_header'
value: 'LIMIT_AMT'
column_header: 'LIMIT_AMT'
- type: 'csv_header'
value: 'NCB_LMT_AMT_FLOOR'
column_header: 'NCB_LMT_AMT_FLOOR'
- type: 'csv_header'
value: 'RATING_THRESHOLD'
column_header: 'RATING_THRESHOLD'
- type: 'csv_header'
value: 'FKIT_SUBLIMIT_NAME'
column_header: 'FKIT_SUBLIMIT_NAME'
- type: 'csv_header'
value: 'ELIGIBILITY_ORDER'
column_header: 'ELIGIBILITY_ORDER'
- type: 'csv_header'
value: 'LIMIT_DISTRIBUTION'
column_header: 'LIMIT_DISTRIBUTION'
- type: 'csv_header'
value: 'CTP_MIN_REQ_RATINGS'
column_header: 'CTP_MIN_REQ_RATINGS'
- type: 'csv_header'
value: 'LIMITS_CALCULATION'
column_header: 'LIMITS_CALCULATION'
- type: 'csv_header'
value: 'ART_101_FLAG'
column_header: 'ART_101_FLAG'
- type: 'csv_header'
value: 'DEFINITION'
column_header: 'DEFINITION'
- type: 'csv_header'
value: 'FOR_CP_FLAG'
column_header: 'FOR_CP_FLAG'
- type: 'csv_header'
value: 'FOR_CPG_FLAG'
column_header: 'FOR_CPG_FLAG'
- type: 'csv_header'
value: 'OVERALL_LIMIT_FLAG'
column_header: 'OVERALL_LIMIT_FLAG'
- type: 'csv_header'
value: 'IDENTIFIER'
column_header: 'IDENTIFIER'

View File

@@ -0,0 +1,35 @@
# App configurations
encoding_type: latin1
# Global configurations
tmpdir: /tmp
inbox_prefix: INBOX/FXCD/ELIGIBILITY_GROUP_MAP
archive_prefix: ARCHIVE/FXCD/ELIGIBILITY_GROUP_MAP
workflow_name: w_ODS_FXCD_F_ELIGIBILITY_GROUP_MAP
validation_schema_path: None
file_type: csv
tasks:
- task_name: m_ODS_FXCD_F_ELIGIBILITY_GROUP_MAP_PARSE
ods_prefix: INBOX/FXCD/ELIGIBILITY_GROUP_MAP/FXCD_F_ELIGIBILITY_GROUP_MAP
output_table: FXCD_F_ELIGIBILITY_GROUP_MAP
output_columns:
- type: 'a_key'
column_header: 'A_KEY'
- type: 'workflow_key'
column_header: 'A_WORKFLOW_HISTORY_KEY'
- type: 'csv_header'
value: 'CTP_GROUP_FKIT_CODE'
column_header: 'CTP_GROUP_FKIT_CODE'
- type: 'csv_header'
value: 'ELIGIBILITY_ID'
column_header: 'ELIGIBILITY_ID'
- type: 'csv_header'
value: 'OF_LIMIT_AMT'
column_header: 'OF_LIMIT_AMT'
- type: 'csv_header'
value: 'FR_MP_LIMIT_AMT'
column_header: 'FR_MP_LIMIT_AMT'
- type: 'csv_header'
value: 'LIMIT_AMT'
column_header: 'LIMIT_AMT'

View File

@@ -0,0 +1,44 @@
# App configurations
encoding_type: latin1
# Global configurations
tmpdir: /tmp
inbox_prefix: INBOX/FXCD/ELIGIBILITY_ISSUER
archive_prefix: ARCHIVE/FXCD/ELIGIBILITY_ISSUER
workflow_name: w_ODS_FXCD_F_ELIGIBILITY_ISSUER
validation_schema_path: None
file_type: csv
tasks:
- task_name: m_ODS_FXCD_F_ELIGIBILITY_ISSUER_PARSE
ods_prefix: INBOX/FXCD/ELIGIBILITY_ISSUER/FXCD_F_ELIGIBILITY_ISSUER
output_table: FXCD_F_ELIGIBILITY_ISSUER
output_columns:
- type: 'a_key'
column_header: 'A_KEY'
- type: 'workflow_key'
column_header: 'A_WORKFLOW_HISTORY_KEY'
- type: 'csv_header'
value: 'ELIGIBILITY_ISSUER_ID'
column_header: 'ELIGIBILITY_ISSUER_ID'
- type: 'csv_header'
value: 'ELIGIBILITY_ISSUER_NAME'
column_header: 'ELIGIBILITY_ISSUER_NAME'
- type: 'csv_header'
value: 'ART_101_FLAG'
column_header: 'ART_101_FLAG'
- type: 'csv_header'
value: 'RATING_THRESHOLD'
column_header: 'RATING_THRESHOLD'
- type: 'csv_header'
value: 'DEFINITION'
column_header: 'DEFINITION'
- type: 'csv_header'
value: 'RATING_TERM_TYPE'
column_header: 'RATING_TERM_TYPE'
- type: 'csv_header'
value: 'RATING_UPLIFT_FACTOR'
column_header: 'RATING_UPLIFT_FACTOR'
- type: 'csv_header'
value: 'ELIG_FLAG'
column_header: 'ELIG_FLAG'

View File

@@ -0,0 +1,32 @@
# App configurations
encoding_type: latin1
# Global configurations
tmpdir: /tmp
inbox_prefix: INBOX/FXCD/ELIGIBILITY_ISSUER_MAP
archive_prefix: ARCHIVE/FXCD/ELIGIBILITY_ISSUER_MAP
workflow_name: w_ODS_FXCD_F_ELIGIBILITY_ISSUER_MAP
validation_schema_path: None
file_type: csv
tasks:
- task_name: m_ODS_FXCD_F_ELIGIBILITY_ISSUER_MAP_PARSE
ods_prefix: INBOX/FXCD/ELIGIBILITY_ISSUER_MAP/FXCD_F_ELIGIBILITY_ISSUER_MAP
output_table: FXCD_F_ELIGIBILITY_ISSUER_MAP
output_columns:
- type: 'a_key'
column_header: 'A_KEY'
- type: 'workflow_key'
column_header: 'A_WORKFLOW_HISTORY_KEY'
- type: 'csv_header'
value: 'ISSUER_ID'
column_header: 'ISSUER_ID'
- type: 'csv_header'
value: 'ELIGIBILITY_ISSUER_ID'
column_header: 'ELIGIBILITY_ISSUER_ID'
- type: 'csv_header'
value: 'ELEGIBLE_FLAG'
column_header: 'ELEGIBLE_FLAG'
- type: 'csv_header'
value: 'MANUAL_ELIGIBLE_FLAG'
column_header: 'MANUAL_ELIGIBLE_FLAG'

View File

@@ -0,0 +1,41 @@
# App configurations
encoding_type: latin1
# Global configurations
tmpdir: /tmp
inbox_prefix: INBOX/FXCD/ELIGIBILITY_MAP
archive_prefix: ARCHIVE/FXCD/ELIGIBILITY_MAP
workflow_name: w_ODS_FXCD_F_ELIGIBILITY_MAP
validation_schema_path: None
file_type: csv
tasks:
- task_name: m_ODS_FXCD_F_ELIGIBILITY_MAP_PARSE
ods_prefix: INBOX/FXCD/ELIGIBILITY_MAP/FXCD_F_ELIGIBILITY_MAP
output_table: FXCD_F_ELIGIBILITY_MAP
output_columns:
- type: 'a_key'
column_header: 'A_KEY'
- type: 'workflow_key'
column_header: 'A_WORKFLOW_HISTORY_KEY'
- type: 'csv_header'
value: 'CTP_ID'
column_header: 'CTP_ID'
- type: 'csv_header'
value: 'ELIGIBILITY_ID'
column_header: 'ELIGIBILITY_ID'
- type: 'csv_header'
value: 'OF_LIMIT_AMT'
column_header: 'OF_LIMIT_AMT'
- type: 'csv_header'
value: 'OF_FLAG'
column_header: 'OF_FLAG'
- type: 'csv_header'
value: 'FR_LIMIT_AMT'
column_header: 'FR_LIMIT_AMT'
- type: 'csv_header'
value: 'FR_MP_FLAG'
column_header: 'FR_MP_FLAG'
- type: 'csv_header'
value: 'LIMIT_AMT'
column_header: 'LIMIT_AMT'

View File

@@ -0,0 +1,56 @@
# App configurations
encoding_type: latin1
# Global configurations
tmpdir: /tmp
inbox_prefix: INBOX/FXCD/ENTITY
archive_prefix: ARCHIVE/FXCD/ENTITY
workflow_name: w_ODS_FXCD_F_ENTITY
validation_schema_path: None
file_type: csv
tasks:
- task_name: m_ODS_FXCD_F_ENTITY_PARSE
ods_prefix: INBOX/FXCD/ENTITY/FXCD_F_ENTITY
output_table: FXCD_F_ENTITY
output_columns:
- type: 'a_key'
column_header: 'A_KEY'
- type: 'workflow_key'
column_header: 'A_WORKFLOW_HISTORY_KEY'
- type: 'csv_header'
value: 'ENTITY_ID'
column_header: 'ENTITY_ID'
- type: 'csv_header'
value: 'FKIT_CODE'
column_header: 'FKIT_CODE'
- type: 'csv_header'
value: 'ENTITY_NAME'
column_header: 'ENTITY_NAME'
- type: 'csv_header'
value: 'BVD_BANK_INDEX_NUMBER'
column_header: 'BVD_BANK_INDEX_NUMBER'
- type: 'csv_header'
value: 'RISK_COUNTRY_ID'
column_header: 'RISK_COUNTRY_ID'
- type: 'csv_header'
value: 'ST_MANUAL_RATING_FLAG'
column_header: 'ST_MANUAL_RATING_FLAG'
- type: 'csv_header'
value: 'LT_MANUAL_RATING_FLAG'
column_header: 'LT_MANUAL_RATING_FLAG'
- type: 'csv_header'
value: 'ST_SECOND_BEST_RATING'
column_header: 'ST_SECOND_BEST_RATING'
- type: 'csv_header'
value: 'LT_SECOND_BEST_RATING'
column_header: 'LT_SECOND_BEST_RATING'
- type: 'csv_header'
value: 'CAP_TIER1_AMT'
column_header: 'CAP_TIER1_AMT'
- type: 'csv_header'
value: 'CAP_EQUITY_AMT'
column_header: 'CAP_EQUITY_AMT'
- type: 'csv_header'
value: 'BLOOMBERG_TICKER'
column_header: 'BLOOMBERG_TICKER'

View File

@@ -0,0 +1,35 @@
# App configurations
encoding_type: latin1
# Global configurations
tmpdir: /tmp
inbox_prefix: INBOX/FXCD/EQUIVALENCE_RULE
archive_prefix: ARCHIVE/FXCD/EQUIVALENCE_RULE
workflow_name: w_ODS_FXCD_F_EQUIVALENCE_RULE
validation_schema_path: None
file_type: csv
tasks:
- task_name: m_ODS_FXCD_F_EQUIVALENCE_RULE_PARSE
ods_prefix: INBOX/FXCD/EQUIVALENCE_RULE/FXCD_F_EQUIVALENCE_RULE
output_table: FXCD_F_EQUIVALENCE_RULE
output_columns:
- type: 'a_key'
column_header: 'A_KEY'
- type: 'workflow_key'
column_header: 'A_WORKFLOW_HISTORY_KEY'
- type: 'csv_header'
value: 'RATING_TERM_TYPE'
column_header: 'RATING_TERM_TYPE'
- type: 'csv_header'
value: 'AGENCY_ID'
column_header: 'AGENCY_ID'
- type: 'csv_header'
value: 'AGENCY_RATING'
column_header: 'AGENCY_RATING'
- type: 'csv_header'
value: 'NUMERICAL_EQUIVALENCE'
column_header: 'NUMERICAL_EQUIVALENCE'
- type: 'csv_header'
value: 'RATING_FACTOR'
column_header: 'RATING_FACTOR'

View File

@@ -0,0 +1,77 @@
# App configurations
encoding_type: latin1
# Global configurations
tmpdir: /tmp
inbox_prefix: INBOX/FXCD/ISSUER
archive_prefix: ARCHIVE/FXCD/ISSUER
workflow_name: w_ODS_FXCD_F_ISSUER
validation_schema_path: None
file_type: csv
tasks:
- task_name: m_ODS_FXCD_F_ISSUER_PARSE
ods_prefix: INBOX/FXCD/ISSUER/FXCD_F_ISSUER
output_table: FXCD_F_ISSUER
output_columns:
- type: 'a_key'
column_header: 'A_KEY'
- type: 'workflow_key'
column_header: 'A_WORKFLOW_HISTORY_KEY'
- type: 'csv_header'
value: 'ISSUER_ID'
column_header: 'ISSUER_ID'
- type: 'csv_header'
value: 'ENTITY_ID'
column_header: 'ENTITY_ID'
- type: 'csv_header'
value: 'ACTIVE_FLAG'
column_header: 'ACTIVE_FLAG'
- type: 'csv_header'
value: 'ISSUER_COMMENT'
column_header: 'ISSUER_COMMENT'
- type: 'csv_header'
value: 'OFM_ISSUER_LMT'
column_header: 'OFM_ISSUER_LMT'
- type: 'csv_header'
value: 'OF_MANUAL_FLAG'
column_header: 'OF_MANUAL_FLAG'
- type: 'csv_header'
value: 'GLOBAL_LMT_AMNT'
column_header: 'GLOBAL_LMT_AMNT'
- type: 'csv_header'
value: 'MANUAL_GLOBAL_LMT_FLAG'
column_header: 'MANUAL_GLOBAL_LMT_FLAG'
- type: 'csv_header'
value: 'OF_ISSUER_CATEGORY'
column_header: 'OF_ISSUER_CATEGORY'
- type: 'csv_header'
value: 'FR_ISSUER_CATEGORY'
column_header: 'FR_ISSUER_CATEGORY'
- type: 'csv_header'
value: 'ISSUER_LMT'
column_header: 'ISSUER_LMT'
- type: 'csv_header'
value: 'FR_MANUAL_FLAG'
column_header: 'FR_MANUAL_FLAG'
- type: 'csv_header'
value: 'NCB_USD_LAMBDA'
column_header: 'NCB_USD_LAMBDA'
- type: 'csv_header'
value: 'ART_101_FLAG'
column_header: 'ART_101_FLAG'
- type: 'csv_header'
value: 'UPLIFT_IMPLIED_RATING'
column_header: 'UPLIFT_IMPLIED_RATING'
- type: 'csv_header'
value: 'LAMBDA_MANUAL_FLAG'
column_header: 'LAMBDA_MANUAL_FLAG'
- type: 'csv_header'
value: 'CHANGE_DESCRIPTION'
column_header: 'CHANGE_DESCRIPTION'
- type: 'csv_header'
value: 'LT_FRM_LIMIT_RATIO'
column_header: 'LT_FRM_LIMIT_RATIO'
- type: 'csv_header'
value: 'LT_FRM_LIMIT_RATIO_MANUAL_FLAG'
column_header: 'LT_FRM_LIMIT_RATIO_MANUAL_FLAG'

View File

@@ -0,0 +1,38 @@
# App configurations
encoding_type: latin1
# Global configurations
tmpdir: /tmp
inbox_prefix: INBOX/FXCD/ISSUER_NCB_LIMIT
archive_prefix: ARCHIVE/FXCD/ISSUER_NCB_LIMIT
workflow_name: w_ODS_FXCD_F_ISSUER_NCB_LIMIT
validation_schema_path: None
file_type: csv
tasks:
- task_name: m_ODS_FXCD_F_ISSUER_NCB_LIMIT_PARSE
ods_prefix: INBOX/FXCD/ISSUER_NCB_LIMIT/FXCD_F_ISSUER_NCB_LIMIT
output_table: FXCD_F_ISSUER_NCB_LIMIT
output_columns:
- type: 'a_key'
column_header: 'A_KEY'
- type: 'workflow_key'
column_header: 'A_WORKFLOW_HISTORY_KEY'
- type: 'csv_header'
value: 'COUNTRY_ID'
column_header: 'COUNTRY_ID'
- type: 'csv_header'
value: 'ISSUER_ID'
column_header: 'ISSUER_ID'
- type: 'csv_header'
value: 'USD_LIMIT_AMT'
column_header: 'USD_LIMIT_AMT'
- type: 'csv_header'
value: 'JPY_LIMIT_AMT'
column_header: 'JPY_LIMIT_AMT'
- type: 'csv_header'
value: 'TOTAL_LIMIT_AMT'
column_header: 'TOTAL_LIMIT_AMT'
- type: 'csv_header'
value: 'LIMIT_TYPE'
column_header: 'LIMIT_TYPE'

View File

@@ -0,0 +1,29 @@
# App configurations
encoding_type: latin1
# Global configurations
tmpdir: /tmp
inbox_prefix: INBOX/FXCD/LIQUIDITY_NCB_LIMIT
archive_prefix: ARCHIVE/FXCD/LIQUIDITY_NCB_LIMIT
workflow_name: w_ODS_FXCD_F_LIQUIDITY_NCB_LIMIT
validation_schema_path: None
file_type: csv
tasks:
- task_name: m_ODS_FXCD_F_LIQUIDITY_NCB_LIMIT_PARSE
ods_prefix: INBOX/FXCD/LIQUIDITY_NCB_LIMIT/FXCD_F_LIQUIDITY_NCB_LIMIT
output_table: FXCD_F_LIQUIDITY_NCB_LIMIT
output_columns:
- type: 'a_key'
column_header: 'A_KEY'
- type: 'workflow_key'
column_header: 'A_WORKFLOW_HISTORY_KEY'
- type: 'csv_header'
value: 'COUNTRY_ID'
column_header: 'COUNTRY_ID'
- type: 'csv_header'
value: 'USD_NCB_MIN_LIMIT_AMT'
column_header: 'USD_NCB_MIN_LIMIT_AMT'
- type: 'csv_header'
value: 'USD_NCB_MAX_LIMIT_AMT'
column_header: 'USD_NCB_MAX_LIMIT_AMT'

View File

@@ -0,0 +1,47 @@
# App configurations
encoding_type: latin1
# Global configurations
tmpdir: /tmp
inbox_prefix: INBOX/FXCD/NCB_PORTFOLIO_SHARE
archive_prefix: ARCHIVE/FXCD/NCB_PORTFOLIO_SHARE
workflow_name: w_ODS_FXCD_F_NCB_PORTFOLIO_SHARE
validation_schema_path: None
file_type: csv
tasks:
- task_name: m_ODS_FXCD_F_NCB_PORTFOLIO_SHARE_PARSE
ods_prefix: INBOX/FXCD/NCB_PORTFOLIO_SHARE/FXCD_F_NCB_PORTFOLIO_SHARE
output_table: FXCD_F_NCB_PORTFOLIO_SHARE
output_columns:
- type: 'a_key'
column_header: 'A_KEY'
- type: 'workflow_key'
column_header: 'A_WORKFLOW_HISTORY_KEY'
- type: 'csv_header'
value: 'COUNTRY_ID'
column_header: 'COUNTRY_ID'
- type: 'csv_header'
value: 'COUNTRY_SHARE'
column_header: 'COUNTRY_SHARE'
- type: 'csv_header'
value: 'USD_COUNTRY_SHARE'
column_header: 'USD_COUNTRY_SHARE'
- type: 'csv_header'
value: 'JPY_COUNTRY_SHARE'
column_header: 'JPY_COUNTRY_SHARE'
- type: 'csv_header'
value: 'SCALED_COUNTRY_SHARE'
column_header: 'SCALED_COUNTRY_SHARE'
- type: 'csv_header'
value: 'SCALED_USD_COUNTRY_SHARE'
column_header: 'SCALED_USD_COUNTRY_SHARE'
- type: 'csv_header'
value: 'SCALED_JPY_COUNTRY_SHARE'
column_header: 'SCALED_JPY_COUNTRY_SHARE'
- type: 'csv_header'
value: 'CNY_COUNTRY_SHARE'
column_header: 'CNY_COUNTRY_SHARE'
- type: 'csv_header'
value: 'SCALED_CNY_COUNTRY_SHARE'
column_header: 'SCALED_CNY_COUNTRY_SHARE'

View File

@@ -0,0 +1,32 @@
# App configurations
encoding_type: latin1
# Global configurations
tmpdir: /tmp
inbox_prefix: INBOX/FXCD/RATING
archive_prefix: ARCHIVE/FXCD/RATING
workflow_name: w_ODS_FXCD_F_RATING
validation_schema_path: None
file_type: csv
tasks:
- task_name: m_ODS_FXCD_F_RATING_PARSE
ods_prefix: INBOX/FXCD/RATING/FXCD_F_RATING
output_table: FXCD_F_RATING
output_columns:
- type: 'a_key'
column_header: 'A_KEY'
- type: 'workflow_key'
column_header: 'A_WORKFLOW_HISTORY_KEY'
- type: 'csv_header'
value: 'ENTITY_ID'
column_header: 'ENTITY_ID'
- type: 'csv_header'
value: 'RATING_TERM_TYPE'
column_header: 'RATING_TERM_TYPE'
- type: 'csv_header'
value: 'AGENCY_ID'
column_header: 'AGENCY_ID'
- type: 'csv_header'
value: 'AGENCY_RATING'
column_header: 'AGENCY_RATING'

View File

@@ -0,0 +1,29 @@
# App configurations
encoding_type: latin1
# Global configurations
tmpdir: /tmp
inbox_prefix: INBOX/FXCD/RATING_AGENCY
archive_prefix: ARCHIVE/FXCD/RATING_AGENCY
workflow_name: w_ODS_FXCD_F_RATING_AGENCY
validation_schema_path: None
file_type: csv
tasks:
- task_name: m_ODS_FXCD_F_RATING_AGENCY_PARSE
ods_prefix: INBOX/FXCD/RATING_AGENCY/FXCD_F_RATING_AGENCY
output_table: FXCD_F_RATING_AGENCY
output_columns:
- type: 'a_key'
column_header: 'A_KEY'
- type: 'workflow_key'
column_header: 'A_WORKFLOW_HISTORY_KEY'
- type: 'csv_header'
value: 'AGENCY_ID'
column_header: 'AGENCY_ID'
- type: 'csv_header'
value: 'AGENCY_NAME'
column_header: 'AGENCY_NAME'
- type: 'csv_header'
value: 'ENABLED_FLAG'
column_header: 'ENABLED_FLAG'

View File

@@ -0,0 +1,129 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
xmlns="http://escb.ecb.int/csm-adjustment"
xmlns:lm="http://exdi.ecb.int/lm"
targetNamespace="http://escb.ecb.int/csm-adjustment"
elementFormDefault="qualified"
attributeFormDefault="unqualified">
<xs:import namespace="http://exdi.ecb.int/lm" schemaLocation="../../lm_common/lm.xsd" />
<xs:element name="adjustmentMessages">
<xs:complexType>
<xs:choice>
<xs:element ref="csmAdjustmentMessage" />
<xs:element ref="quarterlyRevaluationAdjustmentMessage" />
<xs:element ref="adhocAdjustmentMessage" />
</xs:choice>
</xs:complexType>
</xs:element>
<xs:element name="csmAdjustmentMessage">
<xs:complexType>
<xs:sequence>
<xs:element name="header">
<xs:complexType>
<xs:sequence>
<xs:element name="year" type="xs:gYear" />
<xs:element name="month" type="month" />
<xs:element name="version" type="lm:positiveInt" />
</xs:sequence>
</xs:complexType>
</xs:element>
<xs:element name="adjustment" type="adjustmentSingleForecast" minOccurs="1" maxOccurs="unbounded" />
</xs:sequence>
</xs:complexType>
</xs:element>
<xs:element name="quarterlyRevaluationAdjustmentMessage">
<xs:complexType>
<xs:sequence>
<xs:element name="header">
<xs:complexType>
<xs:sequence>
<xs:element name="year" type="xs:gYear" />
<xs:element name="quarter" type="quarter" />
<xs:element name="version" type="lm:positiveInt" />
</xs:sequence>
</xs:complexType>
</xs:element>
<xs:element name="adjustment" type="adjustmentMultipleForecasts" minOccurs="1" maxOccurs="unbounded" />
</xs:sequence>
</xs:complexType>
</xs:element>
<xs:element name="adhocAdjustmentMessage">
<xs:complexType>
<xs:sequence>
<xs:element name="header">
<xs:complexType>
<xs:sequence>
<xs:element name="date" type="xs:date" />
<xs:element name="version" type="lm:positiveInt" />
</xs:sequence>
</xs:complexType>
</xs:element>
<xs:element name="adjustment" type="adjustmentMultipleForecasts" minOccurs="1" maxOccurs="unbounded" />
</xs:sequence>
</xs:complexType>
</xs:element>
<xs:complexType name="baseAdjustment">
<xs:sequence>
<xs:element name="country" type="lm:isoCode" />
<xs:element name="effectiveDate" type="xs:date" />
<xs:element name="lastDateNotInForecast" type="xs:date" />
</xs:sequence>
</xs:complexType>
<xs:complexType name="adjustmentSingleForecast">
<xs:complexContent>
<xs:extension base="baseAdjustment">
<xs:sequence>
<xs:element name="forecastItem" type="forecastItem" />
</xs:sequence>
</xs:extension>
</xs:complexContent>
</xs:complexType>
<xs:complexType name="adjustmentMultipleForecasts">
<xs:complexContent>
<xs:extension base="baseAdjustment">
<xs:sequence>
<xs:element name="forecastItem" type="forecastItem" minOccurs="1" maxOccurs="unbounded" />
</xs:sequence>
</xs:extension>
</xs:complexContent>
</xs:complexType>
<xs:complexType name="forecastItem">
<xs:sequence>
<xs:element name="forecastName" type="lm:forecastName" />
<xs:element name="adjustmentAmount" type="extendedDecimalEuroValue" />
</xs:sequence>
</xs:complexType>
<xs:simpleType name="extendedDecimalEuroValue">
<xs:restriction base="xs:decimal">
<xs:totalDigits value="17" />
<xs:fractionDigits value="8" />
<xs:minInclusive value="-999999999.99999999" />
<xs:maxInclusive value="999999999.99999999" />
</xs:restriction>
</xs:simpleType>
<xs:simpleType name="month">
<xs:restriction base="xs:int">
<xs:minInclusive value="1" />
<xs:maxInclusive value="12" />
</xs:restriction>
</xs:simpleType>
<xs:simpleType name="quarter">
<xs:restriction base="xs:int">
<xs:minInclusive value="1" />
<xs:maxInclusive value="4" />
</xs:restriction>
</xs:simpleType>
</xs:schema>

View File

@@ -0,0 +1,78 @@
tmpdir: /tmp
inbox_prefix: INBOX/LM/AdhocAdjustment
archive_prefix: ARCHIVE/LM/AdhocAdjustment
workflow_name: w_ODS_LM_ADHOC_ADJUSTMENT_MSG
validation_schema_path: '/opt/airflow/src/airflow/dags/ods/lm/adhoc_adjustments/config/adhoc_adjustments.xsd'
file_type: xml
# List of tasks
tasks:
- task_name: m_ODS_LM_ADHOC_ADJUSTMENTS_HEADER_PARSE
ods_prefix: INBOX/LM/AdhocAdjustment/LM_ADHOC_ADJUSTMENTS_HEADER
output_table: LM_ADHOC_ADJUSTMENTS_HEADER
namespaces:
ns2: 'http://escb.ecb.int/csm-adjustment'
output_columns:
- type: 'xpath_element_id'
value: '/ns2:adjustmentMessages/ns2:adhocAdjustmentMessage'
column_header: 'A_KEY'
- type: 'workflow_key'
column_header: 'A_WORKFLOW_HISTORY_KEY'
- type: 'xpath'
value: '/ns2:adjustmentMessages/ns2:adhocAdjustmentMessage/ns2:header/ns2:date'
column_header: 'ADJUSTMENT_DATE'
is_key: 'N'
- type: 'xpath'
value: '/ns2:adjustmentMessages/ns2:adhocAdjustmentMessage/ns2:header/ns2:version'
column_header: 'VERSION'
is_key: 'N'
- task_name: m_ODS_LM_ADHOC_ADJUSTMENTS_ITEM_HEADER_PARSE
ods_prefix: INBOX/LM/AdhocAdjustment/LM_ADHOC_ADJUSTMENTS_ITEM_HEADER
output_table: LM_ADHOC_ADJUSTMENTS_ITEM_HEADER
namespaces:
ns2: 'http://escb.ecb.int/csm-adjustment'
output_columns:
- type: 'xpath_element_id'
value: '/ns2:adjustmentMessages/ns2:adhocAdjustmentMessage/ns2:adjustment'
column_header: 'A_KEY'
- type: 'workflow_key'
column_header: 'A_WORKFLOW_HISTORY_KEY'
- type: 'xpath_element_id'
value: '/ns2:adjustmentMessages/ns2:adhocAdjustmentMessage'
column_header: 'A_HEADER_FK'
- type: 'xpath'
value: '/ns2:adjustmentMessages/ns2:adhocAdjustmentMessage/ns2:adjustment/ns2:country'
column_header: 'COUNTRY'
is_key: 'N'
- type: 'xpath'
value: '/ns2:adjustmentMessages/ns2:adhocAdjustmentMessage/ns2:adjustment/ns2:effectiveDate'
column_header: 'EFFECTIVE_DATE'
is_key: 'N'
- type: 'xpath'
value: '/ns2:adjustmentMessages/ns2:adhocAdjustmentMessage/ns2:adjustment/ns2:lastDateNotInForecast'
column_header: 'LAST_DATE_NOT_FORECAST'
is_key: 'N'
- task_name: m_ODS_LM_ADHOC_ADJUSTMENTS_ITEM_PARSE
ods_prefix: INBOX/LM/AdhocAdjustment/LM_ADHOC_ADJUSTMENTS_ITEM
output_table: LM_ADHOC_ADJUSTMENTS_ITEM
namespaces:
ns2: 'http://escb.ecb.int/csm-adjustment'
output_columns:
- type: 'a_key'
column_header: 'A_KEY'
- type: 'workflow_key'
column_header: 'A_WORKFLOW_HISTORY_KEY'
- type: 'xpath_element_id'
value: '/ns2:adjustmentMessages/ns2:adhocAdjustmentMessage/ns2:adjustment'
column_header: 'A_HEADER_FK'
- type: 'xpath'
value: '/ns2:adjustmentMessages/ns2:adhocAdjustmentMessage/ns2:adjustment/ns2:forecastItem/ns2:forecastName'
column_header: 'FORECAST_NAME'
is_key: 'N'
- type: 'xpath'
value: '/ns2:adjustmentMessages/ns2:adhocAdjustmentMessage/ns2:adjustment/ns2:forecastItem/ns2:adjustmentAmount'
column_header: 'ADJUSTMENT_AMOUNT'
is_key: 'N'

View File

@@ -0,0 +1,520 @@
# dags/w_ODS_LM_ADHOC_ADJUSTMENT_MSG.py
# Idempotent, per-object mtime tracking
import sys
import os
import json
import logging
from pathlib import Path
from datetime import timedelta, datetime, timezone
from email.utils import parsedate_to_datetime
from airflow import DAG
from airflow.models import Variable
from airflow.decorators import task as af_task
from airflow.operators.python import PythonOperator
from airflow.utils.dates import days_ago
from airflow.utils.trigger_rule import TriggerRule
from airflow.operators.trigger_dagrun import TriggerDagRunOperator
from airflow.operators.empty import EmptyOperator
try:
from airflow.exceptions import AirflowFailException, AirflowSkipException
except Exception:
from airflow.exceptions import AirflowException as AirflowFailException
from airflow.exceptions import AirflowSkipException
# Import libs
sys.path.append('/opt/airflow/python/mrds_common')
sys.path.append('/opt/airflow/src/airflow/dags/ods/exdi')
from mrds.utils.manage_runs import init_workflow as mrds_init_workflow, finalise_workflow as mrds_finalise_workflow
from mrds.core import main as mrds_main
# DAG id derived from this file's name (".py" stripped) so file and DAG stay in sync.
dag_id = Path(__file__).stem
# Standard Airflow task defaults applied to every task in this DAG.
default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': days_ago(1),
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
}
# Identity passed to the MRDS init/finalise workflow helpers.
WORKFLOW_CONFIG = {
    "database_name": "ODS",
    "workflow_name": dag_id,
}
# OCI settings
OCI_NAMESPACE = os.getenv("BUCKET_NAMESPACE")  # Object Storage namespace
OCI_BUCKET = os.getenv("INBOX_BUCKET")  # bucket polled for inbound XML files
# Config YAML (single config for all files)
CONFIG_YAML = os.getenv(
    "EXDI_SINGLE_CONFIG_YAML",
    "/opt/airflow/src/airflow/dags/ods/lm/adhoc_adjustments/config/m_ODS_LM_ADHOC_ADJUSTMENT_PARSE.yaml",
)
logging.info("Using EXDI_SINGLE_CONFIG_YAML=%s", CONFIG_YAML)
# Idempotency controls
# EXDI_REPROCESS=true/1/yes forces re-ingestion of every object regardless of history.
REPROCESS = (os.getenv("EXDI_REPROCESS", "false").lower() in ("1", "true", "yes"))
# Airflow Variable names used for idempotent per-object tracking.
LAST_TS_VAR = f"{dag_id}__last_seen_ts"  # legacy watermark (kept for observability)
PROCESSED_SET_VAR = f"{dag_id}__processed_objects"  # legacy: list of keys (back-compat only)
PROCESSED_TS_VAR = f"{dag_id}__processed_objects_ts"  # NEW: map key -> last processed mtime (epoch float)
# Helpers
def _oci_client():
    """
    Create an OCI Object Storage client.
    Order: Resource Principals -> Instance Principals.

    Raises:
        RuntimeError: when neither authentication mechanism is available.
    """
    import oci

    region = (
        os.getenv("OCI_REGION")
        or os.getenv("OCI_RESOURCE_PRINCIPAL_REGION")
        or "eu-frankfurt-1"
    )
    # Each entry: (success log, failure log, zero-arg signer factory), tried in order.
    attempts = (
        ("Using OCI Resource Principals signer (region=%s).",
         "RP not available: %s",
         lambda: oci.auth.signers.get_resource_principals_signer()),
        ("Using OCI Instance Principals signer (region=%s).",
         "IP not available: %s",
         lambda: oci.auth.signers.InstancePrincipalsSecurityTokenSigner()),
    )
    for ok_msg, fail_msg, make_signer in attempts:
        try:
            signer = make_signer()
            cfg = {"region": region} if region else {}
            logging.info(ok_msg, cfg.get("region"))
            return oci.object_storage.ObjectStorageClient(cfg, signer=signer)
        except Exception as exc:
            # Fall through to the next auth mechanism.
            logging.info(fail_msg, exc)
    logging.error("Neither Resource Principals nor Instance Principals authentication found.")
    raise RuntimeError("Failed to create OCI client")
def _load_yaml(cfg_path: str) -> dict:
import yaml
p = Path(cfg_path)
if not p.exists():
raise FileNotFoundError(f"Config YAML not found: {cfg_path}")
return yaml.safe_load(p.read_text()) or {}
# Build config-derived constants directly from YAML
# Resolve OBJECT_PREFIX (the OCI key prefix to poll) at import time so a broken
# config surfaces in scheduler logs immediately; on any failure it is left as
# None and poll_oci_for_xml fails the run explicitly.
try:
    CONFIG_DATA = _load_yaml(CONFIG_YAML)
    OBJECT_PREFIX = CONFIG_DATA.get("inbox_prefix")
    if not (isinstance(OBJECT_PREFIX, str) and OBJECT_PREFIX.strip()):
        raise AirflowFailException("YAML must define 'inbox_prefix' for OBJECT_PREFIX.")
    OBJECT_PREFIX = OBJECT_PREFIX.strip()
    logging.info("YAML inbox_prefix -> OBJECT_PREFIX: %s", OBJECT_PREFIX)
except Exception as e:
    # NOTE(review): this also swallows the AirflowFailException raised just
    # above; the None sentinel is re-checked inside the poll task.
    logging.error("Failed to resolve OBJECT_PREFIX from YAML %s: %s", CONFIG_YAML, e)
    OBJECT_PREFIX = None
# New idempotency map (key -> last_processed_ts)
def _load_processed_map() -> dict[str, float]:
    """
    Load the idempotency map {object_key: last_processed_ts}.

    Primary source is the JSON-object Airflow Variable PROCESSED_TS_VAR.
    Back-compat: if only the legacy list Variable PROCESSED_SET_VAR exists,
    its keys are returned with a timestamp of 0.0. Any lookup/parse failure
    yields an empty map.
    """
    # Preferred format: JSON object mapping object key -> epoch-seconds float.
    try:
        payload = json.loads(Variable.get(PROCESSED_TS_VAR, default_var="{}")) or {}
        if isinstance(payload, dict):
            return {key: float(stamp) for key, stamp in payload.items()}
    except Exception:
        pass
    # Legacy format: JSON list of keys; no timestamps were recorded.
    try:
        legacy = json.loads(Variable.get(PROCESSED_SET_VAR, default_var="[]"))
        if isinstance(legacy, list):
            return dict.fromkeys(legacy, 0.0)
    except Exception:
        pass
    return {}
def _save_processed_map(m: dict[str, float]) -> None:
    """Persist the idempotency map (object key -> last processed mtime) as a JSON Airflow Variable."""
    Variable.set(PROCESSED_TS_VAR, json.dumps(m))
def _mark_processed_ts(objs: list[tuple[str, float]]):
    """
    Record (object_key, mtime) pairs as processed.

    Keeps the newest mtime per key; entries with a non-numeric timestamp are
    skipped. No-op when REPROCESS is enabled or ``objs`` is empty.
    """
    if REPROCESS or not objs:
        return
    processed = _load_processed_map()
    for obj_key, obj_ts in objs:
        try:
            stamp = float(obj_ts)
        except Exception:
            # Unusable timestamp: leave any existing entry untouched.
            continue
        if stamp > float(processed.get(obj_key, 0.0)):
            processed[obj_key] = stamp
    _save_processed_map(processed)
    logging.info("Processed map updated; size=%d", len(processed))
# Object listing (per-key mtime)
def _resolve_object_mtime(client, obj, name: str) -> float:
    """
    Best-effort modification timestamp (epoch seconds) for one object.

    Tries, in order: the listing entry's ``time_created``, a HEAD request's
    Last-Modified header, and finally "now" (logged as a warning so the
    fallback is visible).
    """
    ts = None
    t = getattr(obj, "time_created", None)
    if t:
        try:
            # datetime from the SDK, or epoch-millis from some payloads
            ts = t.timestamp() if hasattr(t, "timestamp") else float(t) / 1000.0
        except Exception:
            ts = None
    if ts is None:
        try:
            head = client.head_object(OCI_NAMESPACE, OCI_BUCKET, name)
            lm = head.headers.get("last-modified") or head.headers.get("Last-Modified")
            if lm:
                dt = parsedate_to_datetime(lm)
                if dt.tzinfo is None:
                    dt = dt.replace(tzinfo=timezone.utc)
                ts = dt.timestamp()
                logging.debug("Resolved ts via HEAD Last-Modified for %s: %s", name, ts)
        except Exception as e:
            logging.warning("head_object failed for %s: %s", name, e)
    if ts is None:
        ts = datetime.now(timezone.utc).timestamp()
        logging.warning("Object %s missing timestamp; falling back to now=%s", name, ts)
    return ts


def _list_new_xml_objects(prefix: str) -> list[dict]:
    """
    List .xml objects and decide inclusion per-object:
      include if REPROCESS or object_mtime > processed_map.get(object_key, 0.0)

    Returns: [{"name": "<full-key>", "base": "<file.xml>", "mtime": <epoch float>}]
    sorted ascending by mtime. Also advances the legacy LAST_TS_VAR watermark
    (observability only).

    Raises:
        AirflowFailException: when the bucket env vars are not configured.
    """
    if not OCI_NAMESPACE or not OCI_BUCKET:
        raise AirflowFailException("BUCKET_NAMESPACE and INBOX_BUCKET must be set")
    client = _oci_client()
    processed_map = _load_processed_map()
    try:
        last_seen = float(Variable.get(LAST_TS_VAR, default_var="0"))
    except Exception:
        last_seen = 0.0
    logging.info("Watermark last_seen=%s; processed_map_count=%d; prefix=%s",
                 last_seen, len(processed_map), prefix)
    new_items: list[dict] = []
    newest_ts = last_seen
    # Paginate: one list_objects call returns at most a single page of keys,
    # so follow next_start_with until exhausted (the original only read the
    # first page). Requesting timeCreated avoids a HEAD round-trip per object
    # in the common case.
    start = None
    while True:
        resp = client.list_objects(
            OCI_NAMESPACE, OCI_BUCKET, prefix=prefix, start=start,
            fields="name,timeCreated",
        )
        for o in (resp.data.objects or []):
            name = (o.name or "").strip()
            base = name.rsplit("/", 1)[-1] if name else ""
            logging.info("Processing object: %s", base)
            # Skip folder markers / empty keys
            if not name or name.endswith('/') or not base:
                logging.debug("Skip: folder marker or empty key: %r", name)
                continue
            if not base.lower().endswith(".xml"):
                logging.debug("Skip: not .xml: %r", name)
                continue
            ts = _resolve_object_mtime(client, o, name)
            last_proc_ts = float(processed_map.get(name, 0.0))
            include = REPROCESS or (ts > last_proc_ts)
            logging.info(
                "Decision for %s: obj_ts=%s, last_proc_ts=%s, REPROCESS=%s -> include=%s",
                name, ts, last_proc_ts, REPROCESS, include
            )
            if not include:
                continue
            new_items.append({"name": name, "base": base, "mtime": ts})
            if ts > newest_ts:
                newest_ts = ts
        start = getattr(resp.data, "next_start_with", None)
        if not start:
            break
    # Watermark advanced for visibility (optional)
    if not REPROCESS and new_items and newest_ts > last_seen:
        Variable.set(LAST_TS_VAR, str(newest_ts))
        logging.info("Advanced watermark from %s to %s", last_seen, newest_ts)
    new_items.sort(key=lambda x: x["mtime"])  # ascending
    logging.info("Found %d candidate .xml object(s) under prefix %s", len(new_items), prefix)
    return new_items
# DAG
with DAG(
dag_id=dag_id,
default_args=default_args,
description='EXDI workflow (polling): single YAML config for all XML files in OCI',
schedule_interval=None, # Run EVERY 10 MIN
catchup=False,
max_active_runs=1,
render_template_as_native_obj=True,
tags=["EXDI", "MRDS", "ODS", "OCI", "ADHOC_ADJUSTMENT"],
) as dag:
@af_task(task_id="poll_oci_for_xml")
def poll_oci_for_xml():
"""
Lists new .xml objects and prepares a workload list.
Returns {"workload": [{"object": "<key>", "base": "<file.xml>", "mtime": <float>} ...]}
"""
if not OBJECT_PREFIX:
raise AirflowFailException("No OCI object prefix configured. Check YAML 'inbox_prefix'.")
new_objs = _list_new_xml_objects(OBJECT_PREFIX)
logging.info("New .xml objects found: %s", json.dumps(new_objs, indent=2))
print("New .xml objects found:", json.dumps(new_objs, indent=2))
# already contains base + mtime
workload = [{"object": it["name"], "base": it["base"], "mtime": it["mtime"]} for it in new_objs]
logging.info("Prepared workload items: %d", len(workload))
print("Prepared workload:", json.dumps(workload, indent=2))
return {"workload": workload}
@af_task(task_id="init_workflow")
def init_workflow(polled: dict):
"""Initialize workflow; start MRDS workflow; build per-file task configs."""
database_name = WORKFLOW_CONFIG["database_name"]
workflow_name = WORKFLOW_CONFIG["workflow_name"]
env = os.getenv("MRDS_ENV", "dev")
username = os.getenv("MRDS_LOADER_DB_USER")
password = os.getenv("MRDS_LOADER_DB_PASS")
tnsalias = os.getenv("MRDS_LOADER_DB_TNS")
if not all([username, password, tnsalias]):
missing = []
if not username: missing.append("MRDS_LOADER_DB_USER")
if not password: missing.append("MRDS_LOADER_DB_PASS")
if not tnsalias: missing.append("MRDS_LOADER_DB_TNS")
raise AirflowFailException(f"Missing required env vars: {', '.join(missing)}")
workload = (polled or {}).get("workload") or []
# Airflow context for run_id
from airflow.operators.python import get_current_context
ctx = get_current_context()
run_id = str(ctx['ti'].run_id)
a_workflow_history_key = mrds_init_workflow(database_name, workflow_name, run_id)
workflow_context = {
"run_id": run_id,
"a_workflow_history_key": a_workflow_history_key
}
# Build TASK_CONFIGS dynamically: one per file, sequential numbering
task_base_name = "m_ODS_LM_ADHOC_ADJUSTMENT"
task_configs = []
for idx, w in enumerate(workload, start=1):
task_configs.append({
"task_name": f"{task_base_name}_{idx}",
"source_filename": w["base"], # pass basename to MRDS (adjust if you need full key)
"config_file": CONFIG_YAML,
})
bundle = {
"workflow_history_key": a_workflow_history_key,
"workflow_context": workflow_context,
"workload": workload, # includes object + mtime
"task_configs": task_configs, # list-of-dicts for mapping
"env": env,
}
logging.info("Init complete; workload=%d, tasks=%d", len(workload), len(task_configs))
return bundle
@af_task(task_id="get_task_configs")
def get_task_configs(init_bundle: dict):
    """Expose the per-file task configs from the init bundle for task mapping."""
    return init_bundle["task_configs"]
def run_mrds_task(task_name: str, source_filename: str, config_file: str, **context):
    """Process one file through MRDS (mapped task, serialized by max_active_tis_per_dag=1).

    Raises FileNotFoundError when the config file is absent and
    AirflowFailException when no workflow context was produced upstream.
    """
    ti = context['ti']
    if not os.path.exists(config_file):
        raise FileNotFoundError(f"Config file not found: {config_file}")
    bundle = ti.xcom_pull(task_ids='init_workflow') or {}
    workflow_context = bundle.get('workflow_context')
    workload = bundle.get('workload') or []
    if not workflow_context:
        raise AirflowFailException("No workflow_context from init_workflow")
    # Recover the full object key and mtime by matching the basename in the workload.
    match = next((w for w in workload if w.get('base') == source_filename), None)
    full_object_key = match.get('object') if match else None
    object_mtime = match.get('mtime') if match else None
    # Log/print the file being processed.
    logging.info("%s: picking file %s (object=%s, mtime=%s)",
                 task_name, source_filename, full_object_key or source_filename, object_mtime)
    print(f"{task_name}: picking file {source_filename} (object={full_object_key or source_filename}, mtime={object_mtime})")
    try:
        # NOTE: if MRDS expects full URI, change 'source_filename' to 'full_object_key'
        mrds_main(
            workflow_context,
            source_filename,  # or full_object_key if required in your env
            config_file,
            generate_workflow_context=False
        )
    except Exception:
        logging.exception("%s: MRDS failed on %s", task_name, source_filename)
        raise
    # Record the mtime observed during polling so the file is not re-picked.
    if full_object_key and object_mtime:
        _mark_processed_ts([(full_object_key, object_mtime)])
    ti.xcom_push(key='task_status', value='SUCCESS')
    logging.info("%s: success", task_name)
    return "SUCCESS"
def finalise_workflow_task(**context):
    """Finalise the MRDS workflow from the states of all mapped per-file tasks."""
    from airflow.utils.state import State
    ti = context['ti']
    dag_run = context['dag_run']
    bundle = ti.xcom_pull(task_ids='init_workflow') or {}
    history_key = bundle.get('workflow_history_key')
    if history_key is None:
        raise AirflowFailException("No workflow history key; cannot finalise workflow")
    bad_states = {State.FAILED, State.UPSTREAM_FAILED}
    mapped = [t for t in dag_run.get_task_instances() if t.task_id == "m_ODS_LM_ADHOC_ADJUSTMENT"]
    # No mapped instances at all means there were no files: still a success.
    if not mapped:
        mrds_finalise_workflow(history_key, "Y")
        logging.info("Finalised workflow %s as SUCCESS (no files)", history_key)
        return
    failed_idxs = [getattr(t, "map_index", None) for t in mapped if t.state in bad_states]
    if not failed_idxs:
        mrds_finalise_workflow(history_key, "Y")
        logging.info("Finalised workflow %s as SUCCESS", history_key)
        return
    mrds_finalise_workflow(history_key, "N")
    logging.error("Finalised workflow %s as FAILED (failed map indexes=%s)", history_key, failed_idxs)
    raise AirflowFailException(f"Workflow failed for mapped indexes: {failed_idxs}")
def check_success_for_mopdb(**context):
    """Gate the MOPDB trigger: succeed only when every processing task succeeded.

    Raises AirflowSkipException (so the downstream trigger is skipped) when
    finalize_workflow or any mapped task failed, or when nothing was processed.
    """
    from airflow.utils.state import State
    try:
        dag_run = context['dag_run']
        has_failures = False
        failure_reasons = []
        # Check finalize_workflow task.
        # BUG FIX: get_task_instance may return None; previously this raised
        # AttributeError, which the broad except turned into a misleading skip.
        finalize_task = dag_run.get_task_instance('finalize_workflow')
        if finalize_task is not None and finalize_task.state == State.FAILED:
            has_failures = True
            failure_reasons.append("finalize_workflow failed")
        # Check all mapped tasks (per-file processing)
        mapped_task_id = "m_ODS_LM_ADHOC_ADJUSTMENT"
        mapped_tasks = [t for t in dag_run.get_task_instances() if t.task_id == mapped_task_id]
        for task_instance in mapped_tasks:
            if task_instance.state in {State.FAILED, State.UPSTREAM_FAILED}:
                has_failures = True
                map_idx = getattr(task_instance, 'map_index', 'unknown')
                failure_reasons.append(f"Processing task failed at index {map_idx}")
        if has_failures:
            error_msg = f"Tasks failed - skipping MOPDB trigger: {', '.join(failure_reasons)}"
            logging.info(error_msg)
            raise AirflowSkipException(error_msg)
        # Check if all mapped tasks were skipped (no files to process)
        all_skipped = all(t.state == State.SKIPPED for t in mapped_tasks) if mapped_tasks else True
        if all_skipped or not mapped_tasks:
            error_msg = "All processing tasks were skipped (no files to process) - skipping MOPDB trigger"
            logging.info(error_msg)
            raise AirflowSkipException(error_msg)
        logging.info("All tasks completed successfully - proceeding to trigger MOPDB")
        return "SUCCESS"
    except AirflowSkipException:
        raise
    except Exception as e:
        # Deliberate best-effort: any unexpected error downgrades to a skip
        # rather than failing the run.
        logging.error(f"Error checking success for MOPDB: {e}", exc_info=True)
        raise AirflowSkipException(f"Error checking success - skipping MOPDB trigger: {e}")
# Task instantiation and dependency wiring
poll_task = poll_oci_for_xml()
init_out = init_workflow(poll_task)
task_cfgs = get_task_configs(init_out)

@af_task(task_id="m_ODS_LM_ADHOC_ADJUSTMENT", max_active_tis_per_dag=1)
def mapped_run(task_name: str, source_filename: str, config_file: str, **context):
    # Thin mapped wrapper; one instance per file, serialized by max_active_tis_per_dag=1.
    return run_mrds_task(task_name=task_name, source_filename=source_filename, config_file=config_file, **context)

per_file = mapped_run.expand_kwargs(task_cfgs)

finalize_workflow = PythonOperator(
    task_id='finalize_workflow',
    python_callable=finalise_workflow_task,
    provide_context=True,
    retries=0,
    trigger_rule=TriggerRule.ALL_DONE,  # always run, even after upstream failures
)

check_mopdb = PythonOperator(
    task_id='check_success_for_mopdb',
    python_callable=check_success_for_mopdb,
    provide_context=True,
    retries=0,
    trigger_rule=TriggerRule.ALL_DONE,  # always evaluate; it skips itself on failure
)

trigger_mopdb = TriggerDagRunOperator(
    task_id="Trigger_w_MOPDB_LM_ADHOC_ADJUSTMENT",
    trigger_dag_id="w_MOPDB_LM_ADHOC_ADJUSTMENT",
    conf={
        "source_dag": dag_id,
        "upstream_run_id": "{{ run_id }}",
        "objects": "{{ (ti.xcom_pull(task_ids='poll_oci_for_xml')['workload'] | map(attribute='object') | list) if ti.xcom_pull(task_ids='poll_oci_for_xml') else [] }}",
        "workflow_history_key": "{{ (ti.xcom_pull(task_ids='init_workflow')['workflow_history_key']) if ti.xcom_pull(task_ids='init_workflow') else None }}"
    },
    wait_for_completion=False,  # fire-and-forget
    trigger_rule=TriggerRule.NONE_FAILED_MIN_ONE_SUCCESS,  # only when the check succeeded
    retries=0,
)

all_good = EmptyOperator(
    task_id="All_went_well",
    trigger_rule=TriggerRule.ALL_DONE,  # terminal marker, always runs
)

poll_task >> init_out >> task_cfgs >> per_file >> finalize_workflow >> check_mopdb >> trigger_mopdb >> all_good

logging.info(
    "EXDI DAG ready: inbox_prefix=%s; using per-object processed ts map %s.",
    OBJECT_PREFIX, PROCESSED_TS_VAR
)

View File

@@ -0,0 +1,102 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<!-- Schema for EXDI LM balance-sheet messages: one or more per-CB balance
     sheets plus an optional Eurosystem aggregate. Shared simple types
     (isoCode, positiveInt, amountInEuro, freeText) come from the imported
     lm namespace. -->
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
xmlns="http://escb.ecb.int/balancesheet"
xmlns:lm="http://exdi.ecb.int/lm"
targetNamespace="http://escb.ecb.int/balancesheet"
elementFormDefault="qualified"
attributeFormDefault="unqualified">
<xs:import namespace="http://exdi.ecb.int/lm" schemaLocation="../../lm_common/lm.xsd" />
<!-- Root element. -->
<xs:element name="balanceSheetMessage">
<xs:complexType>
<xs:sequence>
<xs:element name="cbBalanceSheet" type="cbBalanceSheet" minOccurs="1" maxOccurs="unbounded" />
<xs:element name="eurosystemBalanceSheet" type="eurosystemBalanceSheet" minOccurs="0" />
</xs:sequence>
</xs:complexType>
</xs:element>
<!-- CB header extends the common header with a status and optional free text. -->
<xs:complexType name="cbHeader">
<xs:complexContent>
<xs:extension base="eurosystemHeader">
<xs:sequence>
<xs:element name="status" type="status" />
<xs:element name="freeText" type="lm:freeText" minOccurs="0" />
</xs:sequence>
</xs:extension>
</xs:complexContent>
</xs:complexType>
<xs:complexType name="eurosystemHeader">
<xs:sequence>
<xs:element name="country" type="lm:isoCode" />
<xs:element name="referenceDate" type="xs:date" />
<xs:element name="version" type="lm:positiveInt" />
<xs:element name="dateOfTransmission" type="xs:date" minOccurs="0" />
</xs:sequence>
</xs:complexType>
<xs:complexType name="cbBalanceSheet">
<xs:annotation>
<xs:documentation>Represents a balanceSheet for a CB</xs:documentation>
</xs:annotation>
<xs:sequence>
<xs:element name="header" type="cbHeader" />
<xs:element name="assets" type="balanceSheetItem" minOccurs="1" />
<xs:element name="liabilities" type="balanceSheetItem" minOccurs="1" />
</xs:sequence>
</xs:complexType>
<xs:complexType name="eurosystemBalanceSheet">
<xs:annotation>
<xs:documentation>Represents an aggregated balanceSheet of all CBs</xs:documentation>
</xs:annotation>
<xs:sequence>
<xs:element name="header" type="eurosystemHeader" />
<xs:element name="assets" type="balanceSheetItem" minOccurs="1" />
<xs:element name="liabilities" type="balanceSheetItem" minOccurs="1" />
</xs:sequence>
</xs:complexType>
<!-- Recursive: an item may nest child items to arbitrary depth. -->
<xs:complexType name="balanceSheetItem">
<xs:sequence>
<xs:element name="level" type="lm:positiveInt" />
<xs:element name="position" type="lm:positiveInt" />
<xs:element name="itemType" type="itemType" minOccurs="0" />
<xs:element name="fullyQualifiedPosition" type="fullyQualifiedPosition" minOccurs="0" />
<xs:element name="name" type="itemName" minOccurs="0" />
<xs:element name="amount" type="lm:amountInEuro" />
<xs:element name="item" type="balanceSheetItem" minOccurs="0" maxOccurs="unbounded" />
</xs:sequence>
</xs:complexType>
<xs:simpleType name="itemName">
<xs:restriction base="xs:string">
<xs:maxLength value="200" />
</xs:restriction>
</xs:simpleType>
<xs:simpleType name="itemType">
<xs:restriction base="xs:string">
<xs:enumeration value="Asset" />
<xs:enumeration value="Liability" />
</xs:restriction>
</xs:simpleType>
<!-- e.g. "A.1.2" (Asset/Liability path of 1-based positions). -->
<xs:simpleType name="fullyQualifiedPosition">
<xs:restriction base="xs:string">
<xs:pattern value="(A|L)(\.([1-9][0-9]*))+" />
<xs:maxLength value="200" />
</xs:restriction>
</xs:simpleType>
<xs:simpleType name="status">
<xs:restriction base="xs:string">
<xs:enumeration value="U" />
<xs:enumeration value="B" />
<xs:enumeration value="R" />
</xs:restriction>
</xs:simpleType>
</xs:schema>

View File

@@ -0,0 +1,82 @@
# Global configurations
tmpdir: /tmp
inbox_prefix: INBOX/LM/AggregatedDailyFinancialStatementOfTheEurosystem
archive_prefix: ARCHIVE/LM/AggregatedDailyFinancialStatementOfTheEurosystem
workflow_name: w_ODS_LM_BALANCESHEET
validation_schema_path: '/opt/airflow/src/airflow/dags/ods/lm/balancesheet/config/balancesheet.xsd'
file_type: xml

# List of tasks: one parse task per target table.
tasks:
  # Header rows: one per CB / Eurosystem balance sheet element.
  - task_name: m_ODS_LM_BALANCESHEET_HEADER_PARSE
    ods_prefix: INBOX/LM/AggregatedDailyFinancialStatementOfTheEurosystem/LM_BALANCESHEET_HEADER
    output_table: LM_BALANCESHEET_HEADER
    namespaces:
      ns2: 'http://escb.ecb.int/balancesheet'
    output_columns:
      - type: 'xpath_element_id'
        value: '/ns2:balanceSheetMessage/ns2:*'
        column_header: 'A_KEY'
      - type: 'workflow_key'
        column_header: 'A_WORKFLOW_HISTORY_KEY'
      - type: 'xpath'
        value: '/ns2:balanceSheetMessage/ns2:*/ns2:header/ns2:country'
        column_header: 'COUNTRY'
        is_key: 'N'
      - type: 'xpath'
        value: '/ns2:balanceSheetMessage/ns2:*/ns2:header/ns2:referenceDate'
        column_header: 'REFERENCE_DATE'
        is_key: 'N'
      - type: 'xpath'
        value: '/ns2:balanceSheetMessage/ns2:*/ns2:header/ns2:version'
        column_header: 'VERSION'
        is_key: 'N'
      - type: 'xpath'
        value: '/ns2:balanceSheetMessage/ns2:*/ns2:header/ns2:status'
        column_header: 'STATUS'
        is_key: 'N'
      - type: 'xpath'
        value: '/ns2:balanceSheetMessage/ns2:*/ns2:header/ns2:freeText'
        column_header: 'FREE_TEXT'
        is_key: 'N'
  # Item rows: one per (possibly nested) balanceSheetItem, linked to its header.
  - task_name: m_ODS_LM_BALANCESHEET_ITEM_PARSE
    ods_prefix: INBOX/LM/AggregatedDailyFinancialStatementOfTheEurosystem/LM_BALANCESHEET_ITEM
    output_table: LM_BALANCESHEET_ITEM
    namespaces:
      ns2: 'http://escb.ecb.int/balancesheet'
    output_columns:
      - type: 'a_key'
        column_header: 'A_KEY'
      - type: 'workflow_key'
        column_header: 'A_WORKFLOW_HISTORY_KEY'
      - type: 'xpath_element_id'
        value: '/ns2:balanceSheetMessage/ns2:*'
        column_header: 'A_HEADER_FK'
      - type: 'xpath'
        value: '/ns2:balanceSheetMessage//ns2:level'
        is_key: 'N'
        column_header: 'ITEM_LEVEL'
      - type: 'xpath'
        value: '/ns2:balanceSheetMessage//ns2:position'
        is_key: 'N'
        column_header: 'POSITION'
      - type: 'xpath'
        value: '/ns2:balanceSheetMessage//ns2:itemType'
        is_key: 'N'
        column_header: 'ITEM_TYPE'
      - type: 'xpath'
        value: '/ns2:balanceSheetMessage//ns2:fullyQualifiedPosition'
        column_header: 'FULLY_QUALIFIED_POSITION'
        is_key: 'N'
      - type: 'xpath'
        value: '/ns2:balanceSheetMessage//ns2:name'
        column_header: 'NAME'
        is_key: 'N'
      - type: 'xpath'
        value: '/ns2:balanceSheetMessage//ns2:amount'
        column_header: 'AMOUNT'
        is_key: 'N'

View File

@@ -0,0 +1,520 @@
# dags/m_ODS_LM_BALANCESHEET.py
# Idempotent, per-object mtime tracking
import sys
import os
import json
import logging
from pathlib import Path
from datetime import timedelta, datetime, timezone
from email.utils import parsedate_to_datetime
from airflow import DAG
from airflow.models import Variable
from airflow.decorators import task as af_task
from airflow.operators.python import PythonOperator
from airflow.utils.dates import days_ago
from airflow.utils.trigger_rule import TriggerRule
from airflow.operators.trigger_dagrun import TriggerDagRunOperator
from airflow.operators.empty import EmptyOperator
try:
from airflow.exceptions import AirflowFailException, AirflowSkipException
except Exception:
from airflow.exceptions import AirflowException as AirflowFailException
from airflow.exceptions import AirflowSkipException
# Import libs
sys.path.append('/opt/airflow/python/mrds_common')
sys.path.append('/opt/airflow/src/airflow/dags/ods/exdi')
from mrds.utils.manage_runs import init_workflow as mrds_init_workflow, finalise_workflow as mrds_finalise_workflow
from mrds.core import main as mrds_main
dag_id = Path(__file__).stem

default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': days_ago(1),
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
}

WORKFLOW_CONFIG = {
    "database_name": "ODS",
    "workflow_name": dag_id,
}

# OCI settings
OCI_NAMESPACE = os.getenv("BUCKET_NAMESPACE")
OCI_BUCKET = os.getenv("INBOX_BUCKET")

# Single YAML config shared by every file processed in a run.
CONFIG_YAML = os.getenv(
    "EXDI_SINGLE_CONFIG_YAML",
    "/opt/airflow/src/airflow/dags/ods/lm/balancesheet/config/m_ODS_LM_BALANCESHEET_PARSE.yaml",
)
logging.info("Using EXDI_SINGLE_CONFIG_YAML=%s", CONFIG_YAML)

# Idempotency controls
REPROCESS = (os.getenv("EXDI_REPROCESS", "false").lower() in ("1", "true", "yes"))
LAST_TS_VAR = f"{dag_id}__last_seen_ts"              # legacy watermark (observability only)
PROCESSED_SET_VAR = f"{dag_id}__processed_objects"   # legacy: list of keys (back-compat only)
PROCESSED_TS_VAR = f"{dag_id}__processed_objects_ts" # map key -> last processed mtime (epoch float)
# Helpers
def _oci_client():
    """
    Build an OCI Object Storage client.
    Tries Resource Principals first, then falls back to Instance Principals;
    raises RuntimeError when neither signer can be created.
    """
    import oci
    region = os.getenv("OCI_REGION") or os.getenv("OCI_RESOURCE_PRINCIPAL_REGION") or "eu-frankfurt-1"
    cfg = {"region": region} if region else {}
    # Resource Principals (functions / container instances)
    try:
        signer = oci.auth.signers.get_resource_principals_signer()
        logging.info("Using OCI Resource Principals signer (region=%s).", cfg.get("region"))
        return oci.object_storage.ObjectStorageClient(cfg, signer=signer)
    except Exception as e:
        logging.info("RP not available: %s", e)
    # Instance Principals (compute instances)
    try:
        signer = oci.auth.signers.InstancePrincipalsSecurityTokenSigner()
        logging.info("Using OCI Instance Principals signer (region=%s).", cfg.get("region"))
        return oci.object_storage.ObjectStorageClient(cfg, signer=signer)
    except Exception as e:
        logging.info("IP not available: %s", e)
    logging.error("Neither Resource Principals nor Instance Principals authentication found.")
    raise RuntimeError("Failed to create OCI client")
def _load_yaml(cfg_path: str) -> dict:
    """Parse the YAML file at cfg_path into a dict ({} when the file is empty).

    Raises FileNotFoundError when the path does not exist.
    """
    import yaml
    path = Path(cfg_path)
    if not path.exists():
        raise FileNotFoundError(f"Config YAML not found: {cfg_path}")
    return yaml.safe_load(path.read_text()) or {}
# Resolve the OCI inbox prefix from the YAML config at import time.
# On any failure OBJECT_PREFIX is left as None and the poll task fails fast.
try:
    CONFIG_DATA = _load_yaml(CONFIG_YAML)
    OBJECT_PREFIX = CONFIG_DATA.get("inbox_prefix")
    if not (isinstance(OBJECT_PREFIX, str) and OBJECT_PREFIX.strip()):
        raise AirflowFailException("YAML must define 'inbox_prefix' for OBJECT_PREFIX.")
    OBJECT_PREFIX = OBJECT_PREFIX.strip()
    logging.info("YAML inbox_prefix -> OBJECT_PREFIX: %s", OBJECT_PREFIX)
except Exception as e:
    logging.error("Failed to resolve OBJECT_PREFIX from YAML %s: %s", CONFIG_YAML, e)
    OBJECT_PREFIX = None
# New idempotency map (key -> last_processed_ts)
def _load_processed_map() -> dict[str, float]:
"""
Returns {object_key: last_processed_ts}.
Back-compat: if old set variable exists (list), treat those keys as ts=0.
"""
try:
raw = Variable.get(PROCESSED_TS_VAR, default_var="{}")
m = json.loads(raw) or {}
if isinstance(m, dict):
return {k: float(v) for k, v in m.items()}
except Exception:
pass
# Back-compat: migrate old set/list
try:
old = json.loads(Variable.get(PROCESSED_SET_VAR, default_var="[]"))
if isinstance(old, list):
return {k: 0.0 for k in old}
except Exception:
pass
return {}
def _save_processed_map(m: dict[str, float]) -> None:
    """Persist the processed-objects timestamp map as a JSON Airflow Variable."""
    Variable.set(PROCESSED_TS_VAR, json.dumps(m))
def _mark_processed_ts(objs: list[tuple[str, float]]):
    """
    Record (object_key, mtime) pairs in the processed map, keeping only the
    newest timestamp per key. No-op when reprocessing is forced or the list
    is empty; non-numeric timestamps are silently ignored.
    """
    if REPROCESS or not objs:
        return
    processed = _load_processed_map()
    for key, raw_ts in objs:
        try:
            ts = float(raw_ts)
        except Exception:
            continue
        if ts > float(processed.get(key, 0.0)):
            processed[key] = ts
    _save_processed_map(processed)
    logging.info("Processed map updated; size=%d", len(processed))
# Object listing (per-key mtime)
def _list_new_xml_objects(prefix: str) -> list[dict]:
    """
    List .xml objects under `prefix` and keep those not yet processed.

    Per-object inclusion rule: include if REPROCESS is set or the object's
    mtime is newer than processed_map.get(object_key, 0.0).

    Returns: [{"name": "<full-key>", "base": "<file.xml>", "mtime": <epoch float>}]
    sorted ascending by mtime.
    Raises AirflowFailException when bucket env vars are missing.
    """
    if not OCI_NAMESPACE or not OCI_BUCKET:
        raise AirflowFailException("BUCKET_NAMESPACE and INBOX_BUCKET must be set")
    client = _oci_client()
    processed_map = _load_processed_map()
    try:
        last_seen = float(Variable.get(LAST_TS_VAR, default_var="0"))
    except Exception:
        last_seen = 0.0
    logging.info("Watermark last_seen=%s; processed_map_count=%d; prefix=%s",
                 last_seen, len(processed_map), prefix)
    # FIX: paginate through the listing (previously only the first page was
    # read, silently dropping objects beyond the page limit) and request
    # timeCreated explicitly (not in ListObjects' default fields), so the
    # per-object HEAD fallback is rarely needed.
    all_objects = []
    start = None
    while True:
        resp = client.list_objects(OCI_NAMESPACE, OCI_BUCKET, prefix=prefix,
                                   fields="name,timeCreated", start=start)
        all_objects.extend(resp.data.objects or [])
        start = getattr(resp.data, "next_start_with", None)
        if not start:
            break
    new_items: list[dict] = []
    newest_ts = last_seen
    for o in all_objects:
        name = (o.name or "").strip()
        base = name.rsplit("/", 1)[-1] if name else ""
        logging.info("Processing object: %s", base)
        # Skip folder markers / empty keys
        if not name or name.endswith('/') or not base:
            logging.debug("Skip: folder marker or empty key: %r", name)
            continue
        if not base.lower().endswith(".xml"):
            logging.debug("Skip: not .xml: %r", name)
            continue
        # Resolve mtime: listing metadata first, then HEAD, then "now".
        ts = None
        t = getattr(o, "time_created", None)
        if t:
            try:
                ts = t.timestamp() if hasattr(t, "timestamp") else float(t) / 1000.0
            except Exception:
                ts = None
        if ts is None:
            try:
                head = client.head_object(OCI_NAMESPACE, OCI_BUCKET, name)
                lm = head.headers.get("last-modified") or head.headers.get("Last-Modified")
                if lm:
                    dt = parsedate_to_datetime(lm)
                    if dt.tzinfo is None:
                        dt = dt.replace(tzinfo=timezone.utc)
                    ts = dt.timestamp()
                    logging.debug("Resolved ts via HEAD Last-Modified for %s: %s", name, ts)
            except Exception as e:
                logging.warning("head_object failed for %s: %s", name, e)
        if ts is None:
            ts = datetime.now(timezone.utc).timestamp()
            logging.warning("Object %s missing timestamp; falling back to now=%s", name, ts)
        last_proc_ts = float(processed_map.get(name, 0.0))
        include = REPROCESS or (ts > last_proc_ts)
        logging.info(
            "Decision for %s: obj_ts=%s, last_proc_ts=%s, REPROCESS=%s -> include=%s",
            name, ts, last_proc_ts, REPROCESS, include
        )
        if not include:
            continue
        new_items.append({"name": name, "base": base, "mtime": ts})
        if ts > newest_ts:
            newest_ts = ts
    # Watermark advanced for visibility (optional)
    if not REPROCESS and new_items and newest_ts > last_seen:
        Variable.set(LAST_TS_VAR, str(newest_ts))
        logging.info("Advanced watermark from %s to %s", last_seen, newest_ts)
    new_items.sort(key=lambda x: x["mtime"])  # ascending
    logging.info("Found %d candidate .xml object(s) under prefix %s", len(new_items), prefix)
    return new_items
# DAG
with DAG(
dag_id=dag_id,
default_args=default_args,
description='EXDI workflow (polling): single YAML config for all XML files in OCI',
schedule_interval=None, # Run EVERY 10 MIN
catchup=False,
max_active_runs=1,
render_template_as_native_obj=True,
tags=["EXDI", "MRDS", "ODS", "OCI", "BALANCESHEET"],
) as dag:
@af_task(task_id="poll_oci_for_xml")
def poll_oci_for_xml():
    """
    Poll OCI for new .xml objects and emit this run's workload.
    Returns {"workload": [{"object": "<key>", "base": "<file.xml>", "mtime": <float>} ...]}
    """
    if not OBJECT_PREFIX:
        raise AirflowFailException("No OCI object prefix configured. Check YAML 'inbox_prefix'.")
    candidates = _list_new_xml_objects(OBJECT_PREFIX)
    logging.info("New .xml objects found: %s", json.dumps(candidates, indent=2))
    print("New .xml objects found:", json.dumps(candidates, indent=2))
    # Candidates already carry base + mtime; re-key for downstream consumers.
    workload = [{"object": c["name"], "base": c["base"], "mtime": c["mtime"]} for c in candidates]
    logging.info("Prepared workload items: %d", len(workload))
    print("Prepared workload:", json.dumps(workload, indent=2))
    return {"workload": workload}
@af_task(task_id="init_workflow")
def init_workflow(polled: dict):
    """Start the MRDS workflow for this run and build one task config per polled file.

    Returns a bundle dict with the workflow history key, workflow context,
    the polled workload and the per-file task configs used for mapping.
    """
    database_name = WORKFLOW_CONFIG["database_name"]
    workflow_name = WORKFLOW_CONFIG["workflow_name"]
    env = os.getenv("MRDS_ENV", "dev")
    # Fail fast when loader credentials are absent (values are validated only;
    # MRDS reads them from the environment itself).
    required = {
        "MRDS_LOADER_DB_USER": os.getenv("MRDS_LOADER_DB_USER"),
        "MRDS_LOADER_DB_PASS": os.getenv("MRDS_LOADER_DB_PASS"),
        "MRDS_LOADER_DB_TNS": os.getenv("MRDS_LOADER_DB_TNS"),
    }
    missing = [name for name, value in required.items() if not value]
    if missing:
        raise AirflowFailException(f"Missing required env vars: {', '.join(missing)}")
    workload = (polled or {}).get("workload") or []
    # Resolve the run_id from the live Airflow context.
    from airflow.operators.python import get_current_context
    ctx = get_current_context()
    run_id = str(ctx['ti'].run_id)
    history_key = mrds_init_workflow(database_name, workflow_name, run_id)
    workflow_context = {
        "run_id": run_id,
        "a_workflow_history_key": history_key
    }
    # One sequentially-numbered MRDS task config per polled file.
    task_configs = [
        {
            "task_name": f"m_ODS_LM_BALANCESHEET_{idx}",
            "source_filename": item["base"],  # basename only; adjust if a full key is required
            "config_file": CONFIG_YAML,
        }
        for idx, item in enumerate(workload, start=1)
    ]
    bundle = {
        "workflow_history_key": history_key,
        "workflow_context": workflow_context,
        "workload": workload,          # includes object + mtime
        "task_configs": task_configs,  # list-of-dicts for dynamic mapping
        "env": env,
    }
    logging.info("Init complete; workload=%d, tasks=%d", len(workload), len(task_configs))
    return bundle
@af_task(task_id="get_task_configs")
def get_task_configs(init_bundle: dict):
    """Expose the per-file task configs from the init bundle for task mapping."""
    return init_bundle["task_configs"]
def run_mrds_task(task_name: str, source_filename: str, config_file: str, **context):
    """Process one file through MRDS (mapped task, serialized by max_active_tis_per_dag=1).

    Raises FileNotFoundError when the config file is absent and
    AirflowFailException when no workflow context was produced upstream.
    """
    ti = context['ti']
    if not os.path.exists(config_file):
        raise FileNotFoundError(f"Config file not found: {config_file}")
    bundle = ti.xcom_pull(task_ids='init_workflow') or {}
    workflow_context = bundle.get('workflow_context')
    workload = bundle.get('workload') or []
    if not workflow_context:
        raise AirflowFailException("No workflow_context from init_workflow")
    # Recover the full object key and mtime by matching the basename in the workload.
    match = next((w for w in workload if w.get('base') == source_filename), None)
    full_object_key = match.get('object') if match else None
    object_mtime = match.get('mtime') if match else None
    # Log/print the file being processed.
    logging.info("%s: picking file %s (object=%s, mtime=%s)",
                 task_name, source_filename, full_object_key or source_filename, object_mtime)
    print(f"{task_name}: picking file {source_filename} (object={full_object_key or source_filename}, mtime={object_mtime})")
    try:
        # NOTE: if MRDS expects full URI, change 'source_filename' to 'full_object_key'
        mrds_main(
            workflow_context,
            source_filename,  # or full_object_key if required in your env
            config_file,
            generate_workflow_context=False
        )
    except Exception:
        logging.exception("%s: MRDS failed on %s", task_name, source_filename)
        raise
    # Record the mtime observed during polling so the file is not re-picked.
    if full_object_key and object_mtime:
        _mark_processed_ts([(full_object_key, object_mtime)])
    ti.xcom_push(key='task_status', value='SUCCESS')
    logging.info("%s: success", task_name)
    return "SUCCESS"
def finalise_workflow_task(**context):
    """Finalise the MRDS workflow from the states of all mapped per-file tasks."""
    from airflow.utils.state import State
    ti = context['ti']
    dag_run = context['dag_run']
    bundle = ti.xcom_pull(task_ids='init_workflow') or {}
    history_key = bundle.get('workflow_history_key')
    if history_key is None:
        raise AirflowFailException("No workflow history key; cannot finalise workflow")
    bad_states = {State.FAILED, State.UPSTREAM_FAILED}
    mapped = [t for t in dag_run.get_task_instances() if t.task_id == "m_ODS_LM_BALANCESHEET"]
    # No mapped instances at all means there were no files: still a success.
    if not mapped:
        mrds_finalise_workflow(history_key, "Y")
        logging.info("Finalised workflow %s as SUCCESS (no files)", history_key)
        return
    failed_idxs = [getattr(t, "map_index", None) for t in mapped if t.state in bad_states]
    if not failed_idxs:
        mrds_finalise_workflow(history_key, "Y")
        logging.info("Finalised workflow %s as SUCCESS", history_key)
        return
    mrds_finalise_workflow(history_key, "N")
    logging.error("Finalised workflow %s as FAILED (failed map indexes=%s)", history_key, failed_idxs)
    raise AirflowFailException(f"Workflow failed for mapped indexes: {failed_idxs}")
def check_success_for_mopdb(**context):
    """Gate the MOPDB trigger: succeed only when every processing task succeeded.

    Raises AirflowSkipException (so the downstream trigger is skipped) when
    finalize_workflow or any mapped task failed, or when nothing was processed.
    """
    from airflow.utils.state import State
    try:
        dag_run = context['dag_run']
        has_failures = False
        failure_reasons = []
        # Check finalize_workflow task.
        # BUG FIX: get_task_instance may return None; previously this raised
        # AttributeError, which the broad except turned into a misleading skip.
        finalize_task = dag_run.get_task_instance('finalize_workflow')
        if finalize_task is not None and finalize_task.state == State.FAILED:
            has_failures = True
            failure_reasons.append("finalize_workflow failed")
        # Check all mapped tasks (per-file processing)
        mapped_task_id = "m_ODS_LM_BALANCESHEET"
        mapped_tasks = [t for t in dag_run.get_task_instances() if t.task_id == mapped_task_id]
        for task_instance in mapped_tasks:
            if task_instance.state in {State.FAILED, State.UPSTREAM_FAILED}:
                has_failures = True
                map_idx = getattr(task_instance, 'map_index', 'unknown')
                failure_reasons.append(f"Processing task failed at index {map_idx}")
        if has_failures:
            error_msg = f"Tasks failed - skipping MOPDB trigger: {', '.join(failure_reasons)}"
            logging.info(error_msg)
            raise AirflowSkipException(error_msg)
        # Check if all mapped tasks were skipped (no files to process)
        all_skipped = all(t.state == State.SKIPPED for t in mapped_tasks) if mapped_tasks else True
        if all_skipped or not mapped_tasks:
            error_msg = "All processing tasks were skipped (no files to process) - skipping MOPDB trigger"
            logging.info(error_msg)
            raise AirflowSkipException(error_msg)
        logging.info("All tasks completed successfully - proceeding to trigger MOPDB")
        return "SUCCESS"
    except AirflowSkipException:
        raise
    except Exception as e:
        # Deliberate best-effort: any unexpected error downgrades to a skip
        # rather than failing the run.
        logging.error(f"Error checking success for MOPDB: {e}", exc_info=True)
        raise AirflowSkipException(f"Error checking success - skipping MOPDB trigger: {e}")
# Task instantiation and dependency wiring
poll_task = poll_oci_for_xml()
init_out = init_workflow(poll_task)
task_cfgs = get_task_configs(init_out)

@af_task(task_id="m_ODS_LM_BALANCESHEET", max_active_tis_per_dag=1)
def mapped_run(task_name: str, source_filename: str, config_file: str, **context):
    # Thin mapped wrapper; one instance per file, serialized by max_active_tis_per_dag=1.
    return run_mrds_task(task_name=task_name, source_filename=source_filename, config_file=config_file, **context)

per_file = mapped_run.expand_kwargs(task_cfgs)

finalize_workflow = PythonOperator(
    task_id='finalize_workflow',
    python_callable=finalise_workflow_task,
    provide_context=True,
    retries=0,
    trigger_rule=TriggerRule.ALL_DONE,  # always run, even after upstream failures
)

check_mopdb = PythonOperator(
    task_id='check_success_for_mopdb',
    python_callable=check_success_for_mopdb,
    provide_context=True,
    retries=0,
    trigger_rule=TriggerRule.ALL_DONE,  # always evaluate; it skips itself on failure
)

trigger_mopdb = TriggerDagRunOperator(
    task_id="Trigger_w_MOPDB_LM_BALANCESHEET",
    trigger_dag_id="w_MOPDB_LM_BALANCESHEET",
    conf={
        "source_dag": dag_id,
        "upstream_run_id": "{{ run_id }}",
        "objects": "{{ (ti.xcom_pull(task_ids='poll_oci_for_xml')['workload'] | map(attribute='object') | list) if ti.xcom_pull(task_ids='poll_oci_for_xml') else [] }}",
        "workflow_history_key": "{{ (ti.xcom_pull(task_ids='init_workflow')['workflow_history_key']) if ti.xcom_pull(task_ids='init_workflow') else None }}"
    },
    wait_for_completion=False,  # fire-and-forget
    trigger_rule=TriggerRule.NONE_FAILED_MIN_ONE_SUCCESS,  # only when the check succeeded
    retries=0,
)

all_good = EmptyOperator(
    task_id="All_went_well",
    trigger_rule=TriggerRule.ALL_DONE,  # terminal marker, always runs
)

poll_task >> init_out >> task_cfgs >> per_file >> finalize_workflow >> check_mopdb >> trigger_mopdb >> all_good

logging.info(
    "EXDI DAG ready: inbox_prefix=%s; using per-object processed ts map %s.",
    OBJECT_PREFIX, PROCESSED_TS_VAR
)

View File

@@ -0,0 +1,129 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<!-- Schema for CSM adjustment messages. Defines three mutually exclusive
     message variants (monthly CSM, quarterly revaluation, ad-hoc), each with
     its own header shape and one or more adjustment entries. Shared value
     types (ISO country code, forecast name, positiveInt) are imported from
     the common "lm" schema. -->
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
    xmlns="http://escb.ecb.int/csm-adjustment"
    xmlns:lm="http://exdi.ecb.int/lm"
    targetNamespace="http://escb.ecb.int/csm-adjustment"
    elementFormDefault="qualified"
    attributeFormDefault="unqualified">
    <xs:import namespace="http://exdi.ecb.int/lm" schemaLocation="../../lm_common/lm.xsd" />
    <!-- Root: exactly one of the three message variants (xs:choice). -->
    <xs:element name="adjustmentMessages">
        <xs:complexType>
            <xs:choice>
                <xs:element ref="csmAdjustmentMessage" />
                <xs:element ref="quarterlyRevaluationAdjustmentMessage" />
                <xs:element ref="adhocAdjustmentMessage" />
            </xs:choice>
        </xs:complexType>
    </xs:element>
    <!-- Monthly CSM message: year/month/version header, each adjustment
         carries exactly one forecast item. -->
    <xs:element name="csmAdjustmentMessage">
        <xs:complexType>
            <xs:sequence>
                <xs:element name="header">
                    <xs:complexType>
                        <xs:sequence>
                            <xs:element name="year" type="xs:gYear" />
                            <xs:element name="month" type="month" />
                            <xs:element name="version" type="lm:positiveInt" />
                        </xs:sequence>
                    </xs:complexType>
                </xs:element>
                <xs:element name="adjustment" type="adjustmentSingleForecast" minOccurs="1" maxOccurs="unbounded" />
            </xs:sequence>
        </xs:complexType>
    </xs:element>
    <!-- Quarterly revaluation message: year/quarter/version header, each
         adjustment may carry multiple forecast items. -->
    <xs:element name="quarterlyRevaluationAdjustmentMessage">
        <xs:complexType>
            <xs:sequence>
                <xs:element name="header">
                    <xs:complexType>
                        <xs:sequence>
                            <xs:element name="year" type="xs:gYear" />
                            <xs:element name="quarter" type="quarter" />
                            <xs:element name="version" type="lm:positiveInt" />
                        </xs:sequence>
                    </xs:complexType>
                </xs:element>
                <xs:element name="adjustment" type="adjustmentMultipleForecasts" minOccurs="1" maxOccurs="unbounded" />
            </xs:sequence>
        </xs:complexType>
    </xs:element>
    <!-- Ad-hoc message: keyed by a specific date rather than year/period;
         adjustments may carry multiple forecast items. -->
    <xs:element name="adhocAdjustmentMessage">
        <xs:complexType>
            <xs:sequence>
                <xs:element name="header">
                    <xs:complexType>
                        <xs:sequence>
                            <xs:element name="date" type="xs:date" />
                            <xs:element name="version" type="lm:positiveInt" />
                        </xs:sequence>
                    </xs:complexType>
                </xs:element>
                <xs:element name="adjustment" type="adjustmentMultipleForecasts" minOccurs="1" maxOccurs="unbounded" />
            </xs:sequence>
        </xs:complexType>
    </xs:element>
    <!-- Common fields shared by all adjustment variants (extended below). -->
    <xs:complexType name="baseAdjustment">
        <xs:sequence>
            <xs:element name="country" type="lm:isoCode" />
            <xs:element name="effectiveDate" type="xs:date" />
            <xs:element name="lastDateNotInForecast" type="xs:date" />
        </xs:sequence>
    </xs:complexType>
    <!-- baseAdjustment + exactly one forecast item (monthly CSM variant). -->
    <xs:complexType name="adjustmentSingleForecast">
        <xs:complexContent>
            <xs:extension base="baseAdjustment">
                <xs:sequence>
                    <xs:element name="forecastItem" type="forecastItem" />
                </xs:sequence>
            </xs:extension>
        </xs:complexContent>
    </xs:complexType>
    <!-- baseAdjustment + one or more forecast items (quarterly / ad-hoc). -->
    <xs:complexType name="adjustmentMultipleForecasts">
        <xs:complexContent>
            <xs:extension base="baseAdjustment">
                <xs:sequence>
                    <xs:element name="forecastItem" type="forecastItem" minOccurs="1" maxOccurs="unbounded" />
                </xs:sequence>
            </xs:extension>
        </xs:complexContent>
    </xs:complexType>
    <!-- A named forecast with its adjustment amount. -->
    <xs:complexType name="forecastItem">
        <xs:sequence>
            <xs:element name="forecastName" type="lm:forecastName" />
            <xs:element name="adjustmentAmount" type="extendedDecimalEuroValue" />
        </xs:sequence>
    </xs:complexType>
    <!-- Signed euro amount: up to 17 total digits, 8 fraction digits,
         bounded to +/- 999,999,999.99999999. -->
    <xs:simpleType name="extendedDecimalEuroValue">
        <xs:restriction base="xs:decimal">
            <xs:totalDigits value="17" />
            <xs:fractionDigits value="8" />
            <xs:minInclusive value="-999999999.99999999" />
            <xs:maxInclusive value="999999999.99999999" />
        </xs:restriction>
    </xs:simpleType>
    <!-- Calendar month number, 1-12. -->
    <xs:simpleType name="month">
        <xs:restriction base="xs:int">
            <xs:minInclusive value="1" />
            <xs:maxInclusive value="12" />
        </xs:restriction>
    </xs:simpleType>
    <!-- Calendar quarter number, 1-4. -->
    <xs:simpleType name="quarter">
        <xs:restriction base="xs:int">
            <xs:minInclusive value="1" />
            <xs:maxInclusive value="4" />
        </xs:restriction>
    </xs:simpleType>
</xs:schema>

Some files were not shown because too many files have changed in this diff Show More