init
This commit is contained in:
0
.ci/.gitkeep
Normal file
0
.ci/.gitkeep
Normal file
74
.ci/mr_dev_to_latest.sh
Normal file
74
.ci/mr_dev_to_latest.sh
Normal file
@@ -0,0 +1,74 @@
|
||||
#!/bin/bash
#
# Promote selected directories from the dev branch to the latest branch by
# pushing a merge branch and opening a GitLab merge request.
#
# Required environment:
#   GL_TOKEN       - GitLab access token (used for clone, push and the MR API)
#   CI_PROJECT_ID  - numeric project id (provided by GitLab CI)
set -euo pipefail

# Fail fast: the token is embedded in REPO_URL and required for every step,
# so checking it only before the final curl call (as before) is too late.
if [ -z "${GL_TOKEN:-}" ]; then
  echo "GL_TOKEN environment variable not set. Cannot create merge request." >&2
  exit 1
fi

readonly REPO_URL="https://oauth2:${GL_TOKEN}@gitlab.sofa.dev/mrds/mrds_elt.git"
readonly DEV_BRANCH="dev"
readonly TARGET_BRANCH="latest"
readonly WORK_DIR="mrds_elt_merge"
readonly MERGE_BRANCH="dev_to_latest_merge"

# Only these paths are promoted from dev to latest.
DIRECTORIES=(
  "airflow/devo_replicator"
  "airflow/ods/rqsd"
  "airflow/mopdb/RQSD"
  "dbt"
  "python/connectors/devo"
  "python/devo_replicator"
  "python/mrds_common"
  ".ci"
  ".gitlab-ci.yml"
  ".ci/mr_dev_to_latest.sh"
)

echo "Setting up working directory..."
# Drop a stale local merge branch in the surrounding checkout, if any.
git branch -D "$MERGE_BRANCH" 2>/dev/null || true
rm -rf "$WORK_DIR"
mkdir -p "$WORK_DIR"
cd "$WORK_DIR" || exit 1

echo "Cloning repository..."
git clone "$REPO_URL" .
git config --global user.email "cicd@sofa.dev"
git config --global user.name "CICD Pipeline"

echo "Creating merge branch from $TARGET_BRANCH..."
git checkout "$TARGET_BRANCH"
git checkout -b "$MERGE_BRANCH"

echo "Fetching $DEV_BRANCH branch..."
git fetch origin "$DEV_BRANCH"

echo "Selecting changes from $DEV_BRANCH for specific directories..."
CHANGES_FOUND=false

for dir in "${DIRECTORIES[@]}"; do
  echo "Copying $dir from $DEV_BRANCH to $MERGE_BRANCH"
  git checkout "origin/$DEV_BRANCH" -- "$dir"
done

# Commit only when the selected paths actually differ from the target branch.
# (The original script ran this identical diff check twice; once is enough.)
if ! git diff --quiet "$TARGET_BRANCH"; then
  git commit -am "Update selected directories from $DEV_BRANCH"
  CHANGES_FOUND=true
fi

if [ "$CHANGES_FOUND" = true ]; then
  echo "Pushing merge branch..."
  git push -f -u "$REPO_URL" "$MERGE_BRANCH"

  echo "Creating merge request..."
  curl -X POST \
    -H "PRIVATE-TOKEN: $GL_TOKEN" \
    "https://gitlab.sofa.dev/api/v4/projects/${CI_PROJECT_ID}/merge_requests" \
    -d "source_branch=${MERGE_BRANCH}&target_branch=${TARGET_BRANCH}&title=Requested merge of selected directories from ${DEV_BRANCH} to ${TARGET_BRANCH}&description=This MR contains selected directories from ${DEV_BRANCH} branch."
else
  echo "No changes found between $DEV_BRANCH and $TARGET_BRANCH for the specified directories."
  echo "Skipping merge request creation."
  exit 0
fi
|
||||
5
.gitignore
vendored
Normal file
5
.gitignore
vendored
Normal file
@@ -0,0 +1,5 @@
|
||||
__pycache__/
*.log
.venv
.tox
*.egg-info/
|
||||
96
.gitlab-ci.yml
Normal file
96
.gitlab-ci.yml
Normal file
@@ -0,0 +1,96 @@
|
||||
# CI pipeline for mrds_elt:
#   analyze -> SonarQube scan of the latest branch sources
#   build   -> trigger the downstream mars-airflow pipeline (dev / latest)
#   merge   -> manual promotion of selected directories from dev to latest
stages:
  - analyze
  - build
  - merge

variables:
  # URL-encoded path of the downstream project whose pipeline is triggered.
  TARGET_PROJECT: "mrds%2Foci-terraform%2Fmars%2Fmars-airflow"
  GITLAB_API_URL: "https://gitlab.sofa.dev/api/v4/projects"

sonarqube_check_latest_sources:
  stage: analyze
  image: artifactory.sofa.dev/docker-remote/sonarsource/sonar-scanner-cli:latest
  script:
    - sonar-scanner
      -Dsonar.projectKey=$SONAR_PROJECT_KEY
      -Dsonar.projectBaseDir=.
      -Dsonar.sources=.
      -Dsonar.host.url=$SONAR_HOST_URL
      -Dsonar.login=$SONAR_TOKEN
  rules:
    - if: '$CI_COMMIT_BRANCH == "latest"'

build_airflow_from_dev_branch:
  stage: build
  when: manual
  script:
    # NOTE(review): $TARGET_BRANCH is not defined anywhere in this pipeline,
    # so it expands to an empty string in the echo below. The ref actually
    # triggered is the hard-coded "dev" in the curl form data - confirm intent.
    - echo "Triggering pipeline in $TARGET_PROJECT on branch $TARGET_BRANCH..."
    - apt-get update && apt-get install curl -y
    - echo "#############################################################"
    - echo ""
    - echo -e "\e[31mContinue to https://gitlab.sofa.dev/mrds/oci-terraform/mars/mars-airflow/-/pipelines to see the build process progress, its result and to trigger deployment to DEV\e[0m"
    - echo ""
    - echo "#############################################################"
    - |
      set -e
      RESPONSE=$(curl --silent --show-error --fail --request POST \
        --form token="$TRIGGER_TOKEN" \
        --form ref="dev" \
        --form "variables[COMMIT_MESSAGE]=$CI_COMMIT_MESSAGE" \
        --form "variables[COMMIT_SHA]=$CI_COMMIT_SHA" \
        --form "variables[MR_AUTHOR]=$GITLAB_USER_NAME" \
        "$GITLAB_API_URL/$TARGET_PROJECT/trigger/pipeline") || {
          echo "ERROR: Failed to trigger remote pipeline!"
          echo "$RESPONSE"
          exit 1
        }
      echo "$RESPONSE"
  only:
    - dev

build_airflow_from_latest_branch:
  stage: build
  script:
    # NOTE(review): same undefined $TARGET_BRANCH as above; ref is "main".
    - echo "Triggering pipeline in $TARGET_PROJECT on branch $TARGET_BRANCH..."
    - apt-get update && apt-get install curl -y
    - echo "#############################################################"
    - echo ""
    - echo -e "\e[31mContinue to https://gitlab.sofa.dev/mrds/oci-terraform/mars/mars-airflow/-/pipelines to see the build process and its result\e[0m"
    - echo ""
    - echo "#############################################################"
    - |
      set -e
      RESPONSE=$(curl --silent --show-error --fail --request POST \
        --form token="$TRIGGER_TOKEN" \
        --form ref="main" \
        --form "variables[COMMIT_MESSAGE]=$CI_COMMIT_MESSAGE" \
        --form "variables[COMMIT_SHA]=$CI_COMMIT_SHA" \
        --form "variables[MR_AUTHOR]=$GITLAB_USER_NAME" \
        "$GITLAB_API_URL/$TARGET_PROJECT/trigger/pipeline") || {
          echo "ERROR: Failed to trigger remote pipeline!"
          echo "$RESPONSE"
          exit 1
        }
      echo "$RESPONSE"
  only:
    - latest

create_mr_dev_to_latest:
  stage: merge
  script:
    - apt-get update && apt-get install curl git sed -y
    # Defensive guard: the job is already restricted to dev via 'only',
    # but fail loudly if it ever runs elsewhere.
    - |
      if [ "$CI_COMMIT_BRANCH" != "dev" ]; then
        echo -e "\e[31mERROR: Merge requests to latest branch are only allowed from dev branch.\e[0m"
        exit 1
      fi
    - bash .ci/mr_dev_to_latest.sh | tee mr_dev_to_latest.log
  when: manual
  allow_failure: false
  only:
    - dev
  artifacts:
    paths:
      - mr_dev_to_latest.log
    expire_in: 7 days
|
||||
5
README.md
Normal file
5
README.md
Normal file
@@ -0,0 +1,5 @@
|
||||
# MRDS_ELT

## Building and deployment rules and procedures

https://europeancentralbank.atlassian.net/wiki/spaces/MRDS/pages/588221438/Gitlab+and+CI+CD+setup
|
||||
0
airflow/TestDags/.gitkeep
Normal file
0
airflow/TestDags/.gitkeep
Normal file
120
airflow/TestDags/IDMC_Airflow_Test.py
Normal file
120
airflow/TestDags/IDMC_Airflow_Test.py
Normal file
@@ -0,0 +1,120 @@
|
||||
import json
import sys
import time
import re
import requests
from datetime import datetime, timedelta

from airflow import DAG
from airflow.models import Variable
from airflow.operators.python_operator import PythonOperator


# DAG that logs in to Informatica (IICS/IDMC), starts one CDI task, polls
# its status and prints the session logs when the run leaves an active state.

def sanitize_task_id(task_id: str) -> str:
    """Make a string safe to use as an Airflow task_id.

    Replaces every run of characters outside [a-zA-Z0-9_] with a single
    underscore and prefixes 'task_' when the result would not start with a
    letter or digit.
    """
    sanitized = re.sub(r'[^a-zA-Z0-9_]+', '_', task_id)
    if not re.match(r'^[a-zA-Z0-9]', sanitized):
        sanitized = 'task_' + sanitized
    return sanitized


# Runtime parameters come from Airflow Variables.
iics_username = Variable.get("iics_username")
iics_password = Variable.get("iics_password")
task_type = Variable.get("task_type", default_var="MTT")
# NOTE(review): the default base_url is empty, so the login POST below will
# fail unless the "iics_base_url" Variable is set - confirm in deployment.
base_url = Variable.get("iics_base_url", default_var="")

# Name of the single Informatica task this DAG triggers.
CDI_task_name = "CDI_task"

# Default DAG args.
default_args = {
    'owner': 'infa',
    'depends_on_past': False,
    'email': ['airflow@example.com'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=1),
    'start_date': datetime.now() - timedelta(seconds=10),
}


def get_session_id(un, pw):
    """Log in to IICS and return (icSessionId, serverUrl).

    Exits the process (sys.exit(1)) on a non-200 response, which fails the
    Airflow task.
    """
    data = {'@type': 'login', 'username': un, 'password': pw}
    headers = {'Content-Type': 'application/json', 'Accept': 'application/json'}
    r = requests.post(base_url, data=json.dumps(data), headers=headers)
    if r.status_code == 200:
        return r.json()["icSessionId"], r.json()["serverUrl"]
    else:
        print('API call failed:', r.status_code)
        print(r.text)
        sys.exit(1)


def start_job(session_id, server_url, taskname, taskType):
    """Start an IICS job and return (taskId, runId). Exits on failure."""
    job_start_url = server_url + "/api/v2/job"
    headers = {'Content-Type': 'application/json', 'icSessionId': session_id, 'Accept': 'application/json'}
    data = {'@type': 'job', 'taskName': taskname, 'taskType': taskType}
    r = requests.post(job_start_url, data=json.dumps(data), headers=headers)
    if r.status_code == 200:
        response_content = r.json()
        print(f"Job {taskname} started successfully")
        return response_content['taskId'], response_content['runId']
    else:
        print('Job failed to start:', r.status_code)
        print(r.text)
        sys.exit(1)


def get_status(server_url, session_id, task_id, run_id):
    """Return the executionState of (task_id, run_id) from the activity monitor.

    Returns None when the run is no longer listed (the caller treats that the
    same as a terminal state; previously this was an implicit fall-through).
    Exits on a non-200 response.
    """
    job_activity_url = server_url + "/api/v2/activity/activityMonitor"
    headers = {'Content-Type': 'application/json', 'icSessionId': session_id, 'Accept': 'application/json'}
    r = requests.get(job_activity_url, headers=headers)
    if r.status_code == 200:
        for obj in r.json():
            if obj['taskId'] == task_id and obj['runId'] == run_id:
                return obj['executionState']
        return None  # run not listed any more -> considered finished
    else:
        print('Failed to get status:', r.status_code)
        print(r.text)
        sys.exit(1)


def execute_task(task_name):
    """Start the given IICS task, poll every 15s until it leaves an active
    state, then fetch and print its session logs."""
    session_id, server_url = get_session_id(iics_username, iics_password)
    task_id, run_id = start_job(session_id, server_url, task_name, task_type)

    log_url = f"{server_url}/api/v2/activity/activityLog/"
    headers = {'Content-Type': 'application/json', 'icSessionId': session_id, 'Accept': 'application/json'}

    while True:
        time.sleep(15)
        status = get_status(server_url, session_id, task_id, run_id)
        print(f"Task status: {status}")
        # None (run no longer listed) also falls outside the active set.
        if status not in {"RUNNING", "INITIALIZED", "STOPPING", "QUEUED"}:
            # Fetch logs on completion
            url = f"{log_url}?taskId={task_id}&runId={run_id}"
            r = requests.get(url, headers=headers)
            logs = r.json()
            for obj in logs:
                log_id = obj['id']
                log_detail = requests.get(f"{log_url}{log_id}/sessionLog", headers=headers)
                print(log_detail.text)
            break


# DAG with no schedule (manual trigger only).
dag = DAG(
    'IDMC_Airflow_Test',
    default_args=default_args,
    description='Simplified DAG with one CDI task',
    schedule_interval=None,
    catchup=False
)

safe_task_id = sanitize_task_id(CDI_task_name)

run_cdi_task = PythonOperator(
    task_id=safe_task_id,
    python_callable=execute_task,
    op_kwargs={'task_name': CDI_task_name},
    dag=dag
)
|
||||
142
airflow/TestDags/IICS_Airflow_Demo.py
Normal file
142
airflow/TestDags/IICS_Airflow_Demo.py
Normal file
@@ -0,0 +1,142 @@
|
||||
import json
import sys
import time
import re
import requests
from datetime import datetime, timedelta

from airflow import DAG
from airflow.models import Variable
from airflow.operators.dummy_operator import DummyOperator
from airflow.operators.python_operator import PythonOperator


# Demo DAG that runs a configurable list of IICS CDI tasks in parallel,
# followed by a list of CDI_E tasks.

def sanitize_task_id(task_id: str) -> str:
    """Make a string safe to use as an Airflow task_id.

    Replaces invalid characters with underscores and ensures the result
    starts with a letter or digit.
    """
    sanitized = re.sub(r'[^a-zA-Z0-9_]+', '_', task_id)
    if not re.match(r'^[a-zA-Z0-9]', sanitized):
        sanitized = 'task_' + sanitized
    return sanitized


# Runtime parameters come from Airflow Variables.
iics_username = Variable.get("iics_username")
iics_password = Variable.get("iics_password")
task_type = Variable.get("task_type", default_var="MTT")
base_url = Variable.get("iics_base_url", default_var="https://dm-us.informaticacloud.com/ma/api/v2/user/login")

# Task name lists are stored as JSON arrays in Airflow Variables.
CDI_task_name = json.loads(Variable.get("CDI_task_name", default_var='["Task_Date_Dim", "Task_Items", "Task_Store_Sales"]'))
CDI_E_task_name = json.loads(Variable.get("CDI_E_task_name", default_var='["Task_Total_Store_Sales_IWDEMO"]'))

# Default DAG args.
# NOTE(review): 'schedule' is not a recognised default_args key; the
# schedule_interval passed to DAG(...) below is what actually takes effect.
default_args = {
    'owner': 'infa',
    'depends_on_past': False,
    'email': ['airflow@example.com'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=1),
    'start_date': datetime.now() - timedelta(seconds=10),
    'schedule': '@daily'
}


def get_session_id(un, pw):
    """Log in to IICS and return (icSessionId, serverUrl); exits on failure."""
    session_id = ''
    data = {'@type': 'login', 'username': un, 'password': pw}
    headers = {'Content-Type': 'application/json', 'Accept': 'application/json'}
    r = requests.post(base_url, data=json.dumps(data), headers=headers)
    if r.status_code == 200:
        session_id = r.json()["icSessionId"]
        server_url = r.json()["serverUrl"]
    else:
        print('API call failed:', r.status_code)
        print(r.text)
        sys.exit(1)
    return session_id, server_url


def start_job(session_id, server_url, taskname, taskType):
    """Start an IICS job and return (taskId, runId, taskName).

    Exits on failure. (The original fell through and returned None, which
    made the caller crash with a TypeError while unpacking; this now matches
    the error handling of the other functions in this file.)
    """
    job_start_url = server_url + "/api/v2/job"
    headers = {'Content-Type': 'application/json', 'icSessionId': session_id, 'Accept': 'application/json'}
    data = {'@type': 'job', 'taskName': taskname, 'taskType': taskType}
    r = requests.post(job_start_url, data=json.dumps(data), headers=headers)
    if r.status_code == 200:
        response_content = r.json()
        print("Job", taskname, "started successfully")
        return response_content['taskId'], response_content['runId'], response_content['taskName']
    else:
        print('Job failed to start:', r.status_code)
        print(r.text)
        sys.exit(1)


def get_status(server_url, session_id):
    """Return (taskId, executionState, taskName, runId) of the FIRST entry in
    the activity monitor, or None when the monitor is empty or the call fails.

    NOTE(review): this does not filter by task/run id, so with concurrent
    runs it may report a different task's state - confirm intended.
    """
    job_activity_url = server_url + "/api/v2/activity/activityMonitor"
    headers = {'Content-Type': 'application/json', 'icSessionId': session_id, 'Accept': 'application/json'}
    r = requests.get(job_activity_url, headers=headers)
    if r.status_code == 200:
        for obj in r.json():
            return obj['taskId'], obj['executionState'], obj['taskName'], obj['runId']
    else:
        print('Failed to get status:', r.status_code)
        print(r.text)


def execute_task(task_name):
    """Start the given IICS task, poll every 15s until no active state is
    reported, then fetch and print the session logs."""
    session_id, server_url = get_session_id(iics_username, iics_password)
    task_id, run_id, _ = start_job(session_id, server_url, task_name, task_type)

    log_url = f"{server_url}/api/v2/activity/activityLog/"
    headers = {'Content-Type': 'application/json', 'icSessionId': session_id, 'Accept': 'application/json'}

    while True:
        time.sleep(15)
        task_status = get_status(server_url, session_id)
        if not task_status or task_status[1] not in {"RUNNING", "INITIALIZED", "STOPPING", "QUEUED"}:
            # Fetch log
            url = f"{log_url}?taskId={task_id}&runId={run_id}"
            r = requests.get(url, headers=headers)
            logs = r.json()
            for obj in logs:
                log_id = obj['id']
                log_detail = requests.get(f"{log_url}{log_id}/sessionLog", headers=headers)
                print(log_detail.text)
            break


# Define DAG
dag = DAG(
    'IICS_Airflow_Demo',
    default_args=default_args,
    description='A Sample IICS Airflow DAG',
    schedule_interval='@daily',
    catchup=False
)

# Task group 1: CDI tasks fan out between two barrier operators.
cdi_start = DummyOperator(task_id='cdi_start', dag=dag)
cdi_end = DummyOperator(task_id='cdi_end', dag=dag)

for i in CDI_task_name:
    safe_task_id = 'IICS_CDI_' + sanitize_task_id(i)
    print(f"Creating task: {safe_task_id} for original task name: {i}")
    cdi_task = PythonOperator(
        task_id=safe_task_id,
        python_callable=execute_task,
        op_kwargs={'task_name': i},
        dag=dag
    )
    cdi_start >> cdi_task >> cdi_end

# Task group 2: CDI_E tasks run after every CDI task has finished.
for j in CDI_E_task_name:
    safe_task_id = 'IICS_CDI_E_' + sanitize_task_id(j)
    print(f"Creating E task: {safe_task_id} for original task name: {j}")
    cdi_e_task = PythonOperator(
        task_id=safe_task_id,
        python_callable=execute_task,
        op_kwargs={'task_name': j},
        dag=dag
    )
    cdi_end >> cdi_e_task
||||
75
airflow/TestDags/archive/devo_connector_test.py
Normal file
75
airflow/TestDags/archive/devo_connector_test.py
Normal file
@@ -0,0 +1,75 @@
|
||||
import sys
import os
from airflow import DAG
from airflow.operators.python import PythonOperator
from airflow.utils.dates import days_ago
from datetime import datetime, timedelta
import logging

# Make the project modules importable inside the Airflow workers.
# (A duplicate append of the connectors path was removed.)
sys.path.append('/opt/airflow/python/connectors/devo')
sys.path.append('/opt/airflow/python/mrds_common')
sys.path.append('/opt/airflow/src/airflow/dags/ods/rqsd')

# Entry point of the Devo connector workflow.
from devo_connector import main as devo_main


### DEVO CONNECTOR WITH STATIC workflow (task 3)


# Default DAG arguments
default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': days_ago(1),
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 3,
    'retry_delay': timedelta(minutes=5),
}

with DAG(
    dag_id='devo_connector_test',
    default_args=default_args,
    description='Run devo RQSD data ingestion workflow',
    schedule_interval=None,  # manual trigger only; schedule can be set later
    catchup=False,
    tags=['Devo', 'RQSD', 'Connector'],
) as dag:

    def run_devo_connector_rqsd(**context):
        """Run the Devo RQSD ingestion workflow.

        Reads environment/DB credentials from environment variables, raises
        ValueError when any credential is missing, and re-raises any failure
        so the Airflow task is marked failed.
        """
        try:
            # Pick env from ENV variables
            env = os.getenv("MRDS_ENV")
            username = os.getenv("MRDS_LOADER_DB_USER")
            password = os.getenv("MRDS_LOADER_DB_PASS")
            tnsalias = os.getenv("MRDS_LOADER_DB_TNS")
            if not all([username, password, tnsalias]):
                raise ValueError(
                    "Missing one or more required environment variables: "
                    "MRDS_LOADER_DB_USER, MRDS_LOADER_DB_PASS, MRDS_LOADER_DB_TNS"
                )

            logging.info(
                f"Starting Casper RQSD workflow from Airflow DAG for env '{env}'"
            )

            # NOTE(review): run_id and workflow history key are hard-coded
            # test values - confirm before production use.
            workflow_context = {"run_id": 34, "a_workflow_history_key": 6}
            flow_config_path = "/opt/airflow/src/airflow/dags/ods/rqsd/rqsd_process/config/yaml/flow_config_rqsd_observations.yaml"
            env_config_path = "/opt/airflow/python/connectors/devo/config/env_config_rqsd.yaml"

            logging.info("Starting Devo RQSD workflow from Airflow DAG")
            devo_main(workflow_context, flow_config_path, env_config_path, env)
            logging.info("Devo RQSD workflow completed successfully")
        except Exception as e:
            logging.error(f"Error running Devo RQSD workflow: {e}", exc_info=True)
            raise

    run_devo = PythonOperator(
        task_id='run_devo_connector_rqsd',
        python_callable=run_devo_connector_rqsd,
    )
|
||||
158
airflow/TestDags/archive/devo_replicator_scheduler_rar.py
Normal file
158
airflow/TestDags/archive/devo_replicator_scheduler_rar.py
Normal file
@@ -0,0 +1,158 @@
|
||||
import os
import sys
import logging

from airflow.decorators import dag
from airflow.operators.python import PythonOperator
from airflow.utils.dates import days_ago
from airflow.utils.trigger_rule import TriggerRule
from airflow import DAG
from airflow.decorators import task
from airflow.operators.trigger_dagrun import TriggerDagRunOperator
from datetime import datetime, timedelta
from airflow.operators.python import BranchPythonOperator
from airflow.operators.empty import EmptyOperator

from mrds.utils import oraconn

sys.path.append('/opt/airflow/python/connectors/devo')
sys.path.append('/opt/airflow/python/mrds_common')

DAG_NAME = "dev_replicator_scheduler_rar"
TARGET_DAG_ID = "devo_replicator_trigger_rar"


def get_devo_replica_table_options():
    """Return the list of 'OWNER.TABLE_NAME' strings from
    CT_MRDS.a_devo_replica_mgmt_rar, or [] on any error."""
    oracle_conn = None
    try:
        oracle_conn = oraconn.connect('MRDS_LOADER')
        cursor = oracle_conn.cursor()
        cursor.execute("SELECT OWNER || '.' || TABLE_NAME FROM CT_MRDS.a_devo_replica_mgmt_rar ORDER BY OWNER, TABLE_NAME")
        options = [row[0] for row in cursor.fetchall()]
        cursor.close()
        return options
    except Exception as e:
        # Fixed: the message previously said "MOPDB table options" although
        # this function reads the Devo replica management table.
        logging.error(f"Error getting Devo replica table options: {e}")
        return []
    finally:
        if oracle_conn:
            oracle_conn.close()


def check_table_precondition(table_full_name):
    """Decide whether a table needs re-replication.

    Compares the last successful Oracle process end against the last
    successful Devo replication end and returns
    {"table": <name>, "trigger": 'Y'|'N'|'ERROR'}.
    """
    oracle_conn = None
    try:
        oracle_conn = oraconn.connect('MRDS_LOADER')
        cursor = oracle_conn.cursor()
        sql = """
            WITH LAST_UPDATE_ORACLE AS (
                SELECT max(process_end) as process_end
                FROM CT_RAR.A_RAR_FOR_DISC_MONITORING
                WHERE upper(owner||'.'||TARGET_TABLE_NAME) = upper(:table_name)
                AND PROCESS_END is not null AND PROCESS_SUCCESSFUL='Y'
            ),
            LAST_UPDATE_DEVO AS (
                SELECT CASE WHEN last_status = 'FINISHED' THEN LAST_END_TIME ELSE TO_DATE('01-JAN-1999', 'DD-MON-YYYY') END as process_end
                FROM CT_MRDS.a_devo_replica_mgmt_rar
                WHERE OWNER || '.' || TABLE_NAME = :table_name
            )
            SELECT CASE WHEN (SELECT process_end FROM LAST_UPDATE_ORACLE) > (SELECT process_end FROM LAST_UPDATE_DEVO)
            THEN 'Y' ELSE 'N' END AS TRIGGER_DEVO_REPLICATOR FROM dual
        """
        cursor.execute(sql, table_name=table_full_name)
        result = cursor.fetchone()
        status = result[0] if result else 'N'
        logging.info(f"Precondition for {table_full_name}: {status}")
        cursor.close()
        return {"table": table_full_name, "trigger": status}
    except Exception as e:
        logging.error(f"Error checking precondition for {table_full_name}: {e}")
        return {"table": table_full_name, "trigger": 'ERROR'}
    finally:
        if oracle_conn:
            oracle_conn.close()


def get_tables_to_trigger(precondition_results):
    """Map precondition results with trigger == 'Y' to TriggerDagRun conf
    dicts ({"owner_table": <name>})."""
    triggered_tables = [r["table"] for r in precondition_results if r["trigger"] == "Y"]
    logging.info(f"Tables meeting precondition: {triggered_tables}")
    return [{"owner_table": table_name} for table_name in triggered_tables]


def branch_on_tables(ti):
    """Branch callable: choose the trigger path when at least one table
    met its precondition, otherwise the no-op path."""
    precondition_results = ti.xcom_pull(task_ids='check_all_tables')
    tables_to_trigger = [r["table"] for r in precondition_results if r["trigger"] == "Y"]
    if tables_to_trigger:
        return "trigger_devo_replicators"
    else:
        return "no_table_updated"


default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': days_ago(1),
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=2),
}

with DAG(
    dag_id=DAG_NAME,
    default_args=default_args,
    schedule_interval=None,
    catchup=False,
    tags=['DevoScheduler', 'DevoReplicatorTrigger']
) as dag:

    @task()
    def fetch_tables():
        """List all candidate tables from the replica management table."""
        return get_devo_replica_table_options()

    @task()
    def check_all_tables(table_list):
        """Run the precondition check for every table and log Y/N counts."""
        results = [check_table_precondition(tbl) for tbl in table_list]
        count_y = sum(1 for r in results if r["trigger"] == "Y")
        count_n = sum(1 for r in results if r["trigger"] == "N")
        logging.info(f"Precondition results: {results}")
        logging.info(f"Tables with trigger = 'Y': {count_y}")
        logging.info(f"Tables with trigger = 'N': {count_n}")
        return results

    @task()
    def output_tables_to_trigger(precondition_results):
        """Build the mapped conf list for the TriggerDagRunOperator."""
        return get_tables_to_trigger(precondition_results)

    branch_task = BranchPythonOperator(
        task_id="branch_trigger_check",
        python_callable=branch_on_tables,
        provide_context=True,
    )

    no_table_updated = EmptyOperator(task_id="no_table_updated")

    tables = fetch_tables()
    precondition_results = check_all_tables(tables)
    tables_to_trigger = output_tables_to_trigger(precondition_results)

    # One devo_replicator_trigger_rar run per qualifying table (dynamic mapping).
    trigger_dag = TriggerDagRunOperator.partial(
        task_id="trigger_devo_replicators",
        trigger_dag_id=TARGET_DAG_ID,
        execution_date="{{ ds }}"
    ).expand(conf=tables_to_trigger)

    # Dependencies for branching
    tables >> precondition_results >> tables_to_trigger >> branch_task
    branch_task >> [trigger_dag, no_table_updated]


"""
1. fetch_tables gets the list of tables.
2. check_all_tables checks each table's trigger status and logs counts.
3. output_tables_to_trigger prepares the mapped parameter list for triggering downstream DAGs.
4. branch_on_tables decides the path:
   "trigger_devo_replicators" if any table triggers.
   "no_table_updated" otherwise.
5. BranchPythonOperator implements the conditional branching.
6. TriggerDagRunOperator dynamically triggers a run of devo_replicator_trigger_rar per qualifying table.
7. EmptyOperator represents the "no tables to trigger" branch.
"""
|
||||
112
airflow/TestDags/archive/devo_replicator_test.py
Normal file
112
airflow/TestDags/archive/devo_replicator_test.py
Normal file
@@ -0,0 +1,112 @@
|
||||
from airflow import DAG
from airflow.operators.python import PythonOperator
from airflow.providers.oracle.hooks.oracle import OracleHook
from airflow.utils.dates import days_ago
from datetime import timedelta
import logging


# NOTE(review): hard-coded placeholder parameters - confirm before real use.
p_run_id = 1234
p_service_name = 'MyService'
p_table_owner = 'MY_SCHEMA'
p_table_name = 'MY_TABLE'
p_objectstore_uri = 's3://bucket/uri'  # subject to change appropriate for RAR/MOPDB


def _call_replicator_proc(proc_call, params, proc_name):
    """Execute a DATA_REPLICATOR PL/SQL call and commit.

    Cursor and connection are closed in finally blocks so they are released
    even when execute/commit raises (the original leaked them on error).
    """
    oracle_hook = OracleHook(oracle_conn_id='oracle_default')
    conn = oracle_hook.get_conn()
    try:
        cursor = conn.cursor()
        try:
            cursor.execute(proc_call, params)
            conn.commit()
        finally:
            cursor.close()
    finally:
        conn.close()
    logging.info("%s executed successfully.", proc_name)


def start_log_table_task(**context):
    """Register the start of a replication run in the log table."""
    proc_call = "BEGIN MRDS_LOADER.DATA_REPLICATOR.start_log_table(:1, :2, :3, :4); END;"
    try:
        _call_replicator_proc(proc_call,
                              [p_run_id, p_service_name, p_table_owner, p_table_name],
                              "start_log_table")
    except Exception as e:
        logging.error("Failed to execute start_log_table: %s", e, exc_info=True)
        raise


def export_table_task(**context):
    """Export the table to the object store via DATA_REPLICATOR.export_table."""
    proc_call = "BEGIN MRDS_LOADER.DATA_REPLICATOR.export_table(:1, :2, :3, :4); END;"
    try:
        _call_replicator_proc(proc_call,
                              [p_service_name, p_table_owner, p_table_name, p_objectstore_uri],
                              "export_table")
    except Exception as e:
        logging.error("Failed to execute export_table: %s", e, exc_info=True)
        raise


def devo_impyla_task(**context):
    """Placeholder for the Impyla (Devo) refresh step."""
    # Example for future:
    # from impala.dbapi import connect
    # conn = connect(host="...", port=21050)
    # cursor = conn.cursor()
    # cursor.execute("...")
    logging.info("Impyla (Devo) task placeholder ran. Please implement.")


def end_log_table_task(**context):
    """Register the end of the replication run in the log table."""
    proc_call = "BEGIN MRDS_LOADER.DATA_REPLICATOR.end_log_table(:1, :2, :3); END;"
    try:
        _call_replicator_proc(proc_call,
                              [p_service_name, p_table_owner, p_table_name],
                              "end_log_table")
    except Exception as e:
        logging.error("Failed to execute end_log_table: %s", e, exc_info=True)
        raise


default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': days_ago(1),
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 2,
    'retry_delay': timedelta(minutes=5),
}


with DAG(
    dag_id='rqsd_devo_replicator_test_old',
    default_args=default_args,
    description='Run Devo replicator workflow',
    schedule_interval=None,
    catchup=False,
    tags=['Devo', 'RQSD', 'Replicator'],
) as dag:

    t1 = PythonOperator(
        task_id='start_log_table',
        python_callable=start_log_table_task,
    )

    t2 = PythonOperator(
        task_id='export_table',
        python_callable=export_table_task,
    )

    t3 = PythonOperator(
        task_id='devo_impyla',
        python_callable=devo_impyla_task,
    )

    t4 = PythonOperator(
        task_id='end_log_table',
        python_callable=end_log_table_task,
    )

    t1 >> t2 >> t3 >> t4
|
||||
|
||||
132
airflow/TestDags/archive/devo_replicator_withSQLOperator.py
Normal file
132
airflow/TestDags/archive/devo_replicator_withSQLOperator.py
Normal file
@@ -0,0 +1,132 @@
|
||||
from airflow import DAG
from airflow.operators.python import PythonOperator
#from airflow.providers.oracle.operators.oracle import SQLExecuteQueryOperator
from airflow.providers.common.sql.operators.sql import SQLExecuteQueryOperator
from airflow.utils.dates import days_ago
from datetime import timedelta
import logging
import os
import sys

# Make custom modules importable (previous revision used sys/os without importing them).
sys.path.append('/opt/airflow/python/connectors/devo')
sys.path.append('/opt/airflow/python/mrds_common')
sys.path.append('/opt/airflow/src/airflow/dags/ods/rqsd')

# Import your functions
from mrds.utils.manage_runs import init_workflow, finalise_workflow
from devo_replicator.data_replicator.impala_refresher import main as impala_main

## DEVO REPLICATOR WITH SQLOperator
### TODO: pick the fixed params below from the config file based on dev/test
### TODO: params need to be passed from Informatica (WLA call) to the DAG

# Replication parameters.
# NOTE: these must live at module scope — the operator kwargs below are
# evaluated when the DAG file is parsed, so the function-local variables the
# previous revision referenced raised NameError at import time.
CONFIG_PATH = "/opt/airflow/python/devo_replicator/config/env_config.yaml"
P_SERVICE_NAME = 'MOPDB'
P_TABLE_OWNER = 'MPEC'
P_TABLE_NAME = 'T_MPEC'
# Corporate store is 'crp_mopdb' for mopdb and 'crp_rar' for rar.
CORPORATE_STORE = 'crp_mopdb'
# Subject to change as appropriate (should come from the env config yaml).
P_OBJECTSTORE_URI = 'https://devo-crp-ffppyd8q.bucket.vpce-040b28f5818b670c1-owicl3ow.s3.eu-central-1.vpce.amazonaws.com/mopdb/db'

default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': days_ago(1),
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 2,
    'retry_delay': timedelta(minutes=5),
}

with DAG(
    dag_id='rqsd_devo_replicator_2',
    default_args=default_args,
    description='Run Devo replicator workflow',
    schedule_interval=None,
    catchup=False,
    tags=['Devo', 'RQSD', 'Replicator'],
) as dag:

    def init_step(**context):
        """Log the run context and publish the run id to XCom for downstream tasks."""
        p_run_id = str(context['ti'].run_id)
        print(f"=== DEBUG INFO : {p_run_id} ===")
        context['ti'].xcom_push(key='p_run_id', value=p_run_id)

    # Renamed from 'init_step' so the operator no longer shadows the callable above.
    init_task = PythonOperator(
        task_id='init_step',
        python_callable=init_step,
    )

    # 'parameters' is a templated field of SQLExecuteQueryOperator, so the run
    # id is injected at runtime via Jinja instead of at parse time.
    # NOTE: the operator expects 'conn_id', not 'oracle_conn_id'.
    t1 = SQLExecuteQueryOperator(
        task_id='start_log_table',
        conn_id='oracle_default',
        sql="BEGIN MRDS_LOADER.DATA_REPLICATOR.start_log_table(:p_run_id, :p_service_name, :p_table_owner, :p_table_name); END;",
        parameters={
            'p_run_id': "{{ run_id }}",
            'p_service_name': P_SERVICE_NAME,
            'p_table_owner': P_TABLE_OWNER,
            'p_table_name': P_TABLE_NAME,
        },
    )

    t2 = SQLExecuteQueryOperator(
        task_id='export_table',
        conn_id='oracle_default',
        sql="BEGIN MRDS_LOADER.DATA_REPLICATOR.export_table(:p_service_name, :p_table_owner, :p_table_name, :p_objectstore_uri); END;",
        parameters={
            'p_service_name': P_SERVICE_NAME,
            'p_table_owner': P_TABLE_OWNER,
            'p_table_name': P_TABLE_NAME,
            'p_objectstore_uri': P_OBJECTSTORE_URI,
        },
    )

    def devo_impyla_task(**context):
        """Refresh Impala metadata for the replicated table via the Devo refresher.

        The previous revision referenced undefined names (env_config_path, env,
        table, corporate_store); they are now taken from the environment and the
        module-level constants.
        """
        env = os.getenv("MRDS_ENV")
        status = impala_main(CONFIG_PATH, env, P_TABLE_NAME, CORPORATE_STORE)
        logging.info("Impyla (Devo) refresh finished with status: %s", status)

    t3 = PythonOperator(
        task_id='devo_impyla',
        python_callable=devo_impyla_task,
    )

    t4 = SQLExecuteQueryOperator(
        task_id='end_log_table',
        conn_id='oracle_default',
        sql="BEGIN MRDS_LOADER.DATA_REPLICATOR.end_log_table(:p_service_name, :p_table_owner, :p_table_name); END;",
        parameters={
            'p_service_name': P_SERVICE_NAME,
            'p_table_owner': P_TABLE_OWNER,
            'p_table_name': P_TABLE_NAME,
        },
    )

    # TODO: t4 should run even if an upstream task failed (trigger_rule=ALL_DONE)
    # TODO: add a final task that fails the DAG if any previous task failed
    init_task >> t1 >> t2 >> t3 >> t4
|
||||
65
airflow/TestDags/archive/dmarsdb1_data_replicator_test.py
Normal file
65
airflow/TestDags/archive/dmarsdb1_data_replicator_test.py
Normal file
@@ -0,0 +1,65 @@
|
||||
from airflow import DAG
from airflow.providers.common.sql.operators.sql import SQLExecuteQueryOperator
from airflow.operators.python import PythonOperator
from datetime import datetime, timedelta
import logging

logger = logging.getLogger(__name__)

## OLD (Package replication)


def test_oracle_connection(**context):
    """Test the Oracle connection and log the result.

    Opens a connection through the 'marsdb_loader' Airflow connection, runs a
    trivial probe query, and re-raises on any failure so the task is marked
    failed. The cursor and connection are released on every path — the
    previous revision leaked both when the probe query raised.
    """
    conn_id = "marsdb_loader"
    from airflow.providers.oracle.hooks.oracle import OracleHook

    try:
        logger.debug("Attempting to connect to Oracle database...")
        hook = OracleHook(oracle_conn_id=conn_id)
        conn = hook.get_conn()
        try:
            cursor = conn.cursor()
            try:
                cursor.execute("SELECT 1 FROM dual")
                result = cursor.fetchone()
                logger.info(f"Connection test successful. Result: {result}")
            finally:
                cursor.close()
        finally:
            conn.close()
    except Exception as e:
        logger.error(f"Connection test failed: {str(e)}")
        raise


default_args = {
    'depends_on_past': False,
    'start_date': datetime(2025, 6, 25),
    'retries': 1,
    'retry_delay': timedelta(seconds=15),
}

with DAG(
    'oracle_plsql_test_dag',
    default_args=default_args,
    schedule_interval=None,
    catchup=False,
) as dag:

    test_connection = PythonOperator(
        task_id='test_oracle_connection',
        python_callable=test_oracle_connection,
    )

    # Export procedure invoked with named parameters.
    run_plsql = SQLExecuteQueryOperator(
        task_id='run_plsql_procedure',
        conn_id="marsdb_loader",
        sql="""
        BEGIN
            DATA_REPLICATOR.export_table(
                p_table_owner => 'c2d',
                p_table_name => 't_all_assets_servicer',
                p_objectstore_uri => 'https://oci-test-sani.bucket.vpce-0b3a5f000733397b0-kxlyoh5z.s3.eu-central-1.vpce.amazonaws.com/',
                p_date_column => 'SNAPSHOT_DATE'
            );
        END;
        """,
    )

    test_connection >> run_plsql
|
||||
171
airflow/TestDags/archive/old_devo_rqsd_mrds_workflow.py
Normal file
171
airflow/TestDags/archive/old_devo_rqsd_mrds_workflow.py
Normal file
@@ -0,0 +1,171 @@
|
||||
import sys
import os
from airflow import DAG
from airflow.operators.python import PythonOperator
from airflow.utils.dates import days_ago
from datetime import datetime, timedelta
import logging

### DEVO CONNECTOR WITH DYNAMIC WORKFLOW CONTEXT & HISTORY KEY

# Importing custom modules
sys.path.append('/opt/airflow/python/connectors/devo')
sys.path.append('/opt/airflow/python/mrds_common')
sys.path.append('/opt/airflow/src/airflow/dags/ods/rqsd')

# Import your functions
from mrds.utils.manage_runs import init_workflow, finalise_workflow
from devo_connector import main as devo_main
from devo_connector_v2 import run as devo_main2
from mrds.core import main as mrds_main

# Default arguments
default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': days_ago(1),
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 3,
    'retry_delay': timedelta(minutes=5),
}

# DAG id derived from the file basename, so renaming the file renames the DAG.
dag_id = os.path.splitext(os.path.basename(__file__))[0]

with DAG(
    dag_id=dag_id,
    default_args=default_args,
    description='Run devo RQSD data ingestion workflow with MRDS processing',
    schedule_interval=None,
    catchup=False,
    tags=["Devo", "RQSD", "MRDS", "Connector"],
    params={
        "source_filename": "",
        "config_file": "",
    },
) as dag:

    def run_devo_connector_rqsd(**context):
        """Run the Devo RQSD connector workflow.

        Initialises an MRDS workflow run, executes the Devo connector, and
        publishes the workflow context and history key to XCom for downstream
        tasks. On failure, the workflow is finalised as FAILED when it was
        successfully initialised, and the exception is re-raised.
        """
        # Initialised up front so the except-branch can reliably tell whether
        # init_workflow() succeeded (the previous `'...' in locals()` check
        # was fragile).
        a_workflow_history_key = None
        try:
            env = os.getenv("MRDS_ENV")
            username = os.getenv("MRDS_LOADER_DB_USER")
            password = os.getenv("MRDS_LOADER_DB_PASS")
            tnsalias = os.getenv("MRDS_LOADER_DB_TNS")

            if not all([username, password, tnsalias]):
                raise ValueError(
                    "Missing one or more required environment variables: "
                    "MRDS_LOADER_DB_USER, MRDS_LOADER_DB_PASS, MRDS_LOADER_DB_TNS"
                )

            logging.info(f"Starting Devo RQSD workflow from Airflow DAG for env '{env}'")

            database_name = 'MOPDB'
            workflow_name = 'w_MOPDB_RQSD_PROCESS'
            workflow_run_id = str(context['ti'].run_id)

            a_workflow_history_key = init_workflow(database_name, workflow_name, workflow_run_id)
            logging.info(f"Initialized workflow with history key: {a_workflow_history_key}")

            workflow_context = {
                "run_id": workflow_run_id,
                "a_workflow_history_key": a_workflow_history_key
            }

            flow_config_path = "/opt/airflow/src/airflow/dags/ods/rqsd/rqsd_process/config/yaml/flow_config_rqsd_observations.yaml"
            env_config_path = "/opt/airflow/python/connectors/devo/config/env_config_rqsd.yaml"

            logging.info("Starting Devo RQSD workflow from Airflow DAG")
            count = devo_main2(workflow_context, flow_config_path, env_config_path, env)
            print("=================================================================")
            print(f"Devo RQSD workflow completed successfully with count : {count}")
            logging.info(f"Devo RQSD workflow completed successfully with count : {count}")

            # Push the workflow context and history key to XCom for downstream tasks
            context['ti'].xcom_push(key='workflow_history_key', value=a_workflow_history_key)
            context['ti'].xcom_push(key='workflow_context', value=workflow_context)

        except Exception as e:
            logging.error(f"Error running Devo RQSD workflow: {e}", exc_info=True)
            # If init_workflow succeeded but the workflow failed, finalize with FAILED status
            if a_workflow_history_key is not None:
                try:
                    finalise_workflow(a_workflow_history_key, "FAILED")
                except Exception as finalise_error:
                    logging.error(f"Failed to finalise workflow after error: {finalise_error}")
            raise

    def run_mrds_task(**context):
        """Run the MRDS processing task on the file produced by the connector."""
        try:
            ti = context.get('ti')
            workflow_context = ti.xcom_pull(key='workflow_context', task_ids='run_devo_connector_rqsd')
            if not workflow_context:
                raise ValueError("No workflow_context from Task 1")

            print("=== workflow_context ====:", workflow_context)

            # NOTE(review): filename/config are hard-coded here even though the
            # DAG declares 'source_filename'/'config_file' params — presumably
            # intentional for this archived revision; confirm before reuse.
            source_filename = "RQSD_OBSERVATIONS.csv"
            config_file = "/opt/airflow/src/airflow/dags/ods/rqsd/rqsd_process/config/yaml/flow_config_devo_process.yaml"

            print("---- run_mrds_task ----")
            print("source_filename :", source_filename)
            print("config_file = ", config_file)
            print("------------------------")

            if not source_filename:
                raise ValueError("No source_filename provided in DAG run params.")
            if not config_file:
                raise ValueError("No config_file path provided in DAG run params.")

            logging.info(f"Starting MRDS task with source_filename: {source_filename}, config_file: {config_file}")

            # Run MRDS with the workflow context from the previous task
            mrds_main(workflow_context, source_filename, config_file, generate_workflow_context=True)

            logging.info("MRDS task completed successfully")

        except Exception as e:
            logging.error(f"Error running MRDS task: {e}", exc_info=True)
            raise

    def finalise_workflow_task(**context):
        """Finalize the workflow with SUCCESS status using the XCom history key."""
        ti = context['ti']
        a_workflow_history_key = ti.xcom_pull(key='workflow_history_key', task_ids='run_devo_connector_rqsd')

        if a_workflow_history_key is None:
            raise ValueError("No workflow history key found in XCom; cannot finalise workflow")

        # Call finalise with SUCCESS status
        finalise_workflow(a_workflow_history_key, "SUCCESS")
        logging.info(f"Finalised workflow with history key {a_workflow_history_key} as SUCCESS")

    # Task definitions
    run_devo = PythonOperator(
        task_id='run_devo_connector_rqsd',
        python_callable=run_devo_connector_rqsd,
        provide_context=True,
    )

    run_mrds = PythonOperator(
        task_id='run_mrds_task',
        python_callable=run_mrds_task,
        provide_context=True,
    )

    finalize = PythonOperator(
        task_id='finalise_workflow',
        python_callable=finalise_workflow_task,
        provide_context=True,
    )

    # Task dependencies
    run_devo >> run_mrds >> finalize
|
||||
320
airflow/TestDags/archive/rqsd_devo_replicator_2.py
Normal file
320
airflow/TestDags/archive/rqsd_devo_replicator_2.py
Normal file
@@ -0,0 +1,320 @@
|
||||
from __future__ import annotations

import os
import sys
import logging
import yaml
from datetime import timedelta

from airflow import DAG
from airflow.utils.dates import days_ago
from airflow.utils.trigger_rule import TriggerRule
from airflow.operators.python import PythonOperator
from airflow.providers.common.sql.operators.sql import SQLExecuteQueryOperator

try:
    from airflow.exceptions import AirflowFailException
except Exception:  # fallback for older Airflow
    from airflow.exceptions import AirflowException as AirflowFailException

# --- Custom module paths (as in snippet) ---
sys.path.append('/opt/airflow/python/connectors/devo')
sys.path.append('/opt/airflow/python/mrds_common')
sys.path.append('/opt/airflow/src/airflow/dags/ods/rqsd')
sys.path.append('/opt/airflow/python/devo_replicator/data_replicator')

# --- custom imports ---
from mrds.utils import oraconn
from impala_refresher import main as impala_main

# --- Config path ---
ENV_CONFIG_PATH = "/opt/airflow/python/devo_replicator/config/env_config.yaml"

default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': days_ago(1),
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 2,
    'retry_delay': timedelta(minutes=5),
}


def _call_replicator_proc(proc_name, proc_args, suppress_errors=False):
    """Execute one MRDS_LOADER.DATA_REPLICATOR procedure on its own connection.

    Opens a dedicated Oracle connection, runs the procedure, commits, and
    always closes the connection. When suppress_errors is True (cleanup
    tasks), failures are logged but not re-raised. Previously this
    connect/run/commit/close boilerplate was duplicated in three tasks.
    """
    oracle_conn = None
    try:
        oracle_conn = oraconn.connect('MRDS_LOADER')
        logging.info("Oracle connection established successfully for %s", proc_name)
        oraconn.run_proc(oracle_conn, proc_name, proc_args)
        oracle_conn.commit()
        logging.info("%s executed successfully", proc_name)
    except Exception as e:
        logging.error(f"Error in {proc_name}: {e}")
        if suppress_errors:
            # Don't raise: this path is used by the cleanup task.
            logging.info("Continuing despite %s error (cleanup task)", proc_name)
        else:
            raise
    finally:
        if oracle_conn:
            try:
                oracle_conn.close()
                logging.info("Oracle connection closed for %s", proc_name)
            except Exception as e:
                logging.error(f"Error closing connection for {proc_name}: {e}")


with DAG(
    dag_id='rqsd_devo_replicator_2',
    default_args=default_args,
    description='Run Devo replicator workflow',
    schedule=None,
    catchup=False,
    tags=['Devo', 'RQSD', 'Replicator'],
) as dag:

    # -------------------------------
    # 1) Init: read config + set XCom
    # -------------------------------
    def init_step(**context):
        """Resolve run parameters from dag_run.conf + env config and publish them via XCom."""
        dag_run = context.get("dag_run")
        ti = context["ti"]

        conf = (dag_run.conf or {}) if dag_run else {}

        # BUGFIX: lower-case the env regardless of its source. Previously only
        # the os.getenv() fallback was lower-cased, so a dag_run.conf value
        # like "DEV" failed the validation below.
        env = (conf.get("env") or os.getenv("MRDS_ENV", "dev")).lower()
        if env not in {"dev", "tst"}:
            raise ValueError(f"Unsupported env '{env}'. Expected 'dev' or 'tst'.")

        # hardcoded the mopdb
        # ====================================
        store = "mopdb"
        # ====================================
        if store not in {"mopdb", "rar"}:
            raise ValueError(f"Unsupported store '{store}'. Expected 'mopdb' or 'rar'.")

        p_service_name = "MOPDB" if store == "mopdb" else "RAR"
        p_table_owner = "MPEC"
        p_table_name = "T_MPEC"

        with open(ENV_CONFIG_PATH, "r") as f:
            cfg = yaml.safe_load(f)

        env_cfg = cfg[env]
        store_cfg = cfg[store]

        # Substitute the store name into the templated S3 URI from the config.
        p_objectstore_uri = env_cfg["S3_LOCATION_URI"].replace("{0}", store.lower())

        p_run_id = str(ti.run_id)
        logging.info("=== init_step === env=%s store=%s run_id=%s", env, store, p_run_id)
        logging.info("objectstore_uri=%s", p_objectstore_uri)

        xcom = {
            "env": env,
            "store": store,
            "config_path": ENV_CONFIG_PATH,
            "p_run_id": p_run_id,
            "p_service_name": p_service_name,
            "p_table_owner": p_table_owner,
            "p_table_name": p_table_name,
            "p_objectstore_uri": p_objectstore_uri,
            "corporate_store": store_cfg["corporate_store"],  # "crp_mopdb" or "crp_rar"
        }

        print(" ============= DEBUG PARAMS ============= ")
        print(xcom)

        for k, v in xcom.items():
            ti.xcom_push(key=k, value=v)

    init = PythonOperator(
        task_id='init_step',
        python_callable=init_step,
    )

    # ------------------------------------
    # 2) log table (Oracle procedure)
    # ------------------------------------
    def start_log_table_task(**context):
        """Call DATA_REPLICATOR.start_log_table with parameters from init_step."""
        ti = context["ti"]
        _call_replicator_proc(
            'MRDS_LOADER.DATA_REPLICATOR.start_log_table',
            [
                ti.xcom_pull(task_ids='init_step', key='p_run_id'),
                ti.xcom_pull(task_ids='init_step', key='p_service_name'),
                ti.xcom_pull(task_ids='init_step', key='p_table_owner'),
                ti.xcom_pull(task_ids='init_step', key='p_table_name'),
            ],
        )

    t1 = PythonOperator(
        task_id='start_log_table',
        python_callable=start_log_table_task,
    )

    # ---------------------------------------------------------
    # 3) Export table (Oracle procedure writes to object store)
    # ---------------------------------------------------------
    def export_table_task(**context):
        """Call DATA_REPLICATOR.export_table to write the table to the object store."""
        ti = context["ti"]
        _call_replicator_proc(
            'MRDS_LOADER.DATA_REPLICATOR.export_table',
            [
                ti.xcom_pull(task_ids='init_step', key='p_service_name'),
                ti.xcom_pull(task_ids='init_step', key='p_table_owner'),
                ti.xcom_pull(task_ids='init_step', key='p_table_name'),
                ti.xcom_pull(task_ids='init_step', key='p_objectstore_uri'),
            ],
        )

    t2 = PythonOperator(
        task_id='export_table',
        python_callable=export_table_task,
        trigger_rule=TriggerRule.ALL_DONE,  # Continue even if t1 failed
    )

    # ---------------------------------------------
    # 4) Devo / Impyla refresh (Python Package)
    # ---------------------------------------------
    def devo_impyla_task(**context):
        """Refresh Impala metadata and stats for the replicated table."""
        ti = context["ti"]
        env = ti.xcom_pull(task_ids='init_step', key='env')
        store = ti.xcom_pull(task_ids='init_step', key='store')
        corporate_store = ti.xcom_pull(task_ids='init_step', key='corporate_store')
        config_path = ti.xcom_pull(task_ids='init_step', key='config_path')

        # For Impala: corporate_store.table_name (e.g., crp_mopdb.T_MPEC),
        # NOT corporate_store.owner.table_name (crp_mopdb.MPEC.T_MPEC - INVALID).
        table_name = ti.xcom_pull(task_ids='init_step', key='p_table_name')

        logging.info(
            "Starting Impyla refresh with env=%s store=%s corporate_store=%s table=%s",
            env, store, corporate_store, table_name
        )
        logging.info("Will execute: INVALIDATE METADATA %s.%s", corporate_store, table_name)
        logging.info("Will execute: COMPUTE STATS %s.%s", corporate_store, table_name)

        try:
            status = impala_main(config_path, env, table_name, corporate_store)
            logging.info("Impyla (Devo) task finished successfully. Status: %s", status)
            return status
        except Exception as e:
            logging.error(f"Error in devo_impyla_task: {e}")
            raise

    t3 = PythonOperator(
        task_id='devo_impyla',
        python_callable=devo_impyla_task,
        trigger_rule=TriggerRule.ALL_DONE,  # Continue even if t2 failed
    )

    # -------------------------------------
    # 5) End log table (always executes after t1, t2, t3 complete)
    # -------------------------------------
    def end_log_table_task(**context):
        """Call DATA_REPLICATOR.end_log_table; errors are swallowed (cleanup task)."""
        ti = context["ti"]
        _call_replicator_proc(
            'MRDS_LOADER.DATA_REPLICATOR.end_log_table',
            [
                ti.xcom_pull(task_ids='init_step', key='p_service_name'),
                ti.xcom_pull(task_ids='init_step', key='p_table_owner'),
                ti.xcom_pull(task_ids='init_step', key='p_table_name'),
            ],
            suppress_errors=True,
        )

    t4 = PythonOperator(
        task_id='end_log_table',
        python_callable=end_log_table_task,
        trigger_rule=TriggerRule.ALL_DONE,  # Run after t1, t2, t3 complete (success or failure)
    )

    # -----------------------------------------------------
    # 6) Check and fail the DAG if any of t1..t3 actually failed
    # -----------------------------------------------------
    def fail_if_any_failed(**context):
        """Fail the DAG run when any critical upstream task ended in 'failed'."""
        dag_run = context['dag_run']
        check_tasks = ['start_log_table', 'export_table', 'devo_impyla']
        failed = []

        for tid in check_tasks:
            ti_up = dag_run.get_task_instance(tid)
            if ti_up and ti_up.state == 'failed':
                failed.append(tid)

        if failed:
            error_msg = f"Critical task(s) failed: {', '.join(failed)}. DAG execution failed."
            logging.error(error_msg)
            raise AirflowFailException(error_msg)

        logging.info("All critical tasks completed successfully: %s", check_tasks)

    t5 = PythonOperator(
        task_id='fail_if_any_failed',
        python_callable=fail_if_any_failed,
        trigger_rule=TriggerRule.ALL_DONE,  # Always run after t4
    )

    # ---------
    # Task Dependencies - SEQUENTIAL
    # ---------
    # Sequential flow: init -> t1 -> t2 -> t3
    init >> t1 >> t2 >> t3

    # t4 runs after t1, t2, t3 are all done (regardless of success/failure)
    [t1, t2, t3] >> t4

    # t5 always runs after t4 to check for failures and fail the DAG if needed
    t4 >> t5
|
||||
18
airflow/TestDags/archive/test_oracle_connection.py
Normal file
18
airflow/TestDags/archive/test_oracle_connection.py
Normal file
@@ -0,0 +1,18 @@
|
||||
# Minimal smoke-test DAG: runs a trivial query against the default Oracle
# connection to verify connectivity. (Removed a duplicate `from airflow
# import DAG` and the unused BashOperator import.)
from airflow import DAG
#from airflow.providers.oracle.operators.oracle import OracleOperator
from datetime import datetime
from airflow.providers.common.sql.operators.sql import SQLExecuteQueryOperator

with DAG(
    'test_oracle_connection',
    start_date=datetime(2025, 6, 13),
    schedule_interval=None
) as dag:
    test_query = SQLExecuteQueryOperator(
        task_id='test_oracle_query',
        conn_id='oracle_default',
        sql='SELECT 1 FROM DUAL'
    )
|
||||
|
||||
244
airflow/TestDags/ods_exdi_multi_task_processor.py
Normal file
244
airflow/TestDags/ods_exdi_multi_task_processor.py
Normal file
@@ -0,0 +1,244 @@
|
||||
import sys
|
||||
import os
|
||||
from airflow import DAG
|
||||
from airflow.operators.python import PythonOperator
|
||||
from airflow.utils.dates import days_ago
|
||||
from airflow.utils.trigger_rule import TriggerRule
|
||||
from datetime import datetime, timedelta
|
||||
import logging
|
||||
|
||||
try:
|
||||
from airflow.exceptions import AirflowFailException, AirflowSkipException
|
||||
except Exception:
|
||||
from airflow.exceptions import AirflowException as AirflowFailException
|
||||
from airflow.exceptions import AirflowSkipException
|
||||
|
||||
# Importing custom modules
|
||||
sys.path.append('/opt/airflow/python/mrds_common')
|
||||
sys.path.append('/opt/airflow/src/airflow/dags/ods/exdi')
|
||||
|
||||
from mrds.utils.manage_runs import init_workflow as mrds_init_workflow, finalise_workflow as mrds_finalise_workflow
|
||||
from mrds.core import main as mrds_main
|
||||
|
||||
|
||||
# Configuration Dictionary - First key will be mandatory,
|
||||
# Workflow: Init → First Task (Sequential) → Parallel Tasks → Finalize
|
||||
# Input: Only needs PARSE.yaml config file and source filename
|
||||
|
||||
TASK_CONFIGS = {
|
||||
"m_ODS_EXDI_TASK1": {
|
||||
"source_filename": "EXDI_TASK1.csv",
|
||||
"config_file": "/opt/airflow/src/airflow/dags/ods/exdi/exdi_process/config/yaml/m_ODS_EXDI_TASK1_PARSE.yaml"
|
||||
},
|
||||
"m_ODS_EXDI_TASK2": {
|
||||
"source_filename": "EXDI_TASK2.csv",
|
||||
"config_file": "/opt/airflow/src/airflow/dags/ods/exdi/exdi_process/config/yaml/m_ODS_EXDI_TASK2_PARSE.yaml"
|
||||
},
|
||||
"m_ODS_EXDI_TASK3": {
|
||||
"source_filename": "EXDI_TASK3.csv",
|
||||
"config_file": "/opt/airflow/src/airflow/dags/ods/exdi/exdi_process/config/yaml/m_ODS_EXDI_TASK3_PARSE.yaml"
|
||||
}
|
||||
}
|
||||
|
||||
default_args = {
|
||||
'owner': 'airflow',
|
||||
'depends_on_past': False,
|
||||
'start_date': days_ago(1),
|
||||
'email_on_failure': False,
|
||||
'email_on_retry': False,
|
||||
'retries': 1,
|
||||
'retry_delay': timedelta(minutes=5),
|
||||
}
|
||||
|
||||
dag_id = os.path.splitext(os.path.basename(__file__))[0]
|
||||
|
||||
WORKFLOW_CONFIG = {
|
||||
"database_name": "ODS",
|
||||
"workflow_name": dag_id
|
||||
}
|
||||
|
||||
with DAG(
|
||||
dag_id=dag_id,
|
||||
default_args=default_args,
|
||||
description='Run EXDI data processing workflow with MRDS - Multi-task',
|
||||
schedule_interval=None,
|
||||
catchup=False,
|
||||
tags=["EXDI", "MRDS", "Multi-Task", "ODS"]
|
||||
) as dag:
|
||||
|
||||
def init_workflow_task(**context):
    """Initialize the MRDS workflow and publish its context via XCom.

    Validates the required DB environment variables, registers a new workflow
    run, and pushes the history key / context / env for downstream tasks.
    """
    try:
        database_name = WORKFLOW_CONFIG["database_name"]
        workflow_name = WORKFLOW_CONFIG["workflow_name"]

        env = os.getenv("MRDS_ENV", "dev")

        # Collect credentials and report every missing variable at once.
        credentials = {
            "MRDS_LOADER_DB_USER": os.getenv("MRDS_LOADER_DB_USER"),
            "MRDS_LOADER_DB_PASS": os.getenv("MRDS_LOADER_DB_PASS"),
            "MRDS_LOADER_DB_TNS": os.getenv("MRDS_LOADER_DB_TNS"),
        }
        missing_vars = [name for name, value in credentials.items() if not value]
        if missing_vars:
            raise ValueError(f"Missing required environment variables: {', '.join(missing_vars)}")

        workflow_run_id = str(context['ti'].run_id)
        a_workflow_history_key = mrds_init_workflow(database_name, workflow_name, workflow_run_id)

        workflow_context = {
            "run_id": workflow_run_id,
            "a_workflow_history_key": a_workflow_history_key,
        }

        # Publish everything downstream tasks need.
        ti = context['ti']
        ti.xcom_push(key='workflow_history_key', value=a_workflow_history_key)
        ti.xcom_push(key='workflow_context', value=workflow_context)
        ti.xcom_push(key='env', value=env)

        logging.info("Workflow initialization completed successfully")

    except Exception as e:
        logging.error(f"Error initializing workflow: {e}", exc_info=True)
        raise
|
||||
|
||||
def run_mrds_task(**context):
    """Run the MRDS parse step for one EXDI task, reporting status via XCom.

    The task id encodes which TASK_CONFIGS entry to use; on success pushes
    mrds_success/task_status, on failure also pushes error_message and
    re-raises so the task is marked failed.
    """
    ti = context['ti']
    try:
        task_id = context['task'].task_id

        # Operator ids follow the '<task_name>_PARSE' convention; strip the
        # suffix to recover the TASK_CONFIGS key.
        suffix = '_PARSE'
        task_name = task_id[:-len(suffix)] if task_id.endswith(suffix) else task_id

        task_config = TASK_CONFIGS.get(task_name)
        if not task_config:
            raise ValueError(f"No configuration found for task: {task_name}")

        source_filename = task_config["source_filename"]
        config_file = task_config["config_file"]

        workflow_context = ti.xcom_pull(key='workflow_context', task_ids='init_workflow')
        if not workflow_context:
            raise ValueError("No workflow_context from init task")

        if not os.path.exists(config_file):
            raise FileNotFoundError(f"PARSE config file not found: {config_file}")

        logging.info(f"Processing EXDI MRDS with source_filename: {source_filename}, config_file: {config_file}")

        # The source file is expected to already be in the bucket from the EXDI flow.
        mrds_main(workflow_context, source_filename, config_file, generate_workflow_context=False)

        logging.info(f"EXDI MRDS task completed successfully for {task_name}")

        ti.xcom_push(key='mrds_success', value=True)
        ti.xcom_push(key='task_status', value='SUCCESS')
        return "SUCCESS"

    except Exception as e:
        logging.error(f"Error running EXDI MRDS task: {e}", exc_info=True)
        ti.xcom_push(key='mrds_success', value=False)
        ti.xcom_push(key='task_status', value='FAILED')
        ti.xcom_push(key='error_message', value=str(e))
        raise
|
||||
|
||||
def finalise_workflow_task(**context):
    """Finalise the EXDI workflow record based on all PARSE task outcomes.

    Pulls the workflow history key pushed by init_workflow, inspects the
    state of every <task>_PARSE task instance and closes the workflow as
    success ("Y") or failure ("N") via mrds_finalise_workflow. Raises
    AirflowFailException when any PARSE task failed so the DAG run is
    marked failed even though this task uses TriggerRule.ALL_DONE.
    """
    # Pre-initialise so the outer except can safely attempt finalisation
    # (replaces the fragile "'var' in locals()" pattern).
    a_workflow_history_key = None
    try:
        ti = context['ti']
        dag_run = context['dag_run']

        a_workflow_history_key = ti.xcom_pull(key='workflow_history_key', task_ids='init_workflow')
        if a_workflow_history_key is None:
            raise ValueError("No workflow history key found in XCom; cannot finalise workflow")

        # Collect per-task outcomes.
        workflow_success = True
        failure_reasons = []

        for task_name in TASK_CONFIGS.keys():
            mrds_task_id = f'{task_name}_PARSE'
            mrds_task = dag_run.get_task_instance(mrds_task_id)

            if mrds_task.state == 'failed':
                workflow_success = False
                # BUGFIX: bare "except:" replaced with "except Exception"
                # so KeyboardInterrupt/SystemExit are not swallowed.
                try:
                    error_msg = ti.xcom_pull(key='error_message', task_ids=mrds_task_id)
                    failure_reasons.append(f"{task_name}: MRDS task failed - {error_msg}")
                except Exception:
                    failure_reasons.append(f"{task_name}: MRDS task failed")

        # Close the workflow record.
        if workflow_success:
            mrds_finalise_workflow(a_workflow_history_key, "Y")
            logging.info(f"Finalised EXDI workflow with history key {a_workflow_history_key} as SUCCESS")
        else:
            mrds_finalise_workflow(a_workflow_history_key, "N")
            logging.error(f"Finalised EXDI workflow with history key {a_workflow_history_key} as FAILED")
            raise AirflowFailException(f"EXDI Workflow failed: {', '.join(failure_reasons)}")

    except AirflowFailException:
        raise
    except Exception as e:
        logging.error(f"Error finalizing EXDI workflow: {e}", exc_info=True)
        # Best effort: close the workflow as failed if we got far enough
        # to know its history key.
        if a_workflow_history_key:
            try:
                mrds_finalise_workflow(a_workflow_history_key, "N")
            except Exception:
                # Deliberately best-effort; the AirflowFailException below
                # already reports the original error.
                pass
        raise AirflowFailException(f"EXDI Workflow finalization failed: {e}")
|
||||
|
||||
# --- Task creation and wiring ---------------------------------------------
init_workflow = PythonOperator(
    task_id='init_workflow',
    python_callable=init_workflow_task,
    provide_context=True,
)

finalize_workflow = PythonOperator(
    task_id='finalize_workflow',
    python_callable=finalise_workflow_task,
    provide_context=True,
    # Always run so the workflow record is closed even when a task fails.
    trigger_rule=TriggerRule.ALL_DONE,
)

# First configured task runs sequentially; the rest fan out in parallel.
task_names = list(TASK_CONFIGS.keys())
first_task_name = task_names[0]
parallel_task_names = task_names[1:]

first_mrds_task = PythonOperator(
    task_id=f'{first_task_name}_PARSE',
    python_callable=run_mrds_task,
    provide_context=True,
)

init_workflow >> first_mrds_task >> finalize_workflow

for task_name in parallel_task_names:
    parallel_task = PythonOperator(
        task_id=f'{task_name}_PARSE',
        python_callable=run_mrds_task,
        provide_context=True,
    )
    # Parallel tasks only start once the first task has completed.
    first_mrds_task >> parallel_task >> finalize_workflow

logging.info(f"EXDI DAG created with {len(TASK_CONFIGS)} tasks: {list(TASK_CONFIGS.keys())}")
|
||||
44
airflow/TestDags/pen_test_demo.py
Normal file
44
airflow/TestDags/pen_test_demo.py
Normal file
@@ -0,0 +1,44 @@
|
||||
from datetime import datetime
|
||||
from airflow import DAG
|
||||
from airflow.operators.bash import BashOperator
|
||||
from airflow.operators.python import PythonOperator
|
||||
|
||||
|
||||
# RUN it if aiflow wokrs or not.
|
||||
|
||||
def print_statement():
    """Emit a fixed greeting so the PythonOperator run is visible in logs."""
    greeting = "Hello from the Python function!!!!!"
    print(greeting)
|
||||
|
||||
def print_message():
    """Emit the final smoke-test message for the demo DAG."""
    farewell = "Last message from Python!, hope things are going good"
    print(farewell)
|
||||
|
||||
with DAG(
    'demo_task_workflow',
    start_date=datetime(2025, 6, 13),
    schedule_interval=None,
    catchup=False,
) as dag:
    # Alternate Bash and Python steps to smoke-test both operator types.
    task1 = BashOperator(
        task_id='print_with_bash',
        bash_command='echo "Lets begin"',
    )

    task2 = PythonOperator(
        task_id='print_with_python',
        python_callable=print_statement,
    )

    task3 = BashOperator(
        task_id='another_bash_task',
        bash_command='echo "So far so good!"',
    )

    task4 = PythonOperator(
        task_id='another_python_task',
        python_callable=print_message,
    )

    # Strictly linear pipeline.
    task1 >> task2 >> task3 >> task4
|
||||
|
||||
95
airflow/TestDags/rqsd_casper_connector_test.py
Normal file
95
airflow/TestDags/rqsd_casper_connector_test.py
Normal file
@@ -0,0 +1,95 @@
|
||||
import sys
|
||||
import os
|
||||
from airflow import DAG
|
||||
from airflow.operators.python import PythonOperator
|
||||
from airflow.utils.dates import days_ago
|
||||
from datetime import datetime, timedelta
|
||||
import logging
|
||||
|
||||
from mrds.utils.manage_runs import init_workflow, finalise_workflow
|
||||
|
||||
### CASPER CONNECTOR WITH DYNAMIC WORKFLOW CONTEXT & HISTORY KEY
|
||||
|
||||
|
||||
# Importing custom module#s
|
||||
sys.path.append('/opt/airflow/python/connectors/casper')
|
||||
sys.path.append('/opt/airflow/python/mrds_common')
|
||||
sys.path.append('/opt/airflow/src/airflow/dags/ods/rqsd')
|
||||
|
||||
"""username = os.getenv("MRDS_LOADER_DB_USER")
|
||||
password = os.getenv("MRDS_LOADER_DB_PASS")
|
||||
tnsalias = os.getenv("MRDS_LOADER_DB_TNS") """
|
||||
|
||||
# connstr = f"{username}/{password}@{tnsalias}"
|
||||
|
||||
# Importing the main function from casper script
|
||||
from casper_rqsd import main as casper_main
|
||||
|
||||
# Default DAG arguments
|
||||
default_args = {
|
||||
'owner': 'airflow',
|
||||
'depends_on_past': False,
|
||||
'start_date': days_ago(1),
|
||||
'email_on_failure': False,
|
||||
'email_on_retry': False,
|
||||
'retries': 2,
|
||||
'retry_delay': timedelta(minutes=5),
|
||||
}
|
||||
|
||||
with DAG(
|
||||
dag_id='rqsd_casper_connector_test',
|
||||
default_args=default_args,
|
||||
description='Run Casper RQSD data ingestion workflow',
|
||||
schedule_interval=None, # we can set later
|
||||
#start_date=datetime(2025, 10, 7),
|
||||
catchup=False,
|
||||
tags=['Casper', 'RQSD', 'Connector'],
|
||||
) as dag:
|
||||
|
||||
|
||||
def run_casper_rqsd(**context):
    """Initialise an MRDS workflow record and run the Casper RQSD ingestion.

    Registers a workflow run via init_workflow, validates the loader DB
    environment variables, then invokes casper_main with the flow and env
    config paths. Re-raises on any failure so Airflow retries/fails the task.
    """
    try:
        flow_config_path = "/opt/airflow/src/airflow/dags/ods/rqsd/rqsd_process/config/yaml/flow_config_casper.yaml"
        env_config_path = "/opt/airflow/python/connectors/casper/config/env_config.yaml"

        # BUGFIX: database_name / workflow_name were previously undefined,
        # causing a NameError on the init_workflow call below.
        # NOTE(review): confirm these values against the MRDS workflow
        # registry; they mirror the naming used by the sibling Devo DAG.
        database_name = 'RQSD'
        workflow_name = 'w_RQSD_CASPER_PROCESS'

        workflow_run_id = str(context['ti'].run_id)

        a_workflow_history_key = init_workflow(database_name, workflow_name, workflow_run_id)
        logging.info(f"Initialized workflow with history key: {a_workflow_history_key}")

        workflow_context = {
            "run_id": workflow_run_id,
            "a_workflow_history_key": a_workflow_history_key
        }

        # Pick env and loader credentials from environment variables.
        env = os.getenv("MRDS_ENV")
        username = os.getenv("MRDS_LOADER_DB_USER")
        password = os.getenv("MRDS_LOADER_DB_PASS")
        tnsalias = os.getenv("MRDS_LOADER_DB_TNS")
        if not all([username, password, tnsalias]):
            raise ValueError(
                "Missing one or more required environment variables: "
                "MRDS_LOADER_DB_USER, MRDS_LOADER_DB_PASS, MRDS_LOADER_DB_TNS"
            )

        logging.info(
            f"Starting Casper RQSD workflow from Airflow DAG for env '{env}'"
        )

        # Run the connector's main entry point.
        casper_main(workflow_context, flow_config_path, env_config_path, env)

        logging.info("Casper RQSD workflow completed successfully")
    except Exception as e:
        logging.error(f"Error running Casper RQSD workflow: {e}", exc_info=True)
        raise
|
||||
|
||||
# Single-task DAG: the connector runs end to end inside one operator.
run_casper = PythonOperator(
    task_id='run_casper_rqsd',
    python_callable=run_casper_rqsd,
    provide_context=True,
)
|
||||
117
airflow/TestDags/t_MOPDB_RQSD_DEVO_OBSERVATIONS.py
Normal file
117
airflow/TestDags/t_MOPDB_RQSD_DEVO_OBSERVATIONS.py
Normal file
@@ -0,0 +1,117 @@
|
||||
import sys
|
||||
import os
|
||||
from airflow import DAG
|
||||
from airflow.operators.python import PythonOperator
|
||||
from airflow.utils.dates import days_ago
|
||||
from datetime import datetime, timedelta
|
||||
import logging
|
||||
|
||||
### DEVO CONNECTOR WITH DYNAMIC WORKFLOW CONTEXT & HISTORY KEY
|
||||
|
||||
# Importing custom modules
|
||||
sys.path.append('/opt/airflow/python/connectors/devo')
|
||||
sys.path.append('/opt/airflow/python/connectors/devo')
|
||||
|
||||
sys.path.append('/opt/airflow/python/mrds_common')
|
||||
sys.path.append('/opt/airflow/src/airflow/dags/ods/rqsd')
|
||||
|
||||
|
||||
# Import your functions from manage_runs and devo_connector as before
|
||||
from mrds.utils.manage_runs import init_workflow, finalise_workflow
|
||||
from devo_connector import main as devo_main
|
||||
|
||||
default_args = {
|
||||
'owner': 'airflow',
|
||||
'depends_on_past': False,
|
||||
'start_date': days_ago(1),
|
||||
'email_on_failure': False,
|
||||
'email_on_retry': False,
|
||||
'retries': 3,
|
||||
'retry_delay': timedelta(minutes=5),
|
||||
}
|
||||
|
||||
# dynamic name extracton from basename
|
||||
dag_id = os.path.splitext(os.path.basename(__file__))[0]
|
||||
|
||||
with DAG(
|
||||
dag_id=dag_id,
|
||||
default_args=default_args,
|
||||
description='Run devo RQSD data ingestion workflow',
|
||||
schedule_interval=None,
|
||||
catchup=False,
|
||||
tags=["Devo", "RQSD", "Connector"],
|
||||
) as dag:
|
||||
|
||||
def run_devo_connector_rqsd(**context):
    """Initialise an MRDS workflow record and run the Devo RQSD ingestion.

    Validates loader DB environment variables, registers a workflow run,
    invokes devo_main, and pushes the workflow history key to XCom for the
    downstream finalisation task. On failure, best-effort finalises the
    workflow as FAILED before re-raising.
    """
    # Pre-initialise so the except branch can finalise safely
    # (replaces the fragile "'var' in locals()" check).
    a_workflow_history_key = None
    try:
        env = os.getenv("MRDS_ENV")
        username = os.getenv("MRDS_LOADER_DB_USER")
        password = os.getenv("MRDS_LOADER_DB_PASS")
        tnsalias = os.getenv("MRDS_LOADER_DB_TNS")
        if not all([username, password, tnsalias]):
            raise ValueError(
                "Missing one or more required environment variables: "
                "MRDS_LOADER_DB_USER, MRDS_LOADER_DB_PASS, MRDS_LOADER_DB_TNS"
            )

        # BUGFIX: log line previously said "Casper" (copy-paste from the
        # Casper DAG); also removed leftover duplicate debug prints.
        logging.info(f"Starting Devo RQSD workflow from Airflow DAG for env '{env}'")

        database_name = 'MOPDB'
        workflow_name = 'w_MOPDB_RQSD_PROCESS'
        workflow_run_id = str(context['ti'].run_id)

        a_workflow_history_key = init_workflow(database_name, workflow_name, workflow_run_id)
        logging.info(f"Initialized workflow with history key: {a_workflow_history_key}")

        workflow_context = {
            "run_id": workflow_run_id,
            "a_workflow_history_key": a_workflow_history_key
        }

        flow_config_path = "/opt/airflow/src/airflow/dags/ods/rqsd/rqsd_process/config/yaml/flow_config_rqsd_observations.yaml"
        env_config_path = "/opt/airflow/python/connectors/devo/config/env_config_rqsd.yaml"

        devo_main(workflow_context, flow_config_path, env_config_path, env)
        logging.info("Devo RQSD workflow completed successfully")

        # Push the workflow history key to XCom for downstream tasks.
        context['ti'].xcom_push(key='workflow_history_key', value=a_workflow_history_key)

    except Exception as e:
        logging.error(f"Error running Devo RQSD workflow: {e}", exc_info=True)
        # If init_workflow succeeded but the workflow failed, close the
        # record with FAILED status (best effort).
        if a_workflow_history_key is not None:
            try:
                finalise_workflow(a_workflow_history_key, "FAILED")
            except Exception as finalise_error:
                logging.error(f"Failed to finalise workflow after error: {finalise_error}")
        raise
|
||||
|
||||
def finalise_workflow_task(**context):
    """Close the workflow record as SUCCESS.

    Uses the history key pushed to XCom by run_devo_connector_rqsd;
    raises ValueError if it is missing.
    """
    ti = context['ti']
    history_key = ti.xcom_pull(key='workflow_history_key', task_ids='run_devo_connector_rqsd')

    if history_key is None:
        raise ValueError("No workflow history key found in XCom; cannot finalise workflow")

    # The key is only present when the connector task succeeded.
    finalise_workflow(history_key, "SUCCESS")
    logging.info(f"Finalised workflow with history key {history_key} as SUCCESS")
|
||||
|
||||
run_devo = PythonOperator(
    task_id='run_devo_connector_rqsd',
    python_callable=run_devo_connector_rqsd,
    provide_context=True,
)

finalize = PythonOperator(
    task_id='finalise_workflow',
    python_callable=finalise_workflow_task,
    provide_context=True,
)

# Finalisation runs only after a successful connector run
# (default trigger rule: all upstream succeeded).
run_devo >> finalize
|
||||
0
airflow/devo_replicator/.gitkeep
Normal file
0
airflow/devo_replicator/.gitkeep
Normal file
346
airflow/devo_replicator/devo_replicator_core.py
Normal file
346
airflow/devo_replicator/devo_replicator_core.py
Normal file
@@ -0,0 +1,346 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import sys
|
||||
import logging
|
||||
import yaml
|
||||
from datetime import timedelta
|
||||
|
||||
from airflow import DAG
|
||||
from airflow.utils.dates import days_ago
|
||||
from airflow.utils.trigger_rule import TriggerRule
|
||||
from airflow.operators.python import PythonOperator
|
||||
|
||||
try:
|
||||
from airflow.exceptions import AirflowFailException
|
||||
except Exception:
|
||||
from airflow.exceptions import AirflowException as AirflowFailException
|
||||
|
||||
sys.path.append('/opt/airflow/python/mrds_common')
|
||||
sys.path.append('/opt/airflow/python/devo_replicator/data_replicator')
|
||||
|
||||
from mrds.utils import oraconn
|
||||
from impala_refresher import main as impala_main
|
||||
|
||||
from mrds.utils.security_utils import get_verified_run_id, verify_run_id
|
||||
|
||||
ENV_CONFIG_PATH = "/opt/airflow/python/devo_replicator/config/env_config.yaml"
|
||||
|
||||
default_args = {
|
||||
'owner': 'airflow',
|
||||
'depends_on_past': False,
|
||||
'start_date': days_ago(1),
|
||||
'email_on_failure': False,
|
||||
'email_on_retry': False,
|
||||
'retries': 1,
|
||||
'retry_delay': timedelta(minutes=1),
|
||||
}
|
||||
|
||||
with DAG(
|
||||
dag_id='devo_replicator_core',
|
||||
default_args=default_args,
|
||||
description='Core Devo replicator workflow for single table',
|
||||
schedule=None,
|
||||
catchup=False,
|
||||
tags=['DevoReplicator'],
|
||||
max_active_runs=10,
|
||||
max_active_tasks=16,
|
||||
) as dag:
|
||||
|
||||
# Init: validate dag_run.conf, resolve environment config, seed XComs.
def init_step(**context):
    """Validate trigger parameters and publish run metadata to XCom.

    Reads env from MRDS_ENV and store/owner_table from dag_run.conf,
    resolves the object-store URI from the YAML env config, obtains a
    verified run id, and pushes all derived values to XCom for the
    downstream tasks. Raises ValueError on any invalid/missing input.
    """
    dag_run = context.get("dag_run")
    ti = context["ti"]
    conf = (dag_run.conf or {}) if dag_run else {}

    env = os.getenv("MRDS_ENV")
    if not env:
        raise ValueError("MRDS_ENV environment variable is required")
    env = env.lower()

    store = conf.get("store")
    if not store:
        raise ValueError("store parameter is required")
    store = store.lower()

    owner_table = conf.get("owner_table")
    if not owner_table or '.' not in owner_table:
        raise ValueError("owner_table must be in format 'OWNER.TABLE_NAME'")

    table_owner, table_name = owner_table.split('.', 1)

    if env not in {"dev", "tst", "acc", "prd"}:
        raise ValueError(f"Unsupported env '{env}'. Expected 'dev', 'tst', 'acc' or 'prd'.")
    if store not in {"mopdb", "rar", "rqsd"}:
        raise ValueError(f"Unsupported store '{store}'. Expected 'mopdb', 'rar', 'rqsd'.")

    # Service name is the upper-cased store identifier (mapping keeps the
    # valid values explicit alongside the check above).
    p_service_name = {"mopdb": "MOPDB", "rar": "RAR", "rqsd": "RQSD"}[store]

    with open(ENV_CONFIG_PATH, "r") as f:
        cfg = yaml.safe_load(f)

    env_cfg = cfg[env]
    store_cfg = cfg[store]
    p_objectstore_uri = env_cfg["S3_LOCATION_URI"].replace("{0}", store.lower())

    # Get verified run_id using security utilities.
    p_run_id = get_verified_run_id(context)

    logging.info("=== init_step === env=%s store=%s table=%s.%s run_id=%s",
                 env, store, table_owner, table_name, p_run_id)

    # Publish everything downstream tasks need.
    run_metadata = {
        "env": env,
        "store": store,
        "config_path": ENV_CONFIG_PATH,
        "p_run_id": p_run_id,
        "p_service_name": p_service_name,
        "p_table_owner": table_owner,
        "p_table_name": table_name,
        "p_objectstore_uri": p_objectstore_uri,
        "corporate_store": store_cfg["corporate_store"],
        "owner_table": owner_table,
    }
    for key, value in run_metadata.items():
        ti.xcom_push(key=key, value=value)


init = PythonOperator(
    task_id='init_step',
    python_callable=init_step,
)
|
||||
|
||||
# Open the replication log row for this run.
def start_log_table_task(**context):
    """Invoke MRDS_LOADER.DATA_REPLICATOR.start_log_table for this run.

    Pulls run/service/table identifiers from init_step XComs; commits on
    success, re-raises on failure, always closes the Oracle connection.
    """
    ti = context["ti"]
    run_id = ti.xcom_pull(task_ids='init_step', key='p_run_id')
    service_name = ti.xcom_pull(task_ids='init_step', key='p_service_name')
    table_owner = ti.xcom_pull(task_ids='init_step', key='p_table_owner')
    table_name = ti.xcom_pull(task_ids='init_step', key='p_table_name')

    conn = None
    try:
        conn = oraconn.connect('MRDS_LOADER')
        oraconn.run_proc(
            conn,
            'MRDS_LOADER.DATA_REPLICATOR.start_log_table',
            [run_id, service_name, table_owner, table_name]
        )
        conn.commit()
        logging.info("start_log_table procedure executed successfully")
    except Exception as e:
        logging.error(f"Error in start_log_table: {e}")
        raise
    finally:
        if conn:
            conn.close()


t1 = PythonOperator(
    task_id='start_log_table',
    python_callable=start_log_table_task,
)
|
||||
|
||||
# Export the table to the object store.
def export_table_task(**context):
    """Invoke MRDS_LOADER.DATA_REPLICATOR.export_table for this run.

    Pulls service/table/object-store identifiers from init_step XComs;
    commits on success, re-raises on failure, always closes the connection.
    """
    ti = context["ti"]
    service_name = ti.xcom_pull(task_ids='init_step', key='p_service_name')
    table_owner = ti.xcom_pull(task_ids='init_step', key='p_table_owner')
    table_name = ti.xcom_pull(task_ids='init_step', key='p_table_name')
    objectstore_uri = ti.xcom_pull(task_ids='init_step', key='p_objectstore_uri')

    conn = None
    try:
        conn = oraconn.connect('MRDS_LOADER')
        oraconn.run_proc(
            conn,
            'MRDS_LOADER.DATA_REPLICATOR.export_table',
            [service_name, table_owner, table_name, objectstore_uri]
        )
        conn.commit()
        logging.info("export_table procedure executed successfully")
    except Exception as e:
        logging.error(f"Error in export_table: {e}")
        raise
    finally:
        if conn:
            conn.close()


t2 = PythonOperator(
    task_id='export_table',
    python_callable=export_table_task,
    # Runs even when start_log_table failed; the check task downstream
    # decides whether the pipeline may continue.
    trigger_rule=TriggerRule.ALL_DONE,
)
|
||||
|
||||
# Gate: only continue to the table generator if the core tasks succeeded.
def check_previous_tasks_success(**context):
    """Fail fast unless start_log_table and export_table both succeeded.

    Raises AirflowFailException listing the non-success task states;
    returns True when the pipeline may proceed.
    """
    ti = context["ti"]
    dag_run = context['dag_run']
    store = ti.xcom_pull(task_ids='init_step', key='store')

    bad_states = []
    for upstream_id in ('start_log_table', 'export_table'):
        upstream_ti = dag_run.get_task_instance(upstream_id)
        if upstream_ti and upstream_ti.state != 'success':
            bad_states.append(f"{upstream_id}:{upstream_ti.state}")

    if bad_states:
        error_msg = f"Cannot proceed with {store} table generator. Previous tasks not successful: {', '.join(bad_states)}"
        logging.error(error_msg)
        raise AirflowFailException(error_msg)

    logging.info(f"All previous tasks succeeded. Ready to trigger {store} table generator.")
    return True


t3_check = PythonOperator(
    task_id='check_previous_tasks_success',
    python_callable=check_previous_tasks_success,
    trigger_rule=TriggerRule.ALL_DONE,
)
|
||||
|
||||
def drop_table(**context):
    """Clear the _COPY table's S3 location before re-triggering generation.

    Calls MRDS_LOADER.DATA_REPLICATOR.clear_s3_bucket for
    <owner>.<table>_COPY at the configured object-store URI.
    """
    ti = context["ti"]
    service_name = ti.xcom_pull(task_ids='init_step', key='p_service_name')
    table_owner = ti.xcom_pull(task_ids='init_step', key='p_table_owner')
    table_name = ti.xcom_pull(task_ids='init_step', key='p_table_name')
    objectstore_uri = ti.xcom_pull(task_ids='init_step', key='p_objectstore_uri')

    conn = None
    try:
        conn = oraconn.connect('MRDS_LOADER')
        oraconn.run_proc(
            conn,
            'MRDS_LOADER.DATA_REPLICATOR.clear_s3_bucket',
            [service_name, table_owner, table_name + '_COPY', objectstore_uri]
        )
        conn.commit()
        # NOTE(review): the [:-5] slice presumably strips a fixed 5-char
        # suffix for display only — confirm against actual table naming.
        logging.info("clear_s3_bucket for table {0} procedure executed successfully".format(table_name[:-5].lower()))
    except Exception as e:
        logging.error(f"Error in clear_s3_bucket: {e}")
        raise
    finally:
        if conn:
            conn.close()


t3_drop = PythonOperator(
    task_id='drop_table',
    python_callable=drop_table,
    trigger_rule=TriggerRule.ALL_DONE,
)
|
||||
|
||||
# Trigger the store-specific table generator DAG.
def trigger_table_generator(**context):
    """Trigger the table-generator DAG matching the configured store.

    Builds the target owner_table with a _COPY suffix, triggers the DAG
    via the Airflow API, records the triggered run id/dag id in XCom and
    returns the run id. Raises ValueError for an unknown store.
    """
    from airflow.api.common.trigger_dag import trigger_dag

    ti = context["ti"]
    store = ti.xcom_pull(task_ids='init_step', key='store')
    table_owner = ti.xcom_pull(task_ids='init_step', key='p_table_owner')
    table_name = ti.xcom_pull(task_ids='init_step', key='p_table_name')

    # Map store -> generator DAG id.
    generator_dags = {
        "mopdb": 'devo_table_generator_trigger_mopdb',
        "rar": 'devo_table_generator_trigger_rar',
        "rqsd": 'devo_table_generator_trigger_rqsd',
    }
    target_dag_id = generator_dags.get(store)
    if target_dag_id is None:
        raise ValueError(f"Unsupported store: {store}")

    # The generator works on the _COPY table.
    trigger_conf = {
        "owner_table": f"{table_owner}.{table_name}_COPY"
    }

    logging.info(f"Triggering {target_dag_id} with conf: {trigger_conf}")

    try:
        triggered_run = trigger_dag(
            dag_id=target_dag_id,
            conf=trigger_conf,
            execution_date=None,
            replace_microseconds=False
        )

        logging.info(f"Successfully triggered {target_dag_id}, run_id: {triggered_run.run_id}")
        ti.xcom_push(key='triggered_dag_run_id', value=triggered_run.run_id)
        ti.xcom_push(key='triggered_dag_id', value=target_dag_id)

        return triggered_run.run_id
    except Exception as e:
        logging.error(f"Error triggering {target_dag_id}: {e}")
        raise


t3_trigger = PythonOperator(
    task_id='trigger_table_generator',
    python_callable=trigger_table_generator,
    trigger_rule=TriggerRule.ALL_DONE,
)
|
||||
|
||||
# Close the replication log row; tolerant of failure (cleanup semantics).
def end_log_table_task(**context):
    """Invoke MRDS_LOADER.DATA_REPLICATOR.end_log_table for this run.

    Unlike the other DB tasks, errors are logged and swallowed — this is
    a cleanup step and must not fail the DAG on its own.
    """
    ti = context["ti"]
    service_name = ti.xcom_pull(task_ids='init_step', key='p_service_name')
    table_owner = ti.xcom_pull(task_ids='init_step', key='p_table_owner')
    table_name = ti.xcom_pull(task_ids='init_step', key='p_table_name')

    conn = None
    try:
        conn = oraconn.connect('MRDS_LOADER')
        oraconn.run_proc(
            conn,
            'MRDS_LOADER.DATA_REPLICATOR.end_log_table',
            [service_name, table_owner, table_name]
        )
        conn.commit()
        logging.info("end_log_table procedure executed successfully")
    except Exception as e:
        logging.error(f"Error in end_log_table: {e}")
        logging.info("Continuing despite end_log_table error (cleanup task)")
    finally:
        if conn:
            conn.close()


t4 = PythonOperator(
    task_id='end_log_table',
    python_callable=end_log_table_task,
    trigger_rule=TriggerRule.ALL_DONE,
)
|
||||
|
||||
# Final verdict: surface any critical-task failure as a DAG failure.
def fail_if_any_failed(**context):
    """Raise AirflowFailException if any critical task instance failed."""
    dag_run = context['dag_run']
    check_tasks = ['start_log_table', 'export_table', 'check_previous_tasks_success', 'trigger_table_generator']

    failed = [
        tid for tid in check_tasks
        if (lambda inst: inst and inst.state == 'failed')(dag_run.get_task_instance(tid))
    ]

    if failed:
        error_msg = f"Critical task(s) failed: {', '.join(failed)}. DAG execution failed."
        logging.error(error_msg)
        raise AirflowFailException(error_msg)

    logging.info("All critical tasks completed successfully: %s", check_tasks)


t5 = PythonOperator(
    task_id='fail_if_any_failed',
    python_callable=fail_if_any_failed,
    trigger_rule=TriggerRule.ALL_DONE,
)
|
||||
|
||||
# Execution order: init -> open log -> export -> gate -> drop copy -> trigger.
init >> t1 >> t2 >> t3_check >> t3_drop >> t3_trigger
# Cleanup (end_log_table) runs after the core tasks regardless of outcome,
# then the final verdict task decides the DAG run's state.
[t1, t2, t3_trigger] >> t4
t4 >> t5
|
||||
239
airflow/devo_replicator/devo_replicator_core_pandas.py
Normal file
239
airflow/devo_replicator/devo_replicator_core_pandas.py
Normal file
@@ -0,0 +1,239 @@
|
||||
from airflow import DAG
|
||||
from airflow.operators.python_operator import PythonOperator
|
||||
from airflow.hooks.S3_hook import S3Hook
|
||||
from datetime import datetime, timedelta
|
||||
import pandas as pd
|
||||
import pyarrow as pa
|
||||
import pyarrow.parquet as pq
|
||||
import io
|
||||
import os
|
||||
import logging
|
||||
import sys
|
||||
|
||||
sys.path.append('/opt/airflow/python/mrds_common')
|
||||
|
||||
from mrds.utils import oraconn
|
||||
|
||||
|
||||
SERVICE_NAME = "SERVICE_NAME"
|
||||
OWNER = "C2D"
|
||||
TABLE_NAME = "T_CEPH"
|
||||
METADATA_OWNER = "CT_MOPDB"
|
||||
METADATA_TABLE = "mopdb_metadata_inventory"
|
||||
|
||||
USE_LOCAL_STORAGE = True
|
||||
LOCAL_OUTPUT_DIR = "/tmp/devo_replicator_output"
|
||||
|
||||
S3_BUCKET = "bucket-name"
|
||||
S3_PREFIX = "devo/replicator/C2D/T_CEPH/"
|
||||
AWS_CONN_ID = "aws_default"
|
||||
|
||||
DEFAULT_ARGS = {
|
||||
"owner": "airflow",
|
||||
"depends_on_past": False,
|
||||
"email_on_failure": False,
|
||||
"email_on_retry": False,
|
||||
"retries": 1,
|
||||
"retry_delay": timedelta(minutes=5),
|
||||
}
|
||||
DAG_ID = "devo_replicator_pandas"
|
||||
SCHEDULE_INTERVAL = None
|
||||
CHUNK_SIZE = 100000
|
||||
|
||||
|
||||
def query_oracle_template(owner, table_name):
    """Build a type-safe SELECT for a table from the metadata inventory.

    Queries METADATA_OWNER.METADATA_TABLE for the columns of
    owner.table_name (valid rows only) and returns:
      - output_query: a SELECT with per-column CAST expressions, and
      - column_metadata: dict mapping column name -> target pandas dtype
        ('string', 'Int32', 'Int64' or 'float64').

    Raises ValueError when no metadata rows are found.
    """
    # BUGFIX: `conn` must be pre-initialised — previously it was only
    # assigned inside try, so the finally block raised NameError if the
    # connect call itself failed.
    conn = None
    try:
        input_query = """SELECT
    COLUMN_NAME,
    DATA_TYPE,
    CHAR_LENGTH,
    DATA_PRECISION,
    DATA_SCALE,
    COLUMN_ID,
    CASE
        WHEN DATA_TYPE = 'DATE' OR DATA_TYPE LIKE '%TIMESTAMP%' THEN
            'CAST(' || COLUMN_NAME || ' AS VARCHAR2(100)) AS ' || COLUMN_NAME
        WHEN DATA_TYPE = 'VARCHAR2' OR DATA_TYPE LIKE '%CHAR%' THEN
            'CAST(' || COLUMN_NAME || ' AS VARCHAR2(' || CAST(CHAR_LENGTH AS INT) || ')) AS ' || COLUMN_NAME
        WHEN DATA_TYPE IN ('NUMBER', 'DECIMAL') AND DATA_PRECISION IS NOT NULL AND DATA_SCALE IS NOT NULL THEN
            'CAST(' || COLUMN_NAME || ' AS ' || DATA_TYPE || '(' || CAST(DATA_PRECISION AS INT) || ',' || CAST(DATA_SCALE AS INT) || ')) AS ' || COLUMN_NAME
        WHEN DATA_TYPE IN ('NUMBER', 'DECIMAL') AND DATA_PRECISION IS NOT NULL THEN
            'CAST(' || COLUMN_NAME || ' AS ' || DATA_TYPE || '(' || CAST(DATA_PRECISION AS INT) || ')) AS ' || COLUMN_NAME
        WHEN DATA_TYPE = 'CLOB' THEN
            'TO_CHAR(SUBSTR(' || COLUMN_NAME || ', 1, 32767)) AS ' || COLUMN_NAME
        ELSE
            COLUMN_NAME
    END AS casting
FROM {0}.{1}
WHERE OWNER = '{2}' AND TABLE_NAME = '{3}' AND A_VALID_TO > SYSDATE
ORDER BY COLUMN_ID""".format(METADATA_OWNER, METADATA_TABLE, owner, table_name)

        # BUGFIX: `connect` was undefined — the module imports `oraconn`,
        # so the connection must come from oraconn.connect.
        conn = oraconn.connect('MRDS_LOADER')
        df = pd.read_sql(input_query, conn)

        if df.empty:
            raise ValueError(f"No metadata found for {owner}.{table_name}")

        output_query = 'SELECT ' + ', \n'.join(df['casting'].tolist()) + ' FROM {0}.{1}'.format(owner, table_name)

        # Map each column to the pandas dtype used when reading chunks.
        column_metadata = {}
        for _, row in df.iterrows():
            col_name = row['COLUMN_NAME']
            data_type = row['DATA_TYPE']

            if data_type in ('NUMBER', 'DECIMAL', 'FLOAT', 'BINARY_FLOAT', 'BINARY_DOUBLE'):
                if pd.notna(row['DATA_SCALE']) and row['DATA_SCALE'] > 0:
                    column_metadata[col_name] = 'float64'
                elif pd.notna(row['DATA_PRECISION']) and row['DATA_PRECISION'] <= 9:
                    column_metadata[col_name] = 'Int32'
                elif pd.notna(row['DATA_PRECISION']) and row['DATA_PRECISION'] <= 18:
                    column_metadata[col_name] = 'Int64'
                else:
                    column_metadata[col_name] = 'float64'
            elif data_type == 'DATE' or 'TIMESTAMP' in data_type:
                # Dates are CAST to VARCHAR2 in the SELECT above.
                column_metadata[col_name] = 'string'
            else:
                column_metadata[col_name] = 'string'

        logging.info(f"Generated query template with {len(df)} columns")
        return output_query, column_metadata

    except Exception as e:
        logging.error(f"Error in query_oracle_template: {e}")
        raise
    finally:
        if conn:
            conn.close()
|
||||
|
||||
|
||||
def query_oracle_and_generate_parquet(partition_num, partitions, sql, column_metadata, use_local):
    """Stream one ORA_HASH partition of `sql` to a Parquet file.

    Reads the partition in CHUNK_SIZE chunks, coerces columns to the
    dtypes in column_metadata, and writes a snappy-compressed Parquet
    file either locally (LOCAL_OUTPUT_DIR) or to S3 (S3_BUCKET/S3_PREFIX).

    Args:
        partition_num: zero-based partition index (0 .. partitions-1).
        partitions: total number of ORA_HASH partitions.
        sql: base SELECT produced by query_oracle_template.
        column_metadata: column name -> pandas dtype mapping.
        use_local: True writes to local disk, False uploads to S3.
    """
    logging.info(f"[Pandas-Partition {partition_num}] Starting processing (Mode: {'LOCAL' if use_local else 'S3'})")

    # Partition rows deterministically via ORA_HASH over ROWID.
    partition_sql = f"""SELECT /*+ PARALLEL(t, {partitions}) */ *
FROM (
{sql}
) t
WHERE ORA_HASH(ROWID, {partitions - 1}) = {partition_num}"""

    # BUGFIX: `connect` was undefined — use oraconn.connect, and
    # pre-initialise handles so the finally block cannot NameError.
    conn = None
    file_handle = None
    try:
        conn = oraconn.connect("MRDS_LOADER")

        if use_local:
            os.makedirs(LOCAL_OUTPUT_DIR, exist_ok=True)
            output_path = os.path.join(LOCAL_OUTPUT_DIR, f"partition_{partition_num:04d}.parquet")
            file_handle = open(output_path, 'wb')
            write_target = file_handle
        else:
            s3_hook = S3Hook(aws_conn_id=AWS_CONN_ID)
            s3_client = s3_hook.get_conn()
            s3_key = f"{S3_PREFIX}partition_{partition_num:04d}.parquet"
            buffer = io.BytesIO()
            write_target = buffer

        chunk_iterator = pd.read_sql(partition_sql, conn, chunksize=CHUNK_SIZE)

        pqwriter = None
        total_rows = 0
        chunk_count = 0

        for chunk in chunk_iterator:
            # Coerce columns to the metadata-derived dtypes; failures are
            # logged and left as-is (best effort optimisation).
            for col, dtype in column_metadata.items():
                if col in chunk.columns:
                    try:
                        if dtype == 'string':
                            chunk[col] = chunk[col].astype('string')
                        elif dtype.startswith('Int'):
                            chunk[col] = pd.to_numeric(chunk[col], errors='coerce').astype(dtype)
                        elif dtype == 'float64':
                            chunk[col] = pd.to_numeric(chunk[col], errors='coerce')
                    except Exception as e:
                        logging.warning(f"[Pandas-Partition {partition_num}] Could not optimize column {col}: {e}")

            table = pa.Table.from_pandas(chunk, preserve_index=False)

            # Lazily open the writer with the first chunk's schema.
            if pqwriter is None:
                pqwriter = pq.ParquetWriter(
                    write_target,
                    table.schema,
                    compression='snappy',
                    use_dictionary=True,
                    write_statistics=True,
                    version='2.6',
                    data_page_size=1024*1024,
                )

            pqwriter.write_table(table)
            total_rows += len(chunk)
            chunk_count += 1

            # Periodic progress logging.
            if chunk_count % 10 == 0:
                if use_local:
                    file_size_mb = os.path.getsize(output_path) / 1024 / 1024
                else:
                    file_size_mb = write_target.tell() / 1024 / 1024
                logging.info(f"[Pandas-Partition {partition_num}] Processed {total_rows:,} rows, Size: {file_size_mb:.2f} MB")

        if pqwriter:
            pqwriter.close()

        if use_local:
            file_size_mb = os.path.getsize(output_path) / 1024 / 1024
            logging.info(f"[Pandas-Partition {partition_num}] Completed - {total_rows:,} rows, {file_size_mb:.2f} MB saved to {output_path}")
        else:
            write_target.seek(0)
            buffer_size_mb = write_target.getbuffer().nbytes / 1024 / 1024

            logging.info(f"[Pandas-Partition {partition_num}] Uploading {buffer_size_mb:.2f} MB to s3://{S3_BUCKET}/{s3_key}")
            s3_client.upload_fileobj(write_target, S3_BUCKET, s3_key)
            logging.info(f"[Pandas-Partition {partition_num}] Completed - {total_rows:,} rows, {buffer_size_mb:.2f} MB uploaded to S3")

    except Exception as e:
        logging.error(f"[Pandas-Partition {partition_num}] Error: {e}")
        raise
    finally:
        if conn:
            conn.close()
        if use_local:
            if file_handle:
                file_handle.close()
        else:
            write_target.close()
|
||||
|
||||
|
||||
def generate_tasks(dag, partitions, sql, column_metadata, use_local):
    """Create one PythonOperator per ORA_HASH partition of the extract.

    Each task runs query_oracle_and_generate_parquet for its own bucket so
    the table is extracted in `partitions` parallel slices.

    Returns:
        list: the created PythonOperator tasks, ordered by partition number.
    """
    def _build_partition_task(idx):
        # One worker task per hash bucket; task_id encodes the bucket number.
        return PythonOperator(
            task_id=f"generate_parquet_partition_{idx}",
            python_callable=query_oracle_and_generate_parquet,
            op_kwargs={
                "partition_num": idx,
                "partitions": partitions,
                "sql": sql,
                "column_metadata": column_metadata,
                "use_local": use_local
            },
            provide_context=True,
            dag=dag,
        )

    return [_build_partition_task(idx) for idx in range(partitions)]
||||
|
||||
|
||||
with DAG(
    dag_id='devo_replicator_pandas',
    default_args=DEFAULT_ARGS,
    description='Devo replicator using Pandas with dtype optimization',
    schedule_interval=SCHEDULE_INTERVAL,
    start_date=datetime(2024, 1, 1),
    catchup=False,
    tags=['DevoReplicator', 'Pandas'],
    max_active_runs=1,
    max_active_tasks=30,  # allows all partition tasks to run concurrently
) as dag:

    # NOTE(review): this runs at DAG-parse time, i.e. it hits Oracle on every
    # scheduler parse loop — consider caching or moving into a task.
    query, column_metadata = query_oracle_template(OWNER, TABLE_NAME)

    # Number of ORA_HASH buckets = number of parallel extract tasks.
    PARTITIONS = 16

    partition_tasks = generate_tasks(dag, PARTITIONS, query, column_metadata, USE_LOCAL_STORAGE)
244
airflow/devo_replicator/devo_replicator_core_pyarrow.py
Normal file
244
airflow/devo_replicator/devo_replicator_core_pyarrow.py
Normal file
@@ -0,0 +1,244 @@
|
||||
from airflow import DAG
|
||||
from airflow.operators.python_operator import PythonOperator
|
||||
from airflow.hooks.S3_hook import S3Hook
|
||||
from datetime import datetime, timedelta
|
||||
import pandas as pd
|
||||
import pyarrow as pa
|
||||
import pyarrow.parquet as pq
|
||||
import io
|
||||
import logging
|
||||
import sys
|
||||
|
||||
|
||||
sys.path.append('/opt/airflow/python/mrds_common')
|
||||
|
||||
from mrds.utils import oraconn
|
||||
|
||||
# Source table identification.
SERVICE_NAME = "SERVICE_NAME"
OWNER = "C2D"
TABLE_NAME = "T_CEPH"
# Column inventory used to build the extraction SELECT.
METADATA_OWNER = "CT_MOPDB"
METADATA_TABLE = "mopdb_metadata_inventory"

## need to be changed — placeholder S3 destination, not a real bucket
S3_BUCKET = "bucket-name"
S3_PREFIX = "devo/replicator/C2D/T_CEPH/"
AWS_CONN_ID = "aws_default"

# Shared operator defaults for this DAG.
DEFAULT_ARGS = {
    "owner": "airflow",
    "depends_on_past": False,
    "email_on_failure": False,
    "email_on_retry": False,
    "retries": 1,
    "retry_delay": timedelta(minutes=5),
}
DAG_ID = "devo_replicator_pyarrow"
SCHEDULE_INTERVAL = None  # manual / externally triggered only
BATCH_SIZE = 100000       # rows fetched per round trip and written per Parquet batch
|
||||
|
||||
|
||||
def query_oracle_template(owner, table_name):
    """Build the extraction SELECT for owner.table_name from the metadata inventory.

    Every column is wrapped in an explicit CAST so dates/timestamps and CLOBs
    come back as strings and numerics keep their precision/scale.

    Args:
        owner: schema owner of the source table.
        table_name: name of the source table.

    Returns:
        str: the generated SELECT statement.

    Raises:
        ValueError: if the inventory holds no (still valid) rows for the table.
    """
    # FIX: pre-bind conn so the finally block cannot raise UnboundLocalError
    # when opening the connection itself fails.
    conn = None
    try:
        # NOTE(review): identifiers are interpolated with str.format; inputs
        # are module constants here, but bind variables would be safer if this
        # is ever called with external values.
        input_query = """SELECT
        CASE
            WHEN DATA_TYPE = 'DATE' OR DATA_TYPE LIKE '%TIMESTAMP%' THEN
                'CAST(' || COLUMN_NAME || ' AS VARCHAR2(100)) AS ' || COLUMN_NAME
            WHEN DATA_TYPE = 'VARCHAR2' OR DATA_TYPE LIKE '%CHAR%' THEN
                'CAST(' || COLUMN_NAME || ' AS VARCHAR2(' || CAST(CHAR_LENGTH AS INT) || ')) AS ' || COLUMN_NAME
            WHEN DATA_TYPE IN ('NUMBER', 'DECIMAL') AND DATA_PRECISION IS NOT NULL AND DATA_SCALE IS NOT NULL THEN
                'CAST(' || COLUMN_NAME || ' AS ' || DATA_TYPE || '(' || CAST(DATA_PRECISION AS INT) || ',' || CAST(DATA_SCALE AS INT) || ')) AS ' || COLUMN_NAME
            WHEN DATA_TYPE IN ('NUMBER', 'DECIMAL') AND DATA_PRECISION IS NOT NULL THEN
                'CAST(' || COLUMN_NAME || ' AS ' || DATA_TYPE || '(' || CAST(DATA_PRECISION AS INT) || ')) AS ' || COLUMN_NAME
            WHEN DATA_TYPE = 'CLOB' THEN
                'TO_CHAR(SUBSTR(' || COLUMN_NAME || ', 1, 32767)) AS ' || COLUMN_NAME
            ELSE
                COLUMN_NAME
        END AS casting
        FROM {0}.{1}
        WHERE OWNER = '{2}' AND TABLE_NAME = '{3}' AND A_VALID_TO > SYSDATE
        ORDER BY COLUMN_ID""".format(METADATA_OWNER, METADATA_TABLE, owner, table_name)

        # FIX: the original called the undefined name `connect`; this module
        # only imports `oraconn` (NameError at runtime).
        conn = oraconn.connect('MRDS_LOADER')
        df = pd.read_sql(input_query, conn)

        if df.empty:
            raise ValueError(f"No metadata found for {owner}.{table_name}")

        output_query = 'SELECT ' + ', \n'.join(df['casting'].tolist()) + ' FROM {0}.{1}'.format(owner, table_name)

        logging.info(f"Generated query template with {len(df)} columns")
        return output_query

    except Exception as e:
        logging.error(f"Error in query_oracle_template: {e}")
        raise
    finally:
        if conn:
            conn.close()
|
||||
|
||||
|
||||
def query_oracle_and_generate_parquet(partition_num, partitions, sql):
    """Extract one ORA_HASH partition of the table and upload it to S3 as a
    single snappy-compressed Parquet file.

    Rows are streamed in BATCH_SIZE chunks into an in-memory buffer; the Arrow
    schema is inferred from the first batch (first non-NULL value per column)
    and columns that fail conversion fall back to strings.

    Args:
        partition_num: this worker's ORA_HASH bucket (0-based).
        partitions: total number of buckets.
        sql: base SELECT statement produced by query_oracle_template.
    """
    logging.info(f"[PyArrow-Partition {partition_num}] Starting processing")

    # Restrict the base query to this worker's hash bucket; the PARALLEL hint
    # lets Oracle fan the scan out over `partitions` threads.
    partition_sql = f"""SELECT /*+ PARALLEL(t, {partitions}) */ *
    FROM (
        {sql}
    ) t
    WHERE ORA_HASH(ROWID, {partitions - 1}) = {partition_num}"""

    # FIX: the original called the undefined name `connect`; this module only
    # imports `oraconn` (NameError at runtime).
    conn = oraconn.connect("MRDS_LOADER")
    cursor = conn.cursor()
    cursor.arraysize = BATCH_SIZE      # rows fetched per round trip
    cursor.prefetchrows = BATCH_SIZE

    s3_hook = S3Hook(aws_conn_id=AWS_CONN_ID)
    s3_client = s3_hook.get_conn()
    s3_key = f"{S3_PREFIX}partition_{partition_num:04d}.parquet"

    # The whole partition is staged in memory before a single upload.
    buffer = io.BytesIO()

    try:
        cursor.execute(partition_sql)

        column_names = [desc[0] for desc in cursor.description]

        first_batch_rows = cursor.fetchmany(BATCH_SIZE)
        if not first_batch_rows:
            logging.warning(f"[PyArrow-Partition {partition_num}] No data found")
            return

        # --- Infer an Arrow schema from the first fetched batch -----------
        arrow_fields = []
        sample_row = first_batch_rows[0]

        for i, col_name in enumerate(column_names):
            sample_val = sample_row[i]

            # First row may hold NULL; scan the rest of the batch for a value.
            if sample_val is None:
                for row in first_batch_rows[1:]:
                    if row[i] is not None:
                        sample_val = row[i]
                        break

            if isinstance(sample_val, str):
                arrow_type = pa.string()
            elif isinstance(sample_val, int):
                arrow_type = pa.int64()
            elif isinstance(sample_val, float):
                arrow_type = pa.float64()
            elif isinstance(sample_val, (datetime, pd.Timestamp)):
                arrow_type = pa.timestamp('ns')
            elif isinstance(sample_val, bytes):
                arrow_type = pa.binary()
            else:
                # All-NULL column or unrecognised driver type -> store as text.
                arrow_type = pa.string()

            arrow_fields.append(pa.field(col_name, arrow_type))

        schema = pa.schema(arrow_fields)

        writer = pq.ParquetWriter(
            buffer,
            schema,
            compression='snappy',
            use_dictionary=True,
            write_statistics=True,
            data_page_size=2*1024*1024,
            version='2.6',
        )

        def process_batch(rows):
            """Convert fetched rows into a RecordBatch matching `schema`."""
            if not rows:
                return None

            columns_data = list(zip(*rows))  # row-major -> column-major
            arrays = []

            for i, col_data in enumerate(columns_data):
                try:
                    arrays.append(pa.array(col_data, type=schema.field(i).type))
                except Exception as e:
                    logging.warning(f"[PyArrow-Partition {partition_num}] Column {column_names[i]} conversion failed: {e}")
                    # NOTE(review): this fallback builds a string array that no
                    # longer matches the declared schema field type; confirm
                    # RecordBatch.from_arrays accepts it for such batches.
                    converted = [str(val) if val is not None else None for val in col_data]
                    arrays.append(pa.array(converted, type=pa.string()))

            return pa.RecordBatch.from_arrays(arrays, schema=schema)

        batch = process_batch(first_batch_rows)
        if batch:
            writer.write_batch(batch)

        total_rows = len(first_batch_rows)
        batch_count = 1

        # Stream the remaining batches until the cursor is exhausted.
        while True:
            rows = cursor.fetchmany(BATCH_SIZE)
            if not rows:
                break

            batch = process_batch(rows)
            if batch:
                writer.write_batch(batch)

            total_rows += len(rows)
            batch_count += 1

            # Periodic progress log (every 10 batches).
            if batch_count % 10 == 0:
                buffer_size_mb = buffer.tell() / 1024 / 1024
                logging.info(f"[PyArrow-Partition {partition_num}] Processed {total_rows:,} rows, Buffer size: {buffer_size_mb:.2f} MB")

        writer.close()  # flush Parquet footer before upload

        buffer.seek(0)
        buffer_size_mb = buffer.getbuffer().nbytes / 1024 / 1024

        logging.info(f"[PyArrow-Partition {partition_num}] Uploading {buffer_size_mb:.2f} MB to s3://{S3_BUCKET}/{s3_key}")

        s3_client.upload_fileobj(buffer, S3_BUCKET, s3_key)

        logging.info(f"[PyArrow-Partition {partition_num}] Completed - {total_rows:,} rows, {buffer_size_mb:.2f} MB uploaded to S3")

    except Exception as e:
        logging.error(f"[PyArrow-Partition {partition_num}] Error: {e}")
        raise
    finally:
        # Release DB and memory resources on every path.
        cursor.close()
        conn.close()
        buffer.close()
||||
|
||||
|
||||
def generate_tasks(dag, partitions, sql):
    """Create one PythonOperator per ORA_HASH partition of the extract.

    Returns:
        list: the created PythonOperator tasks, ordered by partition number.
    """
    def _build_partition_task(idx):
        # One worker task per hash bucket; task_id encodes the bucket number.
        return PythonOperator(
            task_id=f"generate_parquet_partition_{idx}",
            python_callable=query_oracle_and_generate_parquet,
            op_kwargs={
                "partition_num": idx,
                "partitions": partitions,
                "sql": sql
            },
            provide_context=True,
            dag=dag,
        )

    return [_build_partition_task(idx) for idx in range(partitions)]
|
||||
|
||||
|
||||
with DAG(
    dag_id='devo_replicator_pyarrow',
    default_args=DEFAULT_ARGS,
    description='Devo replicator using PyArrow native processing',
    schedule_interval=SCHEDULE_INTERVAL,
    start_date=datetime(2024, 1, 1),
    catchup=False,
    tags=['DevoReplicator', 'PyArrow'],
    max_active_runs=1,
    max_active_tasks=16,  # matches PARTITIONS so all partitions run at once
) as dag:

    # NOTE(review): executed at DAG-parse time — hits Oracle on every
    # scheduler parse loop; consider caching or moving into a task.
    query = query_oracle_template(OWNER, TABLE_NAME)

    # Number of ORA_HASH buckets = number of parallel extract tasks.
    PARTITIONS = 16

    partition_tasks = generate_tasks(dag, PARTITIONS, query)
|
||||
181
airflow/devo_replicator/devo_replicator_scheduler_rar.py
Normal file
181
airflow/devo_replicator/devo_replicator_scheduler_rar.py
Normal file
@@ -0,0 +1,181 @@
|
||||
# dags/dev_replicator_scheduler_rar.py
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
import logging
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
from airflow import DAG
|
||||
from airflow.decorators import task
|
||||
from airflow.operators.python import BranchPythonOperator
|
||||
from airflow.operators.empty import EmptyOperator
|
||||
from airflow.operators.trigger_dagrun import TriggerDagRunOperator
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
from mrds.utils import oraconn # your Oracle connection helper
|
||||
|
||||
sys.path.append('/opt/airflow/python/connectors/devo')
|
||||
sys.path.append('/opt/airflow/python/mrds_common')
|
||||
|
||||
DAG_NAME = "devo_replicator_scheduler_rar"
# DAG to trigger when at least one table is stale.
# NOTE(review): the description below says "devo_replicator_trigger_rar" but
# this targets "devo_replicator_trigger" — confirm which id is intended.
TARGET_DAG_ID = "devo_replicator_trigger"
ORACLE_CONN_NAME = "MRDS_LOADER"

# Compares the last successful Oracle-side load with the last FINISHED
# DEVO-side replication for one OWNER.TABLE (bind :table_name); yields 'Y'
# when Oracle is newer, i.e. the replica is stale.
PRECONDITION_SQL = """
WITH LAST_UPDATE_ORACLE AS (
    SELECT MAX(process_end) AS process_end
    FROM CT_RAR.A_RAR_FOR_DISC_MONITORING
    WHERE UPPER(owner || '.' || target_table_name) = UPPER(:table_name)
      AND process_end IS NOT NULL
      AND process_successful = 'Y'
),
LAST_UPDATE_DEVO AS (
    SELECT CASE
             WHEN last_status = 'FINISHED' THEN last_end_time
             ELSE TO_DATE('01-JAN-1999', 'DD-MON-YYYY')
           END AS process_end
    FROM CT_MRDS.a_devo_replica_mgmt_rar
    WHERE owner || '.' || table_name = :table_name
)
SELECT CASE
         WHEN (SELECT process_end FROM LAST_UPDATE_ORACLE) >
              (SELECT process_end FROM LAST_UPDATE_DEVO)
         THEN 'Y' ELSE 'N'
       END AS trigger_devo_replicator
FROM dual
"""
|
||||
|
||||
def _get_conn():
    """Open a fresh Oracle connection using the configured connection name."""
    connection = oraconn.connect(ORACLE_CONN_NAME)
    return connection
|
||||
|
||||
def get_devo_replica_table_options() -> list[str]:
    """Return every OWNER.TABLE_NAME registered in the RAR replica-management
    table, or an empty list when the lookup fails (fail-soft enumeration)."""
    connection = None
    cursor = None
    try:
        connection = _get_conn()
        cursor = connection.cursor()
        cursor.execute("""
            SELECT OWNER || '.' || TABLE_NAME
            FROM CT_MRDS.a_devo_replica_mgmt_rar
            ORDER BY OWNER, TABLE_NAME
        """)
        fetched = cursor.fetchall()
        table_names = [record[0] for record in fetched] if fetched else []
        logging.info("Fetched %d table(s) from replica mgmt.", len(table_names))
        return table_names
    except Exception:
        # Enumeration errors are logged and treated as "no tables".
        logging.exception("Error getting DEVO replica table options")
        return []
    finally:
        try:
            if cursor:
                cursor.close()
        except Exception:
            pass
        if connection:
            connection.close()
|
||||
|
||||
def check_table_precondition(table_full_name: str) -> dict:
    """Returns {"table": <OWNER.TABLE>, "trigger": "Y"|"N"}."""
    connection = None
    cursor = None
    try:
        connection = _get_conn()
        cursor = connection.cursor()
        cursor.execute(PRECONDITION_SQL, {"table_name": table_full_name})
        first_row = cursor.fetchone()
        # Missing row or NULL verdict both collapse to 'N'.
        verdict = (first_row[0] if first_row else 'N') or 'N'
        logging.info("Precondition for %s: %s", table_full_name, verdict)
        return {"table": table_full_name, "trigger": verdict}
    except Exception:
        logging.exception("Error checking precondition for %s", table_full_name)
        return {"table": table_full_name, "trigger": "N"}  # fail closed
    finally:
        try:
            if cursor:
                cursor.close()
        except Exception:
            pass
        if connection:
            connection.close()
|
||||
|
||||
# Shared operator defaults for this DAG.
default_args = {
    'owner': 'devo',
    'depends_on_past': False,
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,                         # one automatic retry per task
    'retry_delay': timedelta(minutes=1),
}
|
||||
|
||||
with DAG(
    dag_id=DAG_NAME,
    description="Checks DEVO replica preconditions; triggers devo_replicator_trigger_rar once if any table is stale",
    default_args=default_args,
    # FIX: a dynamic start_date (datetime.now() - timedelta) is an Airflow
    # anti-pattern that can prevent runs from ever being scheduled; use a
    # static date in the past (catchup=False keeps backfills away).
    start_date=datetime(2024, 1, 1),
    catchup=False,
    # FIX: the original passed BOTH schedule=None and
    # schedule_interval='*/10 * * * *'; Airflow >= 2.4 rejects the pair as
    # mutually exclusive and the DAG fails to import. Keep only the cron.
    schedule_interval='*/10 * * * *',  # every 10 minutes
    max_active_runs=1,
    tags=["DevoScheduler", "DevoReplicatorTrigger"],
) as dag:

    @task
    def fetch_tables() -> list[str]:
        """Enumerate all OWNER.TABLE entries under replica management."""
        tables = get_devo_replica_table_options()
        if not tables:
            logging.warning("No tables returned from enumeration.")
        return tables

    @task
    def check_one(table_name: str) -> dict:
        """Evaluate the staleness precondition for a single table."""
        return check_table_precondition(table_name)

    @task
    def summarize(results: list[dict]) -> dict:
        """Collapse per-table results into {'any_true': bool, 'y_tables': [...]}."""
        y_tables = [r["table"] for r in results if r and r.get("trigger") == "Y"]
        n_tables = [r["table"] for r in results if r and r.get("trigger") == "N"]

        logging.info("Precondition summary -> Y: %d, N: %d", len(y_tables), len(n_tables))
        if y_tables:
            logging.info("Tables needing replication: %s", ", ".join(y_tables))
        else:
            logging.info("No tables are updated/stale; nothing to trigger.")

        return {"any_true": bool(y_tables), "y_tables": y_tables}

    def decide_branch(summary: dict) -> str:
        """Return the EXACT downstream task_id to follow."""
        return "prepare_trigger_conf" if summary.get("any_true") else "no_updates"

    @task
    def prepare_trigger_conf(summary: dict) -> dict:
        """Single conf payload for the downstream DAG."""
        return {"tables_to_replicate": summary.get("y_tables", [])}

    no_updates = EmptyOperator(task_id="no_updates")

    # Graph
    tables = fetch_tables()
    results = check_one.expand(table_name=tables)  # dynamic mapping across tables
    summary = summarize(results)

    branch = BranchPythonOperator(
        task_id="branch_on_any",
        python_callable=decide_branch,
        op_args=[summary],  # XComArg from summarize
    )

    conf_payload = prepare_trigger_conf(summary)

    trigger_devo = TriggerDagRunOperator(
        task_id="trigger_devo_replicator_rar",
        trigger_dag_id=TARGET_DAG_ID,
        wait_for_completion=True,
        reset_dag_run=True,
        conf=conf_payload,
    )

    # Wire branching — only ONE instance of prepare_trigger_conf is referenced
    summary >> branch
    branch >> no_updates
    branch >> conf_payload >> trigger_devo
|
||||
255
airflow/devo_replicator/devo_replicator_trigger.py
Normal file
255
airflow/devo_replicator/devo_replicator_trigger.py
Normal file
@@ -0,0 +1,255 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import sys
|
||||
import logging
|
||||
import time
|
||||
from datetime import timedelta
|
||||
|
||||
from airflow import DAG
|
||||
from airflow.utils.dates import days_ago
|
||||
from airflow.operators.python import PythonOperator
|
||||
from airflow.models import Param
|
||||
from airflow.decorators import task
|
||||
from airflow.providers.oracle.hooks.oracle import OracleHook
|
||||
|
||||
from mrds.utils import oraconn
|
||||
|
||||
sys.path.append('/opt/airflow/python/connectors/devo')
|
||||
sys.path.append('/opt/airflow/python/mrds_common')
|
||||
|
||||
# NOTE(review): functions below hardcode 'MRDS_LOADER' instead of reusing this.
ORACLE_CONN_ID = "MRDS_LOADER"
# TARGET_DAG_ID = "devo_replicator_trigger_rar"
|
||||
|
||||
def get_rar_table_options():
    """Return all OWNER.TABLE_NAME entries managed for RAR, or [] on error.

    Failures are logged and swallowed so a DB outage cannot break callers
    that only want the option list.
    """
    oracle_conn = None
    cursor = None
    try:
        oracle_conn = oraconn.connect('MRDS_LOADER')
        cursor = oracle_conn.cursor()
        cursor.execute("""
            SELECT OWNER || '.' || TABLE_NAME
            FROM CT_MRDS.a_devo_replica_mgmt_rar
            ORDER BY OWNER, TABLE_NAME
        """)
        options = [row[0] for row in cursor.fetchall()]
        return options
    except Exception as e:
        logging.error(f"Error getting RAR table options: {e}")
        return []
    finally:
        # FIX: close the cursor on all paths — the original only closed it on
        # the success path and leaked it when execute/fetch raised.
        if cursor:
            try:
                cursor.close()
            except Exception:
                pass
        if oracle_conn:
            oracle_conn.close()
|
||||
|
||||
# Shared operator defaults for this DAG.
default_args = {
    'owner': 'devo',
    'depends_on_past': False,
    'start_date': days_ago(1),
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,                         # one automatic retry per task
    'retry_delay': timedelta(minutes=1),
}
|
||||
|
||||
with DAG(
    dag_id='devo_replicator_trigger',
    default_args=default_args,
    description='External trigger DAG for RAR tables',
    schedule=None,  # runs only when triggered (by the scheduler DAG or manually)
    catchup=False,
    tags=['DevoReplicator', 'DevoReplicatorTrigger'],
    max_active_runs=1,
    params={
        # still allow manual runs from the UI
        "owner_table": Param(
            default=None,
            type=["string", "null"],
            description="Select table in format OWNER.TABLE_NAME",
            #enum=get_rar_table_options()
        )
    }
) as dag:
|
||||
|
||||
# --- Init: read conf ---
|
||||
    def init_step(**context):
        """Read dag_run.conf, validate MRDS_ENV and publish run parameters to XCom."""
        dag_run = context.get("dag_run")
        ti = context["ti"]
        conf = (dag_run.conf or {}) if dag_run else {}

        env = os.getenv("MRDS_ENV")
        if not env:
            raise ValueError("MRDS_ENV environment variable is required")
        env = env.lower()

        store = "rar"  # this DAG always services the RAR store
        owner_table = conf.get("owner_table")  # optional single table
        tables_to_replicate = conf.get("tables_to_replicate")  # optional list of OWNER.TABLE

        # Log what we got
        if tables_to_replicate:
            logging.info("Received tables_to_replicate from upstream: %d table(s).", len(tables_to_replicate))
        elif owner_table:
            logging.info("Received single owner_table from conf: %s", owner_table)
        else:
            logging.info("No conf provided; manual UI param may be used or fallback to full list in get_table_list.")

        if env not in {"dev", "tst", "acc", "prd"}:
            raise ValueError(f"Unsupported env '{env}'. Expected 'dev', 'tst', 'acc' or 'prd'.")

        xcom = {
            "env": env,
            "store": store,
            "owner_table": owner_table,  # may be None
            "tables_to_replicate": tables_to_replicate  # may be None/list
        }

        # Publish each value under its own XCom key for downstream tasks.
        for k, v in xcom.items():
            ti.xcom_push(key=k, value=v)

    init = PythonOperator(
        task_id='init_step',
        python_callable=init_step,
    )
|
||||
|
||||
# --- Build the processing list ---
|
||||
    def get_table_list(**context):
        """Resolve the list of (owner, table_name) pairs to process.

        Priority: explicit tables_to_replicate list > single owner_table >
        full contents of CT_MRDS.a_devo_replica_mgmt_rar.
        """
        ti = context["ti"]
        store = ti.xcom_pull(task_ids='init_step', key='store')  # NOTE: unused here
        owner_table = ti.xcom_pull(task_ids='init_step', key='owner_table')
        tables_to_replicate = ti.xcom_pull(task_ids='init_step', key='tables_to_replicate')

        # 1) If upstream provided a list, use it
        if tables_to_replicate:
            logging.info("Using tables_to_replicate list from conf: %d items", len(tables_to_replicate))
            tables = []
            for ot in tables_to_replicate:
                if '.' not in ot:
                    logging.warning("Skipping malformed owner_table (no dot): %s", ot)
                    continue
                table_owner, table_name = ot.split('.', 1)
                tables.append((table_owner, table_name))
            ti.xcom_push(key='tables_to_process', value=tables)
            return tables

        # 2) Else if a single owner_table provided (manual/programmatic)
        if owner_table:
            table_owner, table_name = owner_table.split('.', 1)
            tables = [(table_owner, table_name)]
            logging.info("Processing single table from conf/params: %s", owner_table)
            ti.xcom_push(key='tables_to_process', value=tables)
            return tables

        # 3) Else fallback to full list in DB (manual run without conf)
        oracle_conn = None
        try:
            oracle_conn = oraconn.connect('MRDS_LOADER')
            cursor = oracle_conn.cursor()
            cursor.execute("""
                SELECT OWNER, TABLE_NAME
                FROM CT_MRDS.a_devo_replica_mgmt_rar
                ORDER BY OWNER, TABLE_NAME
            """)
            tables = cursor.fetchall()
            cursor.close()
            logging.info("Fallback: Found %d tables for RAR", len(tables))
            ti.xcom_push(key='tables_to_process', value=tables)
            return tables
        except Exception as e:
            logging.error(f"Error in get_table_list: {e}")
            raise
        finally:
            if oracle_conn:
                oracle_conn.close()

    t1 = PythonOperator(
        task_id='get_table_list',
        python_callable=get_table_list,
    )
|
||||
|
||||
# --- Keep your existing throttled triggering logic unchanged ---
|
||||
    def check_and_trigger(**context):
        """For each resolved table: wait while the global thread budget is
        exceeded, skip tables already RUNNING, otherwise trigger one
        devo_replicator_core run for the table."""
        ti = context["ti"]
        env = ti.xcom_pull(task_ids='init_step', key='env')  # NOTE: currently unused
        store = ti.xcom_pull(task_ids='init_step', key='store')
        threshold = 30  # you were pushing 30; keep it here or push from init
        tables = ti.xcom_pull(task_ids='get_table_list', key='tables_to_process')

        oracle_conn = None
        triggered_count = 0

        try:
            oracle_conn = oraconn.connect('MRDS_LOADER')

            for table_owner, table_name in tables:
                logging.info("Processing table: %s.%s", table_owner, table_name)

                # Poll until there is capacity for this table.
                # NOTE(review): can loop indefinitely, holding the worker slot.
                while True:
                    cursor = oracle_conn.cursor()
                    service_name = store.upper()
                    # NOTE(review): owner/table/service are interpolated
                    # directly into the SQL text and originate from
                    # dag_run.conf — consider bind variables (injection risk).
                    sql_query = f"""
                        SELECT
                          (SELECT NVL(SUM(MAX_THREADS),0) FROM CT_MRDS.A_DEVO_REPLICA_MGMT_MOPDB WHERE LAST_STATUS = 'RUNNING') +
                          (SELECT NVL(SUM(MAX_THREADS),0) FROM CT_MRDS.A_DEVO_REPLICA_MGMT_RAR WHERE LAST_STATUS = 'RUNNING')
                            AS TOTAL_RUNNING_THREADS_NOW,
                          (SELECT COUNT(*)
                             FROM CT_MRDS.A_DEVO_REPLICA_MGMT_{service_name}
                            WHERE OWNER = '{table_owner}' AND TABLE_NAME = '{table_name}' AND LAST_STATUS = 'RUNNING') AS TABLE_IS_ALREADY_RUNNING
                        FROM DUAL
                    """
                    cursor.execute(sql_query)
                    total_running_val, table_running_val = cursor.fetchone()
                    cursor.close()

                    logging.info(
                        "Total running: %d, threshold: %d, table running: %d",
                        total_running_val or 0, threshold, table_running_val or 0
                    )

                    # Over budget: back off 5 minutes, then re-check.
                    if (total_running_val or 0) > threshold:
                        logging.info("Threshold exceeded. Waiting 5 minutes...")
                        time.sleep(300)
                        continue

                    if (table_running_val or 0) >= 1:
                        logging.info("Table %s.%s already running. Skipping.", table_owner, table_name)
                        break

                    # Trigger the core DAG for this specific table
                    from airflow.api.common.trigger_dag import trigger_dag
                    conf = {"store": store, "owner_table": f"{table_owner}.{table_name}"}
                    trigger_dag(
                        dag_id='devo_replicator_core',
                        conf=conf,
                        execution_date=None,
                        replace_microseconds=False
                    )
                    triggered_count += 1
                    logging.info("Triggered core DAG for table %s.%s", table_owner, table_name)
                    break

            logging.info("Total core DAGs triggered: %d", triggered_count)
            ti.xcom_push(key='triggered_count', value=triggered_count)

        except Exception as e:
            logging.error(f"Error in check_and_trigger: {e}")
            raise
        finally:
            if oracle_conn:
                oracle_conn.close()

    t2 = PythonOperator(
        task_id='check_and_trigger',
        python_callable=check_and_trigger,
    )

    init >> t1 >> t2
|
||||
|
||||
|
||||
"""
|
||||
Reading tables_to_replicate from dag_run.conf in init_step.
|
||||
Pushing it to XCom (so get_table_list can use it).
|
||||
Tell get_table_list to prioritize the provided list.
|
||||
init_step reads tables_to_replicate from dag_run.conf and puts it into XCom.
|
||||
get_table_list prioritizes that list; falls back to owner_table or full table list only if needed.
|
||||
check_and_trigger loops over those tables and triggers your core DAG (devo_replicator_core) per table, respecting your concurrency threshold.
|
||||
"""
|
||||
257
airflow/devo_replicator/devo_replicator_trigger_mopdb.py
Normal file
257
airflow/devo_replicator/devo_replicator_trigger_mopdb.py
Normal file
@@ -0,0 +1,257 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import sys
|
||||
import logging
|
||||
import time
|
||||
from datetime import timedelta
|
||||
|
||||
from airflow import DAG
|
||||
from airflow.utils.dates import days_ago
|
||||
from airflow.operators.python import PythonOperator
|
||||
from airflow.models import Param
|
||||
|
||||
sys.path.append('/opt/airflow/python/connectors/devo')
|
||||
sys.path.append('/opt/airflow/python/mrds_common')
|
||||
|
||||
from mrds.utils import oraconn
|
||||
|
||||
# Get MOPDB table options for dropdown
|
||||
def get_mopdb_table_options():
    """Return all OWNER.TABLE_NAME entries managed for MOPDB, or [] on error.

    Used to populate the UI dropdown; failures are logged and swallowed so a
    DB outage does not break DAG-file parsing.
    """
    oracle_conn = None
    cursor = None
    try:
        oracle_conn = oraconn.connect('MRDS_LOADER')
        cursor = oracle_conn.cursor()
        cursor.execute("SELECT OWNER || '.' || TABLE_NAME FROM CT_MRDS.a_devo_replica_mgmt_mopdb ORDER BY OWNER, TABLE_NAME")
        options = [row[0] for row in cursor.fetchall()]
        return options
    except Exception as e:
        logging.error(f"Error getting MOPDB table options: {e}")
        return []
    finally:
        # FIX: close the cursor on all paths — the original only closed it on
        # the success path and leaked it when execute/fetch raised.
        if cursor:
            try:
                cursor.close()
            except Exception:
                pass
        if oracle_conn:
            oracle_conn.close()
|
||||
|
||||
# Shared operator defaults for this DAG.
default_args = {
    'owner': 'devo',
    'depends_on_past': False,
    'start_date': days_ago(1),
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,                         # one automatic retry per task
    'retry_delay': timedelta(minutes=1),
}
|
||||
|
||||
with DAG(
    dag_id='devo_replicator_trigger_mopdb',
    default_args=default_args,
    description='External trigger DAG for MOPDB tables',
    schedule=None,  # runs only when triggered externally/manually
    catchup=False,
    tags=['DevoReplicator', 'DevoReplicatorTrigger'],
    params={
        "owner_table": Param(
            default=None,
            type="string",
            description="Select table in format OWNER.TABLE_NAME",
            # NOTE(review): enum=... runs an Oracle query on EVERY DAG-file
            # parse, and type="string" with default=None may fail Param
            # validation — the RAR trigger uses type=["string","null"] and
            # comments the enum out; confirm which behaviour is intended.
            enum=get_mopdb_table_options()
        )
    }
) as dag:
|
||||
|
||||
# Init
|
||||
    def init_step(**context):
        """Validate MRDS_ENV and the required owner_table conf value; push the
        run parameters (env, store, owner/table, threshold) to XCom."""
        dag_run = context.get("dag_run")
        ti = context["ti"]
        conf = (dag_run.conf or {}) if dag_run else {}

        env = os.getenv("MRDS_ENV")
        if not env:
            raise ValueError("MRDS_ENV environment variable is required")
        env = env.lower()

        store = "mopdb"  # fixed for this DAG
        owner_table = conf.get("owner_table")

        if not owner_table:
            raise ValueError("owner_table parameter is required")
        if '.' not in owner_table:
            raise ValueError("owner_table must be in format 'OWNER.TABLE_NAME'")

        table_owner, table_name = owner_table.split('.', 1)

        if env not in {"dev", "tst", "acc", "prd"}:
            raise ValueError(f"Unsupported env '{env}'. Expected 'dev', 'tst', 'acc' or 'prd'.")

        logging.info("=== init_step === env=%s store=%s owner_table=%s",
                     env, store, owner_table)

        xcom = {
            "env": env,
            "store": store,
            "table_owner": table_owner,
            "table_name": table_name,
            "owner_table": owner_table,
            "threshold": 30,  # max total RUNNING MAX_THREADS across both stores
        }

        # Publish each value under its own XCom key for downstream tasks.
        for k, v in xcom.items():
            ti.xcom_push(key=k, value=v)

    init = PythonOperator(
        task_id='init_step',
        python_callable=init_step,
    )
|
||||
|
||||
# Get table list
|
||||
    def get_table_list(**context):
        """Return [(owner, table)] for the selected table, or all MOPDB-managed
        tables when no owner_table was supplied."""
        ti = context["ti"]
        store = ti.xcom_pull(task_ids='init_step', key='store')  # NOTE: unused here
        owner_table = ti.xcom_pull(task_ids='init_step', key='owner_table')

        oracle_conn = None
        try:
            oracle_conn = oraconn.connect('MRDS_LOADER')

            if owner_table:
                table_owner, table_name = owner_table.split('.', 1)
                tables = [(table_owner, table_name)]
                logging.info("Processing specific table: %s", owner_table)
            else:
                cursor = oracle_conn.cursor()
                cursor.execute("SELECT OWNER, TABLE_NAME FROM CT_MRDS.a_devo_replica_mgmt_mopdb ORDER BY OWNER, TABLE_NAME")
                tables = cursor.fetchall()
                cursor.close()
                logging.info("Found %d tables for MOPDB", len(tables))

            ti.xcom_push(key='tables_to_process', value=tables)
            return tables
        except Exception as e:
            logging.error(f"Error in get_table_list: {e}")
            raise
        finally:
            if oracle_conn:
                oracle_conn.close()

    t1 = PythonOperator(
        task_id='get_table_list',
        python_callable=get_table_list,
    )
|
||||
|
||||
# Check and trigger core DAG
|
||||
    def check_and_trigger(**context):
        """Throttled trigger loop: waits while total running MAX_THREADS exceed
        the threshold, skips tables already RUNNING, otherwise triggers one
        devo_replicator_core run per table."""
        ti = context["ti"]
        env = ti.xcom_pull(task_ids='init_step', key='env')  # NOTE: currently unused
        store = ti.xcom_pull(task_ids='init_step', key='store')
        threshold = ti.xcom_pull(task_ids='init_step', key='threshold')
        tables = ti.xcom_pull(task_ids='get_table_list', key='tables_to_process')

        oracle_conn = None
        triggered_count = 0

        try:
            oracle_conn = oraconn.connect('MRDS_LOADER')

            for table_owner, table_name in tables:
                logging.info("Processing table: %s.%s", table_owner, table_name)

                # Poll until there is capacity for this table.
                # NOTE(review): can loop indefinitely, holding the worker slot.
                while True:
                    cursor = oracle_conn.cursor()

                    # Execute SQL query with variable substitution
                    # NOTE(review): values are interpolated into the SQL text;
                    # consider bind variables (injection risk).
                    service_name = store.upper()
                    sql_query = f"""
                        SELECT (SELECT CASE WHEN SUM(MAX_THREADS) IS NULL THEN 0 ELSE SUM(MAX_THREADS) END AS RUNNING_THREADS
                                FROM CT_MRDS.A_DEVO_REPLICA_MGMT_MOPDB
                                WHERE LAST_STATUS = 'RUNNING') +
                               (SELECT CASE WHEN SUM(MAX_THREADS) IS NULL THEN 0 ELSE SUM(MAX_THREADS) END AS RUNNING_THREADS
                                FROM CT_MRDS.A_DEVO_REPLICA_MGMT_RAR
                                WHERE LAST_STATUS = 'RUNNING')
                               AS TOTAL_RUNNING_THREADS_NOW,
                               (SELECT COUNT(*) FROM CT_MRDS.A_DEVO_REPLICA_MGMT_{service_name}
                                WHERE OWNER = '{table_owner}' AND TABLE_NAME = '{table_name}' AND LAST_STATUS = 'RUNNING') AS TABLE_IS_ALREADY_RUNNING
                        FROM DUAL
                    """

                    cursor.execute(sql_query)
                    result = cursor.fetchone()
                    total_running_val = result[0] or 0
                    table_running_val = result[1] or 0
                    cursor.close()

                    logging.info("Total running: %d, threshold: %d, table running: %d",
                                 total_running_val, threshold, table_running_val)

                    # Over budget: back off 5 minutes, then re-check.
                    if total_running_val > threshold:
                        logging.info("Threshold exceeded. Waiting 5 minutes...")
                        time.sleep(300)
                        continue

                    if table_running_val >= 1:
                        logging.info("Table %s.%s is already running. Skipping.", table_owner, table_name)
                        break

                    # Trigger core DAG
                    from airflow.api.common.trigger_dag import trigger_dag

                    conf = {
                        "store": store,
                        "owner_table": f"{table_owner}.{table_name}"
                    }

                    trigger_dag(
                        dag_id='devo_replicator_core',
                        conf=conf,
                        execution_date=None,
                        replace_microseconds=False
                    )

                    triggered_count += 1
                    logging.info("Triggered core DAG for table %s.%s", table_owner, table_name)
                    break

            logging.info("Total DAGs triggered: %d", triggered_count)
            ti.xcom_push(key='triggered_count', value=triggered_count)

        except Exception as e:
            logging.error(f"Error in check_and_trigger: {e}")
            raise
        finally:
            if oracle_conn:
                oracle_conn.close()

    t2 = PythonOperator(
        task_id='check_and_trigger',
        python_callable=check_and_trigger,
    )

    # Dependencies
    init >> t1 >> t2
|
||||
|
||||
"""
|
||||
MOPDB Trigger DAG
|
||||
1) init_step
|
||||
- Gets environment from MRDS_ENV environment variable
|
||||
- Reads owner_table parameter from DAG configuration
|
||||
- Validates owner_table format (must be OWNER.TABLE_NAME)
|
||||
- Sets store to "mopdb" (fixed for this DAG)
|
||||
- Sets threshold to 30 (max concurrent running threads)
|
||||
- Pushes parameters to XCom
|
||||
2) get_table_list
|
||||
- Connects to Oracle database (MRDS_LOADER)
|
||||
- If specific owner_table provided: creates single table list
|
||||
- If no owner_table: queries all tables from CT_MRDS.a_devo_replica_mgmt_mopdb
|
||||
- Returns list of (owner, table_name) tuples to process
|
||||
- Pushes table list to XCom
|
||||
3) check_and_trigger
|
||||
- Loops through each table from the table list
|
||||
- For each table, enters monitoring loop:
|
||||
- Executes SQL query to check total running threads across MOPDB+RAR
|
||||
- Checks if current table is already running
|
||||
- If total threads > threshold (30): waits 5 minutes and rechecks
|
||||
- If table already running: skips to next table
|
||||
- If conditions met: triggers core DAG with table parameters
|
||||
- Counts and logs total number of DAGs triggered
|
||||
- Ensures system doesn't exceed concurrent processing limits
|
||||
"""
|
||||
257
airflow/devo_replicator/devo_replicator_trigger_rar.py
Normal file
257
airflow/devo_replicator/devo_replicator_trigger_rar.py
Normal file
@@ -0,0 +1,257 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import sys
|
||||
import logging
|
||||
import time
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
from airflow import DAG
|
||||
from airflow.utils.dates import days_ago
|
||||
from airflow.operators.python import PythonOperator
|
||||
from airflow.models import Param
|
||||
|
||||
sys.path.append('/opt/airflow/python/connectors/devo')
|
||||
sys.path.append('/opt/airflow/python/mrds_common')
|
||||
|
||||
from mrds.utils import oraconn
|
||||
|
||||
# Get MOPDB table options for dropdown
|
||||
def get_rar_table_options():
|
||||
oracle_conn = None
|
||||
try:
|
||||
oracle_conn = oraconn.connect('MRDS_LOADER')
|
||||
cursor = oracle_conn.cursor()
|
||||
cursor.execute("SELECT OWNER || '.' || TABLE_NAME FROM CT_MRDS.a_devo_replica_mgmt_rar ORDER BY OWNER, TABLE_NAME")
|
||||
options = [row[0] for row in cursor.fetchall()]
|
||||
cursor.close()
|
||||
return options
|
||||
except Exception as e:
|
||||
logging.error(f"Error getting RAR table options: {e}")
|
||||
return []
|
||||
finally:
|
||||
if oracle_conn:
|
||||
oracle_conn.close()
|
||||
|
||||
default_args = {
|
||||
'owner': 'devo',
|
||||
'depends_on_past': False,
|
||||
'start_date': days_ago(1),
|
||||
'email_on_failure': False,
|
||||
'email_on_retry': False,
|
||||
'retries': 1,
|
||||
'retry_delay': timedelta(minutes=1),
|
||||
}
|
||||
|
||||
with DAG(
|
||||
dag_id='devo_replicator_trigger_rar',
|
||||
default_args=default_args,
|
||||
description='External trigger DAG for RAR tables',
|
||||
schedule=None,
|
||||
catchup=False,
|
||||
tags=['DevoReplicator', 'DevoReplicatorTrigger'],
|
||||
params={
|
||||
"owner_table": Param(
|
||||
default=None,
|
||||
type="string",
|
||||
description="Select table in format OWNER.TABLE_NAME",
|
||||
enum=get_rar_table_options()
|
||||
)
|
||||
}
|
||||
) as dag:
|
||||
|
||||
# Init
|
||||
def init_step(**context):
|
||||
dag_run = context.get("dag_run")
|
||||
ti = context["ti"]
|
||||
conf = (dag_run.conf or {}) if dag_run else {}
|
||||
|
||||
env = os.getenv("MRDS_ENV")
|
||||
if not env:
|
||||
raise ValueError("MRDS_ENV environment variable is required")
|
||||
env = env.lower()
|
||||
|
||||
store = "rar"
|
||||
owner_table = conf.get("owner_table")
|
||||
|
||||
if not owner_table:
|
||||
raise ValueError("owner_table parameter is required")
|
||||
if '.' not in owner_table:
|
||||
raise ValueError("owner_table must be in format 'OWNER.TABLE_NAME'")
|
||||
|
||||
table_owner, table_name = owner_table.split('.', 1)
|
||||
|
||||
if env not in {"dev", "tst", "acc", "prd"}:
|
||||
raise ValueError(f"Unsupported env '{env}'. Expected 'dev', 'tst', 'acc' or 'prd'.")
|
||||
|
||||
logging.info("=== init_step === env=%s store=%s owner_table=%s",
|
||||
env, store, owner_table)
|
||||
|
||||
xcom = {
|
||||
"env": env,
|
||||
"store": store,
|
||||
"table_owner": table_owner,
|
||||
"table_name": table_name,
|
||||
"owner_table": owner_table,
|
||||
"threshold": 30,
|
||||
}
|
||||
|
||||
for k, v in xcom.items():
|
||||
ti.xcom_push(key=k, value=v)
|
||||
|
||||
init = PythonOperator(
|
||||
task_id='init_step',
|
||||
python_callable=init_step,
|
||||
)
|
||||
|
||||
# Get table list
|
||||
def get_table_list(**context):
|
||||
ti = context["ti"]
|
||||
store = ti.xcom_pull(task_ids='init_step', key='store')
|
||||
owner_table = ti.xcom_pull(task_ids='init_step', key='owner_table')
|
||||
|
||||
oracle_conn = None
|
||||
try:
|
||||
oracle_conn = oraconn.connect('MRDS_LOADER')
|
||||
|
||||
if owner_table:
|
||||
table_owner, table_name = owner_table.split('.', 1)
|
||||
tables = [(table_owner, table_name)]
|
||||
logging.info("Processing specific table: %s", owner_table)
|
||||
else:
|
||||
cursor = oracle_conn.cursor()
|
||||
cursor.execute("SELECT OWNER, TABLE_NAME FROM CT_MRDS.a_devo_replica_mgmt_rar ORDER BY OWNER, TABLE_NAME")
|
||||
tables = cursor.fetchall()
|
||||
cursor.close()
|
||||
logging.info("Found %d tables for RAR", len(tables))
|
||||
|
||||
ti.xcom_push(key='tables_to_process', value=tables)
|
||||
return tables
|
||||
except Exception as e:
|
||||
logging.error(f"Error in get_table_list: {e}")
|
||||
raise
|
||||
finally:
|
||||
if oracle_conn:
|
||||
oracle_conn.close()
|
||||
|
||||
t1 = PythonOperator(
|
||||
task_id='get_table_list',
|
||||
python_callable=get_table_list,
|
||||
)
|
||||
|
||||
# Check and trigger core DAG
|
||||
def check_and_trigger(**context):
|
||||
ti = context["ti"]
|
||||
env = ti.xcom_pull(task_ids='init_step', key='env')
|
||||
store = ti.xcom_pull(task_ids='init_step', key='store')
|
||||
threshold = ti.xcom_pull(task_ids='init_step', key='threshold')
|
||||
tables = ti.xcom_pull(task_ids='get_table_list', key='tables_to_process')
|
||||
|
||||
oracle_conn = None
|
||||
triggered_count = 0
|
||||
|
||||
try:
|
||||
oracle_conn = oraconn.connect('MRDS_LOADER')
|
||||
|
||||
for table_owner, table_name in tables:
|
||||
logging.info("Processing table: %s.%s", table_owner, table_name)
|
||||
|
||||
while True:
|
||||
cursor = oracle_conn.cursor()
|
||||
|
||||
# Execute SQL query with variable substitution
|
||||
service_name = store.upper()
|
||||
sql_query = f"""
|
||||
SELECT (SELECT CASE WHEN SUM(MAX_THREADS) IS NULL THEN 0 ELSE SUM(MAX_THREADS) END AS RUNNING_THREADS
|
||||
FROM CT_MRDS.A_DEVO_REPLICA_MGMT_MOPDB
|
||||
WHERE LAST_STATUS = 'RUNNING') +
|
||||
(SELECT CASE WHEN SUM(MAX_THREADS) IS NULL THEN 0 ELSE SUM(MAX_THREADS) END AS RUNNING_THREADS
|
||||
FROM CT_MRDS.A_DEVO_REPLICA_MGMT_RAR
|
||||
WHERE LAST_STATUS = 'RUNNING')
|
||||
AS TOTAL_RUNNING_THREADS_NOW,
|
||||
(SELECT COUNT(*) FROM CT_MRDS.A_DEVO_REPLICA_MGMT_{service_name}
|
||||
WHERE OWNER = '{table_owner}' AND TABLE_NAME = '{table_name}' AND LAST_STATUS = 'RUNNING') AS TABLE_IS_ALREADY_RUNNING
|
||||
FROM DUAL
|
||||
"""
|
||||
|
||||
cursor.execute(sql_query)
|
||||
result = cursor.fetchone()
|
||||
total_running_val = result[0] or 0
|
||||
table_running_val = result[1] or 0
|
||||
cursor.close()
|
||||
|
||||
logging.info("Total running: %d, threshold: %d, table running: %d",
|
||||
total_running_val, threshold, table_running_val)
|
||||
|
||||
if total_running_val > threshold:
|
||||
logging.info("Threshold exceeded. Waiting 5 minutes...")
|
||||
time.sleep(300)
|
||||
continue
|
||||
|
||||
if table_running_val >= 1:
|
||||
logging.info("Table %s.%s is already running. Skipping.", table_owner, table_name)
|
||||
break
|
||||
|
||||
# Trigger core DAG
|
||||
from airflow.api.common.trigger_dag import trigger_dag
|
||||
|
||||
conf = {
|
||||
"store": store,
|
||||
"owner_table": f"{table_owner}.{table_name}"
|
||||
}
|
||||
|
||||
trigger_dag(
|
||||
dag_id='devo_replicator_core',
|
||||
conf=conf,
|
||||
execution_date=None,
|
||||
replace_microseconds=False
|
||||
)
|
||||
|
||||
triggered_count += 1
|
||||
logging.info("Triggered core DAG for table %s.%s", table_owner, table_name)
|
||||
break
|
||||
|
||||
logging.info("Total DAGs triggered: %d", triggered_count)
|
||||
ti.xcom_push(key='triggered_count', value=triggered_count)
|
||||
|
||||
except Exception as e:
|
||||
logging.error(f"Error in check_and_trigger: {e}")
|
||||
raise
|
||||
finally:
|
||||
if oracle_conn:
|
||||
oracle_conn.close()
|
||||
|
||||
t2 = PythonOperator(
|
||||
task_id='check_and_trigger',
|
||||
python_callable=check_and_trigger,
|
||||
)
|
||||
|
||||
# Dependencies
|
||||
init >> t1 >> t2
|
||||
|
||||
"""
|
||||
RAR Trigger DAG
|
||||
1) init_step
|
||||
- Gets environment from MRDS_ENV environment variable
|
||||
- Reads owner_table parameter from DAG configuration
|
||||
- Validates owner_table format (must be OWNER.TABLE_NAME)
|
||||
- Sets store to "rar" (fixed for this DAG)
|
||||
- Sets threshold to 30 (max concurrent running threads)
|
||||
- Pushes parameters to XCom
|
||||
2) get_table_list
|
||||
- Connects to Oracle database (MRDS_LOADER)
|
||||
- If specific owner_table provided: creates single table list
|
||||
- If no owner_table: queries all tables from CT_MRDS.a_devo_replica_mgmt_rar
|
||||
- Returns list of (owner, table_name) tuples to process
|
||||
- Pushes table list to XCom
|
||||
3) check_and_trigger
|
||||
- Loops through each table from the table list
|
||||
- For each table, enters monitoring loop:
|
||||
- Executes SQL query to check total running threads across MOPDB+RAR
|
||||
- Checks if current table is already running
|
||||
- If total threads > threshold (30): waits 5 minutes and rechecks
|
||||
- If table already running: skips to next tabl
|
||||
- If conditions met: triggers core DAG with table parameters
|
||||
- Counts and logs total number of DAGs triggered
|
||||
- Ensures system doesn't exceed concurrent processing limits
|
||||
"""
|
||||
257
airflow/devo_replicator/devo_replicator_trigger_rqsd.py
Normal file
257
airflow/devo_replicator/devo_replicator_trigger_rqsd.py
Normal file
@@ -0,0 +1,257 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import sys
|
||||
import logging
|
||||
import time
|
||||
from datetime import timedelta
|
||||
|
||||
from airflow import DAG
|
||||
from airflow.utils.dates import days_ago
|
||||
from airflow.operators.python import PythonOperator
|
||||
from airflow.models import Param
|
||||
|
||||
sys.path.append('/opt/airflow/python/connectors/devo')
|
||||
sys.path.append('/opt/airflow/python/mrds_common')
|
||||
|
||||
from mrds.utils import oraconn
|
||||
|
||||
# Get RQSD table options for dropdown
|
||||
def get_rqsd_table_options():
|
||||
oracle_conn = None
|
||||
try:
|
||||
oracle_conn = oraconn.connect('MRDS_LOADER')
|
||||
cursor = oracle_conn.cursor()
|
||||
cursor.execute("SELECT OWNER || '.' || TABLE_NAME FROM CT_MRDS.a_devo_replica_mgmt_rqsd ORDER BY OWNER, TABLE_NAME")
|
||||
options = [row[0] for row in cursor.fetchall()]
|
||||
cursor.close()
|
||||
return options
|
||||
except Exception as e:
|
||||
logging.error(f"Error getting RQSD table options: {e}")
|
||||
return []
|
||||
finally:
|
||||
if oracle_conn:
|
||||
oracle_conn.close()
|
||||
|
||||
default_args = {
|
||||
'owner': 'devo',
|
||||
'depends_on_past': False,
|
||||
'start_date': days_ago(1),
|
||||
'email_on_failure': False,
|
||||
'email_on_retry': False,
|
||||
'retries': 1,
|
||||
'retry_delay': timedelta(minutes=1),
|
||||
}
|
||||
|
||||
with DAG(
|
||||
dag_id='devo_replicator_trigger_rqsd',
|
||||
default_args=default_args,
|
||||
description='External trigger DAG for RQSD tables',
|
||||
schedule=None,
|
||||
catchup=False,
|
||||
tags=['DevoReplicator', 'DevoReplicatorTrigger'],
|
||||
params={
|
||||
"owner_table": Param(
|
||||
default=None,
|
||||
type="string",
|
||||
description="Select table in format OWNER.TABLE_NAME",
|
||||
enum=get_rqsd_table_options()
|
||||
)
|
||||
}
|
||||
) as dag:
|
||||
|
||||
# Init
|
||||
def init_step(**context):
|
||||
dag_run = context.get("dag_run")
|
||||
ti = context["ti"]
|
||||
conf = (dag_run.conf or {}) if dag_run else {}
|
||||
|
||||
env = os.getenv("MRDS_ENV")
|
||||
if not env:
|
||||
raise ValueError("MRDS_ENV environment variable is required")
|
||||
env = env.lower()
|
||||
|
||||
store = "rqsd"
|
||||
owner_table = conf.get("owner_table")
|
||||
|
||||
if not owner_table:
|
||||
raise ValueError("owner_table parameter is required")
|
||||
if '.' not in owner_table:
|
||||
raise ValueError("owner_table must be in format 'OWNER.TABLE_NAME'")
|
||||
|
||||
table_owner, table_name = owner_table.split('.', 1)
|
||||
|
||||
if env not in {"dev", "tst", "acc", "prd"}:
|
||||
raise ValueError(f"Unsupported env '{env}'. Expected 'dev', 'tst', 'acc' or 'prd'.")
|
||||
|
||||
logging.info("=== init_step === env=%s store=%s owner_table=%s",
|
||||
env, store, owner_table)
|
||||
|
||||
xcom = {
|
||||
"env": env,
|
||||
"store": store,
|
||||
"table_owner": table_owner,
|
||||
"table_name": table_name,
|
||||
"owner_table": owner_table,
|
||||
"threshold": 30,
|
||||
}
|
||||
|
||||
for k, v in xcom.items():
|
||||
ti.xcom_push(key=k, value=v)
|
||||
|
||||
init = PythonOperator(
|
||||
task_id='init_step',
|
||||
python_callable=init_step,
|
||||
)
|
||||
|
||||
# Get table list
|
||||
def get_table_list(**context):
|
||||
ti = context["ti"]
|
||||
store = ti.xcom_pull(task_ids='init_step', key='store')
|
||||
owner_table = ti.xcom_pull(task_ids='init_step', key='owner_table')
|
||||
|
||||
oracle_conn = None
|
||||
try:
|
||||
oracle_conn = oraconn.connect('MRDS_LOADER')
|
||||
|
||||
if owner_table:
|
||||
table_owner, table_name = owner_table.split('.', 1)
|
||||
tables = [(table_owner, table_name)]
|
||||
logging.info("Processing specific table: %s", owner_table)
|
||||
else:
|
||||
cursor = oracle_conn.cursor()
|
||||
cursor.execute("SELECT OWNER, TABLE_NAME FROM CT_MRDS.a_devo_replica_mgmt_rqsd ORDER BY OWNER, TABLE_NAME")
|
||||
tables = cursor.fetchall()
|
||||
cursor.close()
|
||||
logging.info("Found %d tables for RQSD", len(tables))
|
||||
|
||||
ti.xcom_push(key='tables_to_process', value=tables)
|
||||
return tables
|
||||
except Exception as e:
|
||||
logging.error(f"Error in get_table_list: {e}")
|
||||
raise
|
||||
finally:
|
||||
if oracle_conn:
|
||||
oracle_conn.close()
|
||||
|
||||
t1 = PythonOperator(
|
||||
task_id='get_table_list',
|
||||
python_callable=get_table_list,
|
||||
)
|
||||
|
||||
# Check and trigger core DAG
|
||||
def check_and_trigger(**context):
|
||||
ti = context["ti"]
|
||||
env = ti.xcom_pull(task_ids='init_step', key='env')
|
||||
store = ti.xcom_pull(task_ids='init_step', key='store')
|
||||
threshold = ti.xcom_pull(task_ids='init_step', key='threshold')
|
||||
tables = ti.xcom_pull(task_ids='get_table_list', key='tables_to_process')
|
||||
|
||||
oracle_conn = None
|
||||
triggered_count = 0
|
||||
|
||||
try:
|
||||
oracle_conn = oraconn.connect('MRDS_LOADER')
|
||||
|
||||
for table_owner, table_name in tables:
|
||||
logging.info("Processing table: %s.%s", table_owner, table_name)
|
||||
|
||||
while True:
|
||||
cursor = oracle_conn.cursor()
|
||||
|
||||
# Execute SQL query with variable substitution
|
||||
service_name = store.upper()
|
||||
sql_query = f"""
|
||||
SELECT (SELECT CASE WHEN SUM(MAX_THREADS) IS NULL THEN 0 ELSE SUM(MAX_THREADS) END AS RUNNING_THREADS
|
||||
FROM CT_MRDS.A_DEVO_REPLICA_MGMT_MOPDB
|
||||
WHERE LAST_STATUS = 'RUNNING') +
|
||||
(SELECT CASE WHEN SUM(MAX_THREADS) IS NULL THEN 0 ELSE SUM(MAX_THREADS) END AS RUNNING_THREADS
|
||||
FROM CT_MRDS.A_DEVO_REPLICA_MGMT_RAR
|
||||
WHERE LAST_STATUS = 'RUNNING')
|
||||
AS TOTAL_RUNNING_THREADS_NOW,
|
||||
(SELECT COUNT(*) FROM CT_MRDS.A_DEVO_REPLICA_MGMT_{service_name}
|
||||
WHERE OWNER = '{table_owner}' AND TABLE_NAME = '{table_name}' AND LAST_STATUS = 'RUNNING') AS TABLE_IS_ALREADY_RUNNING
|
||||
FROM DUAL
|
||||
"""
|
||||
|
||||
cursor.execute(sql_query)
|
||||
result = cursor.fetchone()
|
||||
total_running_val = result[0] or 0
|
||||
table_running_val = result[1] or 0
|
||||
cursor.close()
|
||||
|
||||
logging.info("Total running: %d, threshold: %d, table running: %d",
|
||||
total_running_val, threshold, table_running_val)
|
||||
|
||||
if total_running_val > threshold:
|
||||
logging.info("Threshold exceeded. Waiting 5 minutes...")
|
||||
time.sleep(300)
|
||||
continue
|
||||
|
||||
if table_running_val >= 1:
|
||||
logging.info("Table %s.%s is already running. Skipping.", table_owner, table_name)
|
||||
break
|
||||
|
||||
# Trigger core DAG
|
||||
from airflow.api.common.trigger_dag import trigger_dag
|
||||
|
||||
conf = {
|
||||
"store": store,
|
||||
"owner_table": f"{table_owner}.{table_name}"
|
||||
}
|
||||
|
||||
trigger_dag(
|
||||
dag_id='devo_replicator_core',
|
||||
conf=conf,
|
||||
execution_date=None,
|
||||
replace_microseconds=False
|
||||
)
|
||||
|
||||
triggered_count += 1
|
||||
logging.info("Triggered core DAG for table %s.%s", table_owner, table_name)
|
||||
break
|
||||
|
||||
logging.info("Total DAGs triggered: %d", triggered_count)
|
||||
ti.xcom_push(key='triggered_count', value=triggered_count)
|
||||
|
||||
except Exception as e:
|
||||
logging.error(f"Error in check_and_trigger: {e}")
|
||||
raise
|
||||
finally:
|
||||
if oracle_conn:
|
||||
oracle_conn.close()
|
||||
|
||||
t2 = PythonOperator(
|
||||
task_id='check_and_trigger',
|
||||
python_callable=check_and_trigger,
|
||||
)
|
||||
|
||||
# Dependencies
|
||||
init >> t1 >> t2
|
||||
|
||||
"""
|
||||
RQSD Trigger DAG
|
||||
1) init_step
|
||||
- Gets environment from MRDS_ENV environment variable
|
||||
- Reads owner_table parameter from DAG configuration
|
||||
- Validates owner_table format (must be OWNER.TABLE_NAME)
|
||||
- Sets store to "rqsd" (fixed for this DAG)
|
||||
- Sets threshold to 30 (max concurrent running threads)
|
||||
- Pushes parameters to XCom
|
||||
2) get_table_list
|
||||
- Connects to Oracle database (MRDS_LOADER)
|
||||
- If specific owner_table provided: creates single table list
|
||||
- If no owner_table: queries all tables from CT_MRDS.a_devo_replica_mgmt_rqsd
|
||||
- Returns list of (owner, table_name) tuples to process
|
||||
- Pushes table list to XCom
|
||||
3) check_and_trigger
|
||||
- Loops through each table from the table list
|
||||
- For each table, enters monitoring loop:
|
||||
- Executes SQL query to check total running threads across MOPDB+RAR
|
||||
- Checks if current table is already running
|
||||
- If total threads > threshold (30): waits 5 minutes and rechecks
|
||||
- If table already running: skips to next tabl
|
||||
- If conditions met: triggers core DAG with table parameters
|
||||
- Counts and logs total number of DAGs triggered
|
||||
- Ensures system doesn't exceed concurrent processing limits
|
||||
"""
|
||||
1080
airflow/devo_replicator/devo_table_generator.py
Normal file
1080
airflow/devo_replicator/devo_table_generator.py
Normal file
File diff suppressed because it is too large
Load Diff
255
airflow/devo_replicator/devo_table_generator_trigger_mopdb.py
Normal file
255
airflow/devo_replicator/devo_table_generator_trigger_mopdb.py
Normal file
@@ -0,0 +1,255 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import sys
|
||||
import logging
|
||||
from datetime import timedelta
|
||||
|
||||
from airflow import DAG
|
||||
from airflow.utils.dates import days_ago
|
||||
from airflow.operators.python import PythonOperator
|
||||
from airflow.models import Param
|
||||
|
||||
sys.path.append('/opt/airflow/python/connectors/devo')
|
||||
sys.path.append('/opt/airflow/python/mrds_common')
|
||||
|
||||
from mrds.utils import oraconn
|
||||
|
||||
# Get MOPDB table options for dropdown
|
||||
def get_mopdb_table_options():
|
||||
oracle_conn = None
|
||||
try:
|
||||
oracle_conn = oraconn.connect('MRDS_LOADER')
|
||||
cursor = oracle_conn.cursor()
|
||||
cursor.execute("SELECT OWNER || '.' || TABLE_NAME FROM CT_MRDS.a_devo_replica_mgmt_mopdb ORDER BY OWNER, TABLE_NAME")
|
||||
options = ["__ALL_EXCLUDE_COPY__"] + [row[0] for row in cursor.fetchall()]
|
||||
cursor.close()
|
||||
return options
|
||||
except Exception as e:
|
||||
logging.error(f"Error getting MOPDB table options: {e}")
|
||||
return ["__ALL_EXCLUDE_COPY__"]
|
||||
finally:
|
||||
if oracle_conn:
|
||||
oracle_conn.close()
|
||||
|
||||
default_args = {
|
||||
'owner': 'devo',
|
||||
'depends_on_past': False,
|
||||
'start_date': days_ago(1),
|
||||
'email_on_failure': False,
|
||||
'email_on_retry': False,
|
||||
}
|
||||
|
||||
with DAG(
|
||||
dag_id='devo_table_generator_trigger_mopdb',
|
||||
default_args=default_args,
|
||||
description='External trigger DAG for MOPDB tables',
|
||||
schedule=None,
|
||||
catchup=False,
|
||||
tags=['DevoTableGenerator', 'DevoTableGeneratorTrigger'],
|
||||
params={
|
||||
"owner_table": Param(
|
||||
default="__ALL_EXCLUDE_COPY__",
|
||||
type="string",
|
||||
description="Select '__ALL_EXCLUDE_COPY__' to run all tables without _COPY, or select specific table in format OWNER.TABLE_NAME",
|
||||
enum=get_mopdb_table_options()
|
||||
)
|
||||
}
|
||||
) as dag:
|
||||
|
||||
# Init
|
||||
def init_step(**context):
|
||||
dag_run = context.get("dag_run")
|
||||
ti = context["ti"]
|
||||
conf = (dag_run.conf or {}) if dag_run else {}
|
||||
|
||||
env = os.getenv("MRDS_ENV")
|
||||
if not env:
|
||||
raise ValueError("MRDS_ENV environment variable is required")
|
||||
env = env.lower()
|
||||
|
||||
store = "mopdb"
|
||||
owner_table = conf.get("owner_table")
|
||||
|
||||
if not owner_table:
|
||||
raise ValueError("owner_table parameter is required")
|
||||
|
||||
# Handle special "run all" case
|
||||
run_all_exclude_copy = (owner_table == "__ALL_EXCLUDE_COPY__")
|
||||
|
||||
if not run_all_exclude_copy and '.' not in owner_table:
|
||||
raise ValueError("owner_table must be in format 'OWNER.TABLE_NAME' or '__ALL_EXCLUDE_COPY__'")
|
||||
|
||||
if env not in {"dev", "tst", "acc", "prd"}:
|
||||
raise ValueError(f"Unsupported env '{env}'. Expected 'dev', 'tst', 'acc' or 'prd'.")
|
||||
|
||||
logging.info("=== init_step === env=%s store=%s owner_table=%s run_all_exclude_copy=%s",
|
||||
env, store, owner_table, run_all_exclude_copy)
|
||||
|
||||
xcom = {
|
||||
"env": env,
|
||||
"store": store,
|
||||
"owner_table": owner_table,
|
||||
"run_all_exclude_copy": run_all_exclude_copy,
|
||||
}
|
||||
|
||||
for k, v in xcom.items():
|
||||
ti.xcom_push(key=k, value=v)
|
||||
|
||||
init = PythonOperator(
|
||||
task_id='init_step',
|
||||
python_callable=init_step,
|
||||
)
|
||||
|
||||
# Get table list
|
||||
def get_table_list(**context):
|
||||
ti = context["ti"]
|
||||
store = ti.xcom_pull(task_ids='init_step', key='store')
|
||||
owner_table = ti.xcom_pull(task_ids='init_step', key='owner_table')
|
||||
run_all_exclude_copy = ti.xcom_pull(task_ids='init_step', key='run_all_exclude_copy')
|
||||
|
||||
oracle_conn = None
|
||||
try:
|
||||
oracle_conn = oraconn.connect('MRDS_LOADER')
|
||||
|
||||
if run_all_exclude_copy:
|
||||
# Get all tables excluding those with _COPY in the name
|
||||
cursor = oracle_conn.cursor()
|
||||
cursor.execute("""
|
||||
SELECT OWNER, TABLE_NAME
|
||||
FROM CT_MRDS.a_devo_replica_mgmt_mopdb
|
||||
WHERE TABLE_NAME NOT LIKE '%_COPY%'
|
||||
ORDER BY OWNER, TABLE_NAME
|
||||
""")
|
||||
tables = cursor.fetchall()
|
||||
cursor.close()
|
||||
logging.info("Processing ALL tables excluding _COPY: %d tables found", len(tables))
|
||||
elif owner_table:
|
||||
# Process specific table
|
||||
table_owner, table_name = owner_table.split('.', 1)
|
||||
tables = [(table_owner, table_name)]
|
||||
logging.info("Processing specific table: %s", owner_table)
|
||||
else:
|
||||
# Fallback: get all tables
|
||||
cursor = oracle_conn.cursor()
|
||||
cursor.execute("SELECT OWNER, TABLE_NAME FROM CT_MRDS.a_devo_replica_mgmt_mopdb ORDER BY OWNER, TABLE_NAME")
|
||||
tables = cursor.fetchall()
|
||||
cursor.close()
|
||||
logging.info("Found %d tables for MOPDB", len(tables))
|
||||
|
||||
ti.xcom_push(key='tables_to_process', value=tables)
|
||||
return tables
|
||||
except Exception as e:
|
||||
logging.error(f"Error in get_table_list: {e}")
|
||||
raise
|
||||
finally:
|
||||
if oracle_conn:
|
||||
oracle_conn.close()
|
||||
|
||||
t1 = PythonOperator(
|
||||
task_id='get_table_list',
|
||||
python_callable=get_table_list,
|
||||
)
|
||||
|
||||
# Trigger core DAG for each table
|
||||
def trigger_tables(**context):
|
||||
ti = context["ti"]
|
||||
env = ti.xcom_pull(task_ids='init_step', key='env')
|
||||
store = ti.xcom_pull(task_ids='init_step', key='store')
|
||||
tables = ti.xcom_pull(task_ids='get_table_list', key='tables_to_process')
|
||||
|
||||
oracle_conn = None
|
||||
triggered_count = 0
|
||||
skipped_count = 0
|
||||
|
||||
try:
|
||||
oracle_conn = oraconn.connect('MRDS_LOADER')
|
||||
|
||||
for table_owner, table_name in tables:
|
||||
logging.info("Processing table: %s.%s", table_owner, table_name)
|
||||
|
||||
cursor = oracle_conn.cursor()
|
||||
|
||||
# Check if table is already running
|
||||
service_name = store.upper()
|
||||
sql_query = f"""
|
||||
SELECT COUNT(*)
|
||||
FROM CT_MRDS.A_DEVO_REPLICA_MGMT_{service_name}
|
||||
WHERE OWNER = '{table_owner}'
|
||||
AND TABLE_NAME = '{table_name}'
|
||||
AND LAST_STATUS = 'RUNNING'
|
||||
"""
|
||||
|
||||
cursor.execute(sql_query)
|
||||
result = cursor.fetchone()
|
||||
table_running_val = result[0] or 0
|
||||
cursor.close()
|
||||
|
||||
if table_running_val >= 1:
|
||||
logging.info("Table %s.%s is already running. Skipping.", table_owner, table_name)
|
||||
skipped_count += 1
|
||||
continue
|
||||
|
||||
# Trigger core DAG
|
||||
from airflow.api.common.trigger_dag import trigger_dag
|
||||
|
||||
conf = {
|
||||
"store": store,
|
||||
"owner_table": f"{table_owner}.{table_name}"
|
||||
}
|
||||
|
||||
trigger_dag(
|
||||
dag_id='devo_table_generator_core',
|
||||
conf=conf,
|
||||
execution_date=None,
|
||||
replace_microseconds=False
|
||||
)
|
||||
|
||||
triggered_count += 1
|
||||
logging.info("Triggered core DAG for table %s.%s", table_owner, table_name)
|
||||
|
||||
logging.info("Summary: Total DAGs triggered: %d, Skipped (already running): %d",
|
||||
triggered_count, skipped_count)
|
||||
ti.xcom_push(key='triggered_count', value=triggered_count)
|
||||
ti.xcom_push(key='skipped_count', value=skipped_count)
|
||||
|
||||
except Exception as e:
|
||||
logging.error(f"Error in trigger_tables: {e}")
|
||||
raise
|
||||
finally:
|
||||
if oracle_conn:
|
||||
oracle_conn.close()
|
||||
|
||||
t2 = PythonOperator(
|
||||
task_id='trigger_tables',
|
||||
python_callable=trigger_tables,
|
||||
)
|
||||
|
||||
# Dependencies
|
||||
init >> t1 >> t2
|
||||
|
||||
"""
|
||||
MOPDB Trigger DAG
|
||||
1) init_step
|
||||
- Gets environment from MRDS_ENV environment variable
|
||||
- Reads owner_table parameter from DAG configuration
|
||||
- Validates owner_table format (must be OWNER.TABLE_NAME or __ALL_EXCLUDE_COPY__)
|
||||
- Sets store to "mopdb" (fixed for this DAG)
|
||||
- Determines if running all tables excluding _COPY
|
||||
- Pushes parameters to XCom
|
||||
2) get_table_list
|
||||
- Connects to Oracle database (MRDS_LOADER)
|
||||
- If __ALL_EXCLUDE_COPY__: queries all tables from CT_MRDS.a_devo_replica_mgmt_mopdb excluding _COPY tables
|
||||
- If specific owner_table provided: creates single table list
|
||||
- If no owner_table: queries all tables from CT_MRDS.a_devo_replica_mgmt_mopdb
|
||||
- Returns list of (owner, table_name) tuples to process
|
||||
- Pushes table list to XCom
|
||||
3) trigger_tables
|
||||
- Loops through each table from the table list
|
||||
- For each table:
|
||||
- Checks if table is already running
|
||||
- If table already running: skips to next table
|
||||
- If not running: triggers core DAG with table parameters
|
||||
- Counts and logs total number of DAGs triggered and skipped
|
||||
- No threshold checking or waiting logic
|
||||
"""
|
||||
257
airflow/devo_replicator/devo_table_generator_trigger_rar.py
Normal file
257
airflow/devo_replicator/devo_table_generator_trigger_rar.py
Normal file
@@ -0,0 +1,257 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import sys
|
||||
import logging
|
||||
from datetime import timedelta
|
||||
|
||||
from airflow import DAG
|
||||
from airflow.utils.dates import days_ago
|
||||
from airflow.operators.python import PythonOperator
|
||||
from airflow.models import Param
|
||||
|
||||
sys.path.append('/opt/airflow/python/connectors/devo')
|
||||
sys.path.append('/opt/airflow/python/mrds_common')
|
||||
|
||||
from mrds.utils import oraconn
|
||||
|
||||
# Get RAR table options for dropdown
|
||||
def get_rar_table_options():
|
||||
oracle_conn = None
|
||||
try:
|
||||
oracle_conn = oraconn.connect('MRDS_LOADER')
|
||||
cursor = oracle_conn.cursor()
|
||||
cursor.execute("SELECT OWNER || '.' || TABLE_NAME FROM CT_MRDS.a_devo_replica_mgmt_rar ORDER BY OWNER, TABLE_NAME")
|
||||
options = ["__ALL_EXCLUDE_COPY__"] + [row[0] for row in cursor.fetchall()]
|
||||
cursor.close()
|
||||
return options
|
||||
except Exception as e:
|
||||
logging.error(f"Error getting RAR table options: {e}")
|
||||
return ["__ALL_EXCLUDE_COPY__"]
|
||||
finally:
|
||||
if oracle_conn:
|
||||
oracle_conn.close()
|
||||
|
||||
default_args = {
|
||||
'owner': 'devo',
|
||||
'depends_on_past': False,
|
||||
'start_date': days_ago(1),
|
||||
'email_on_failure': False,
|
||||
'email_on_retry': False,
|
||||
'retries': 1,
|
||||
'retry_delay': timedelta(minutes=1),
|
||||
}
|
||||
|
||||
with DAG(
|
||||
dag_id='devo_table_generator_trigger_rar',
|
||||
default_args=default_args,
|
||||
description='External trigger DAG for RAR tables',
|
||||
schedule=None,
|
||||
catchup=False,
|
||||
tags=['DevoTableGenerator', 'DevoTableGeneratorTrigger'],
|
||||
params={
|
||||
"owner_table": Param(
|
||||
default="__ALL_EXCLUDE_COPY__",
|
||||
type="string",
|
||||
description="Select '__ALL_EXCLUDE_COPY__' to run all tables without _COPY, or select specific table in format OWNER.TABLE_NAME",
|
||||
enum=get_rar_table_options()
|
||||
)
|
||||
}
|
||||
) as dag:
|
||||
|
||||
# Init
|
||||
def init_step(**context):
    """Validate the trigger parameters and publish them to XCom.

    Requires MRDS_ENV to be one of dev/tst/acc/prd and an ``owner_table``
    conf value that is either 'OWNER.TABLE_NAME' or the special
    '__ALL_EXCLUDE_COPY__' sentinel. Raises ValueError otherwise.
    """
    dag_run = context.get("dag_run")
    ti = context["ti"]
    conf = (dag_run.conf or {}) if dag_run else {}

    env = os.getenv("MRDS_ENV")
    if not env:
        raise ValueError("MRDS_ENV environment variable is required")
    env = env.lower()

    store = "rar"
    owner_table = conf.get("owner_table")
    if not owner_table:
        raise ValueError("owner_table parameter is required")

    # The sentinel value means "process every non-_COPY table".
    run_all_exclude_copy = owner_table == "__ALL_EXCLUDE_COPY__"
    if not run_all_exclude_copy and '.' not in owner_table:
        raise ValueError("owner_table must be in format 'OWNER.TABLE_NAME' or '__ALL_EXCLUDE_COPY__'")

    if env not in {"dev", "tst", "acc", "prd"}:
        raise ValueError(f"Unsupported env '{env}'. Expected 'dev', 'tst', 'acc' or 'prd'.")

    logging.info("=== init_step === env=%s store=%s owner_table=%s run_all_exclude_copy=%s",
                 env, store, owner_table, run_all_exclude_copy)

    # Publish each parameter under its own XCom key for the downstream tasks.
    for key, value in (
        ("env", env),
        ("store", store),
        ("owner_table", owner_table),
        ("run_all_exclude_copy", run_all_exclude_copy),
    ):
        ti.xcom_push(key=key, value=value)
|
||||
|
||||
init = PythonOperator(
|
||||
task_id='init_step',
|
||||
python_callable=init_step,
|
||||
)
|
||||
|
||||
# Get table list
|
||||
def get_table_list(**context):
    """Resolve the list of (OWNER, TABLE_NAME) tuples to process.

    Three modes, driven by init_step's XCom values:
      * run_all_exclude_copy: every table in CT_MRDS.a_devo_replica_mgmt_rar
        whose name does NOT contain the literal substring "_COPY";
      * a specific owner_table: a single-element list;
      * otherwise: every table in the management table.

    Pushes the result to XCom under 'tables_to_process' and returns it.
    Raises: re-raises any database error after logging it.
    """
    ti = context["ti"]
    owner_table = ti.xcom_pull(task_ids='init_step', key='owner_table')
    run_all_exclude_copy = ti.xcom_pull(task_ids='init_step', key='run_all_exclude_copy')

    oracle_conn = None
    try:
        oracle_conn = oraconn.connect('MRDS_LOADER')

        if run_all_exclude_copy:
            # Get all tables excluding those with _COPY in the name.
            # BUGFIX: '_' is a single-character wildcard in SQL LIKE, so the
            # previous pattern '%_COPY%' also excluded names such as "XCOPY…".
            # Escaping the underscore makes the match literal.
            cursor = oracle_conn.cursor()
            cursor.execute("""
                SELECT OWNER, TABLE_NAME
                FROM CT_MRDS.a_devo_replica_mgmt_rar
                WHERE TABLE_NAME NOT LIKE '%\\_COPY%' ESCAPE '\\'
                ORDER BY OWNER, TABLE_NAME
            """)
            tables = cursor.fetchall()
            cursor.close()
            logging.info("Processing ALL tables excluding _COPY: %d tables found", len(tables))
        elif owner_table:
            # Process the single requested table.
            table_owner, table_name = owner_table.split('.', 1)
            tables = [(table_owner, table_name)]
            logging.info("Processing specific table: %s", owner_table)
        else:
            # Fallback: process every managed table.
            cursor = oracle_conn.cursor()
            cursor.execute("SELECT OWNER, TABLE_NAME FROM CT_MRDS.a_devo_replica_mgmt_rar ORDER BY OWNER, TABLE_NAME")
            tables = cursor.fetchall()
            cursor.close()
            logging.info("Found %d tables for RAR", len(tables))

        ti.xcom_push(key='tables_to_process', value=tables)
        return tables
    except Exception as e:
        logging.error(f"Error in get_table_list: {e}")
        raise
    finally:
        if oracle_conn:
            oracle_conn.close()
|
||||
|
||||
t1 = PythonOperator(
|
||||
task_id='get_table_list',
|
||||
python_callable=get_table_list,
|
||||
)
|
||||
|
||||
# Trigger core DAG for each table
|
||||
def trigger_tables(**context):
    """Trigger the core DAG once per pending table, skipping running ones.

    For each (owner, table) from get_table_list, checks the management table
    for a LAST_STATUS = 'RUNNING' row; if none, triggers
    'devo_table_generator_core' with {"store", "owner_table"} conf.
    Pushes 'triggered_count' and 'skipped_count' to XCom.
    Raises: re-raises any error after logging it.
    """
    # Hoisted out of the per-table loop: the import is loop-invariant.
    from airflow.api.common.trigger_dag import trigger_dag

    ti = context["ti"]
    store = ti.xcom_pull(task_ids='init_step', key='store')
    tables = ti.xcom_pull(task_ids='get_table_list', key='tables_to_process')

    oracle_conn = None
    triggered_count = 0
    skipped_count = 0

    try:
        oracle_conn = oraconn.connect('MRDS_LOADER')

        # The table-name suffix comes from our own fixed store value, not user
        # input; the per-row values are passed as bind variables instead of
        # being interpolated into the SQL text (avoids injection and lets the
        # DB reuse the statement). Named binds per cx_Oracle/python-oracledb.
        service_name = store.upper()
        sql_query = f"""
            SELECT COUNT(*)
            FROM CT_MRDS.A_DEVO_REPLICA_MGMT_{service_name}
            WHERE OWNER = :owner
              AND TABLE_NAME = :table_name
              AND LAST_STATUS = 'RUNNING'
        """

        for table_owner, table_name in tables:
            logging.info("Processing table: %s.%s", table_owner, table_name)

            # Check if table is already running.
            cursor = oracle_conn.cursor()
            cursor.execute(sql_query, {"owner": table_owner, "table_name": table_name})
            result = cursor.fetchone()
            table_running_val = result[0] or 0
            cursor.close()

            if table_running_val >= 1:
                logging.info("Table %s.%s is already running. Skipping.", table_owner, table_name)
                skipped_count += 1
                continue

            # Trigger core DAG for this table.
            conf = {
                "store": store,
                "owner_table": f"{table_owner}.{table_name}"
            }
            trigger_dag(
                dag_id='devo_table_generator_core',
                conf=conf,
                execution_date=None,
                replace_microseconds=False
            )
            triggered_count += 1
            logging.info("Triggered core DAG for table %s.%s", table_owner, table_name)

        logging.info("Summary: Total DAGs triggered: %d, Skipped (already running): %d",
                     triggered_count, skipped_count)
        ti.xcom_push(key='triggered_count', value=triggered_count)
        ti.xcom_push(key='skipped_count', value=skipped_count)

    except Exception as e:
        logging.error(f"Error in trigger_tables: {e}")
        raise
    finally:
        if oracle_conn:
            oracle_conn.close()
|
||||
|
||||
t2 = PythonOperator(
|
||||
task_id='trigger_tables',
|
||||
python_callable=trigger_tables,
|
||||
)
|
||||
|
||||
# Dependencies
|
||||
init >> t1 >> t2
|
||||
|
||||
"""
|
||||
RAR Trigger DAG
|
||||
1) init_step
|
||||
- Gets environment from MRDS_ENV environment variable
|
||||
- Reads owner_table parameter from DAG configuration
|
||||
- Validates owner_table format (must be OWNER.TABLE_NAME or __ALL_EXCLUDE_COPY__)
|
||||
- Sets store to "rar" (fixed for this DAG)
|
||||
- Determines if running all tables excluding _COPY
|
||||
- Pushes parameters to XCom
|
||||
2) get_table_list
|
||||
- Connects to Oracle database (MRDS_LOADER)
|
||||
- If __ALL_EXCLUDE_COPY__: queries all tables from CT_MRDS.a_devo_replica_mgmt_rar excluding _COPY tables
|
||||
- If specific owner_table provided: creates single table list
|
||||
- If no owner_table: queries all tables from CT_MRDS.a_devo_replica_mgmt_rar
|
||||
- Returns list of (owner, table_name) tuples to process
|
||||
- Pushes table list to XCom
|
||||
3) trigger_tables
|
||||
- Loops through each table from the table list
|
||||
- For each table:
|
||||
- Checks if table is already running
|
||||
- If table already running: skips to next table
|
||||
- If not running: triggers core DAG with table parameters
|
||||
- Counts and logs total number of DAGs triggered and skipped
|
||||
- No threshold checking or waiting logic
|
||||
"""
|
||||
239
airflow/devo_replicator/devo_table_generator_trigger_rqsd.py
Normal file
239
airflow/devo_replicator/devo_table_generator_trigger_rqsd.py
Normal file
@@ -0,0 +1,239 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import sys
|
||||
import logging
|
||||
from datetime import timedelta
|
||||
|
||||
from airflow import DAG
|
||||
from airflow.utils.dates import days_ago
|
||||
from airflow.operators.python import PythonOperator
|
||||
from airflow.models import Param
|
||||
|
||||
sys.path.append('/opt/airflow/python/connectors/devo')
|
||||
sys.path.append('/opt/airflow/python/mrds_common')
|
||||
|
||||
from mrds.utils import oraconn
|
||||
|
||||
# Get RQSD table options for dropdown
|
||||
def get_rqsd_table_options():
    """Return OWNER.TABLE_NAME dropdown options for the ``owner_table`` Param.

    Reads every row of CT_MRDS.a_devo_replica_mgmt_rqsd; returns an empty
    list when the query fails so DAG parsing never breaks.

    NOTE(review): called from the Param definition below, i.e. at DAG-parse
    time — every scheduler parse issues a DB query; confirm this is intended.
    """
    oracle_conn = None
    try:
        oracle_conn = oraconn.connect('MRDS_LOADER')
        cursor = oracle_conn.cursor()
        cursor.execute("SELECT OWNER || '.' || TABLE_NAME FROM CT_MRDS.a_devo_replica_mgmt_rqsd ORDER BY OWNER, TABLE_NAME")
        options = [row[0] for row in cursor.fetchall()]
        cursor.close()
        return options
    except Exception as e:
        # Best effort: an empty enum still lets the DAG render.
        logging.error(f"Error getting RQSD table options: {e}")
        return []
    finally:
        # Closing the connection also releases the cursor on the error path.
        if oracle_conn:
            oracle_conn.close()
|
||||
|
||||
default_args = {
|
||||
'owner': 'devo',
|
||||
'depends_on_past': False,
|
||||
'start_date': days_ago(1),
|
||||
'email_on_failure': False,
|
||||
'email_on_retry': False,
|
||||
'retries': 1,
|
||||
'retry_delay': timedelta(minutes=1),
|
||||
}
|
||||
|
||||
with DAG(
|
||||
dag_id='devo_table_generator_trigger_rqsd',
|
||||
default_args=default_args,
|
||||
description='External trigger DAG for RQSD tables',
|
||||
schedule=None,
|
||||
catchup=False,
|
||||
tags=['DevoTableGenerator', 'DevoTableGeneratorTrigger'],
|
||||
params={
|
||||
"owner_table": Param(
|
||||
default=None,
|
||||
type="string",
|
||||
description="Select table in format OWNER.TABLE_NAME",
|
||||
enum=get_rqsd_table_options()
|
||||
)
|
||||
}
|
||||
) as dag:
|
||||
|
||||
# Init
|
||||
def init_step(**context):
    """Validate the trigger parameters and publish them to XCom.

    Requires MRDS_ENV to be one of dev/tst/acc/prd and an ``owner_table``
    conf value in 'OWNER.TABLE_NAME' form. Raises ValueError otherwise.
    """
    dag_run = context.get("dag_run")
    ti = context["ti"]
    conf = (dag_run.conf or {}) if dag_run else {}

    env = os.getenv("MRDS_ENV")
    if not env:
        raise ValueError("MRDS_ENV environment variable is required")
    env = env.lower()

    store = "rqsd"
    owner_table = conf.get("owner_table")
    if not owner_table:
        raise ValueError("owner_table parameter is required")
    if '.' not in owner_table:
        raise ValueError("owner_table must be in format 'OWNER.TABLE_NAME'")

    # Only the first dot separates owner from table name.
    table_owner, table_name = owner_table.split('.', 1)

    if env not in {"dev", "tst", "acc", "prd"}:
        raise ValueError(f"Unsupported env '{env}'. Expected 'dev', 'tst', 'acc' or 'prd'.")

    logging.info("=== init_step === env=%s store=%s owner_table=%s",
                 env, store, owner_table)

    # Publish each parameter under its own XCom key for the downstream tasks.
    for key, value in (
        ("env", env),
        ("store", store),
        ("table_owner", table_owner),
        ("table_name", table_name),
        ("owner_table", owner_table),
    ):
        ti.xcom_push(key=key, value=value)
|
||||
|
||||
init = PythonOperator(
|
||||
task_id='init_step',
|
||||
python_callable=init_step,
|
||||
)
|
||||
|
||||
# Get table list
|
||||
def get_table_list(**context):
    """Resolve the list of (OWNER, TABLE_NAME) tuples to process.

    Uses the owner_table value pushed by init_step when present (single-table
    mode); otherwise falls back to every row of
    CT_MRDS.a_devo_replica_mgmt_rqsd. Pushes the result to XCom under
    'tables_to_process' and also returns it. Re-raises DB errors after logging.
    """
    ti = context["ti"]
    store = ti.xcom_pull(task_ids='init_step', key='store')  # NOTE(review): pulled but unused here
    owner_table = ti.xcom_pull(task_ids='init_step', key='owner_table')

    oracle_conn = None
    try:
        oracle_conn = oraconn.connect('MRDS_LOADER')

        if owner_table:
            # Single-table mode: split only on the first dot.
            table_owner, table_name = owner_table.split('.', 1)
            tables = [(table_owner, table_name)]
            logging.info("Processing specific table: %s", owner_table)
        else:
            # Fallback: process every managed RQSD table.
            cursor = oracle_conn.cursor()
            cursor.execute("SELECT OWNER, TABLE_NAME FROM CT_MRDS.a_devo_replica_mgmt_rqsd ORDER BY OWNER, TABLE_NAME")
            tables = cursor.fetchall()
            cursor.close()
            logging.info("Found %d tables for RQSD", len(tables))

        ti.xcom_push(key='tables_to_process', value=tables)
        return tables
    except Exception as e:
        logging.error(f"Error in get_table_list: {e}")
        raise
    finally:
        if oracle_conn:
            oracle_conn.close()
|
||||
|
||||
t1 = PythonOperator(
|
||||
task_id='get_table_list',
|
||||
python_callable=get_table_list,
|
||||
)
|
||||
|
||||
# Trigger core DAG for each table
|
||||
def trigger_tables(**context):
    """Trigger the core DAG once per pending table, skipping running ones.

    For each (owner, table) from get_table_list, checks the management table
    for a LAST_STATUS = 'RUNNING' row; if none, triggers
    'devo_table_generator_core' with {"store", "owner_table"} conf.
    Pushes 'triggered_count' and 'skipped_count' to XCom.
    Raises: re-raises any error after logging it.
    """
    # Hoisted out of the per-table loop: the import is loop-invariant.
    from airflow.api.common.trigger_dag import trigger_dag

    ti = context["ti"]
    store = ti.xcom_pull(task_ids='init_step', key='store')
    tables = ti.xcom_pull(task_ids='get_table_list', key='tables_to_process')

    oracle_conn = None
    triggered_count = 0
    skipped_count = 0

    try:
        oracle_conn = oraconn.connect('MRDS_LOADER')

        # The table-name suffix comes from our own fixed store value, not user
        # input; the per-row values are passed as bind variables instead of
        # being interpolated into the SQL text (avoids injection and lets the
        # DB reuse the statement). Named binds per cx_Oracle/python-oracledb.
        service_name = store.upper()
        sql_query = f"""
            SELECT COUNT(*)
            FROM CT_MRDS.A_DEVO_REPLICA_MGMT_{service_name}
            WHERE OWNER = :owner
              AND TABLE_NAME = :table_name
              AND LAST_STATUS = 'RUNNING'
        """

        for table_owner, table_name in tables:
            logging.info("Processing table: %s.%s", table_owner, table_name)

            # Check if table is already running.
            cursor = oracle_conn.cursor()
            cursor.execute(sql_query, {"owner": table_owner, "table_name": table_name})
            result = cursor.fetchone()
            table_running_val = result[0] or 0
            cursor.close()

            if table_running_val >= 1:
                logging.info("Table %s.%s is already running. Skipping.", table_owner, table_name)
                skipped_count += 1
                continue

            # Trigger core DAG for this table.
            conf = {
                "store": store,
                "owner_table": f"{table_owner}.{table_name}"
            }
            trigger_dag(
                dag_id='devo_table_generator_core',
                conf=conf,
                execution_date=None,
                replace_microseconds=False
            )
            triggered_count += 1
            logging.info("Triggered core DAG for table %s.%s", table_owner, table_name)

        logging.info("Summary: Total DAGs triggered: %d, Skipped (already running): %d",
                     triggered_count, skipped_count)
        ti.xcom_push(key='triggered_count', value=triggered_count)
        ti.xcom_push(key='skipped_count', value=skipped_count)

    except Exception as e:
        logging.error(f"Error in trigger_tables: {e}")
        raise
    finally:
        if oracle_conn:
            oracle_conn.close()
|
||||
|
||||
t2 = PythonOperator(
|
||||
task_id='trigger_tables',
|
||||
python_callable=trigger_tables,
|
||||
)
|
||||
|
||||
# Dependencies
|
||||
init >> t1 >> t2
|
||||
|
||||
"""
|
||||
RQSD Trigger DAG
|
||||
1) init_step
|
||||
- Gets environment from MRDS_ENV environment variable
|
||||
- Reads owner_table parameter from DAG configuration
|
||||
- Validates owner_table format (must be OWNER.TABLE_NAME)
|
||||
- Sets store to "rqsd" (fixed for this DAG)
|
||||
- Pushes parameters to XCom
|
||||
2) get_table_list
|
||||
- Connects to Oracle database (MRDS_LOADER)
|
||||
- If specific owner_table provided: creates single table list
|
||||
- If no owner_table: queries all tables from CT_MRDS.a_devo_replica_mgmt_rqsd
|
||||
- Returns list of (owner, table_name) tuples to process
|
||||
- Pushes table list to XCom
|
||||
3) trigger_tables
|
||||
- Loops through each table from the table list
|
||||
- For each table:
|
||||
- Checks if table is already running
|
||||
- If table already running: skips to next table
|
||||
- If not running: triggers core DAG with table parameters
|
||||
- Counts and logs total number of DAGs triggered and skipped
|
||||
- No threshold checking or waiting logic
|
||||
"""
|
||||
146
airflow/mopdb/CSDB/DEBT/w_MOPDB_CSDB_DEBT.py
Normal file
146
airflow/mopdb/CSDB/DEBT/w_MOPDB_CSDB_DEBT.py
Normal file
@@ -0,0 +1,146 @@
|
||||
import os
|
||||
from airflow.decorators import dag
|
||||
from airflow.operators.bash import BashOperator
|
||||
from airflow.operators.python import PythonOperator
|
||||
from airflow.utils.dates import days_ago
|
||||
from airflow.utils.trigger_rule import TriggerRule
|
||||
from cosmos import DbtTaskGroup, ProfileConfig, ProjectConfig, RenderConfig
|
||||
|
||||
# DAG id is derived from this file's name (w_MOPDB_CSDB_DEBT).
DAG_NAME = os.path.splitext(os.path.basename(__file__))[0]
ENV_NAME = os.getenv("MRDS_ENV", "").lower()

# Logical database name per environment; anything unmapped falls back to MOPDB.
# NOTE(review): the key here is "test", while the trigger DAGs in this repo
# validate MRDS_ENV against {"dev", "tst", "acc", "prd"} — confirm whether
# "test" can ever match, or should read "tst".
DATABASE_NAME_MAP = {
    "dev": "MOPDB",
    "test": "MOPDB_TEST",
}
DATABASE_NAME = DATABASE_NAME_MAP.get(ENV_NAME, "MOPDB")

# check cron 2

# dbt project layout inside the Airflow workers.
dbt_root_path = "/opt/dbt"
dbt_profiles_dir = "/opt/dbt/profiles.yml"
dbt_profiles_dir_parent = "/opt/dbt"

# Environment passed to every dbt invocation (BashOperator and cosmos groups).
dbt_env = {
    "DBT_PROFILES_DIR": dbt_profiles_dir_parent,
    "DBT_TARGET": ENV_NAME,
    # Oracle connection settings; credentials come from the worker environment.
    "MRDS_LOADER_DB_USER": os.getenv("MRDS_LOADER_DB_USER"),
    "MRDS_LOADER_DB_PASS": os.getenv("MRDS_LOADER_DB_PASS"),
    "MRDS_LOADER_DB_TNS": os.getenv("MRDS_LOADER_DB_TNS", "XE"),
    "MRDS_SCHEMA": os.getenv("MRDS_SCHEMA", "CT_MRDS"),
    "MRDS_PROTOCOL": os.getenv("MRDS_PROTOCOL", "tcps"),
    "MRDS_THREADS": os.getenv("MRDS_THREADS", "4"),

    # Fixed dbt output locations on the worker.
    "DBT_LOG_PATH": "/opt/dbt/logs",
    "DBT_TARGET_PATH": "/opt/dbt/target",
    "PYTHONUNBUFFERED": "1",
}
|
||||
|
||||
def retrieve_run_id(**kwargs):
    """Publish the current Airflow run_id to XCom and hand it back."""
    current_run_id = kwargs["run_id"]
    task_instance = kwargs["ti"]
    task_instance.xcom_push(key="run_id", value=current_run_id)
    return current_run_id
|
||||
|
||||
def check_dag_status(**kwargs):
    """Fail the DAG run if any other task instance ended in the 'failed' state."""
    own_task_id = kwargs["task_instance"].task_id
    for instance in kwargs["dag_run"].get_task_instances():
        is_other_task = instance.task_id != own_task_id
        if instance.state == "failed" and is_other_task:
            raise Exception(f"Task {instance.task_id} failed. Failing this DAG run")
|
||||
|
||||
@dag(
    dag_id=DAG_NAME,
    schedule_interval=None,  # externally triggered only
    start_date=days_ago(2),
    catchup=False,
)
def run_dag():
    """Build the w_MOPDB_CSDB_DEBT workflow.

    Task chain:
        read_vars -> retrieve_run_id -> control_external_run_start
            -> dbt task group (tag m_MOPDB_CSDB_DEBT_OU_CSDB_DEBT)
            -> control_external_run_end -> dag_status
    """
    def read_vars(**context):
        # Log the bucket configuration for debugging; the values are not used further.
        BUCKET = os.getenv("INBOX_BUCKET")
        BUCKET_NAMESPACE = os.getenv("BUCKET_NAMESPACE")
        print("========= DBT ENV =========")
        print(f"BUCKET_NAMESPACE: {BUCKET_NAMESPACE}, BUCKET : {BUCKET}")
        return 1

    read_vars_task = PythonOperator(
        task_id="read_vars",
        python_callable=read_vars,
        provide_context=True,
    )

    # Captures the Airflow run_id so later tasks can pass it to dbt as
    # orchestration_run_id.
    retrieve_run_id_task = PythonOperator(
        task_id="retrieve_run_id",
        python_callable=retrieve_run_id,
        provide_context=True,
    )

    # Marks the start of the run in the control tables via a dbt run-operation.
    # The --vars payload is built by Jinja at render time ({{ ... | tojson }}).
    control_external_run_start = BashOperator(
        task_id="control_external_run_start",
        params={"db": DATABASE_NAME, "wf": DAG_NAME},
        env=dbt_env,
        bash_command="""
            set -euxo pipefail
            cd /opt/dbt
            dbt --log-format json --log-level debug --debug --log-path /opt/dbt/logs \
                run-operation control_external_run_start \
                --vars '{{ {
                    "orchestration_run_id": ti.xcom_pull(task_ids="retrieve_run_id", key="run_id"),
                    "input_service_name": params.db,
                    "workflow_name": params.wf
                } | tojson }}'
        """,
    )

    # Shared cosmos configuration for the dbt task group below.
    common_profile = ProfileConfig(
        profiles_yml_filepath=dbt_profiles_dir,
        profile_name="mrds",
        target_name=ENV_NAME,
    )
    common_project = ProjectConfig(dbt_project_path=dbt_root_path)
    common_vars = {
        "orchestration_run_id": "{{ ti.xcom_pull(task_ids='retrieve_run_id', key='run_id') }}",
        "input_service_name": DATABASE_NAME,
        "workflow_name": DAG_NAME,
    }
    common_operator_args = {
        "vars": common_vars,
        "env": dbt_env,
    }

    # dbt models selected by tag for this workflow.
    m_MOPDB_CSDB_DEBT_OU_CSDB_DEBT = DbtTaskGroup(
        group_id="m_MOPDB_CSDB_DEBT_OU_CSDB_DEBT",
        project_config=common_project,
        profile_config=common_profile,
        render_config=RenderConfig(select=[
            "tag:m_MOPDB_CSDB_DEBT_OU_CSDB_DEBT",
        ]),
        operator_args=common_operator_args,
    )

    # Marks the end of the run; ALL_DONE so it runs even after failures.
    control_external_run_end = BashOperator(
        task_id="control_external_run_end",
        params={"db": DATABASE_NAME, "wf": DAG_NAME},
        env=dbt_env,
        bash_command="""
            set -euxo pipefail
            cd /opt/dbt
            dbt --log-format json --log-level debug --debug --log-path /opt/dbt/logs \
                run-operation control_external_run_end \
                --vars '{{ {
                    "orchestration_run_id": ti.xcom_pull(task_ids="retrieve_run_id", key="run_id"),
                    "input_service_name": params.db,
                    "workflow_name": params.wf
                } | tojson }}'
        """,
        trigger_rule=TriggerRule.ALL_DONE,
    )

    # Runs last (ALL_DONE) and fails the run if any earlier task failed.
    dag_status = PythonOperator(
        task_id="dag_status",
        provide_context=True,
        python_callable=check_dag_status,
        trigger_rule=TriggerRule.ALL_DONE,
    )

    read_vars_task >> retrieve_run_id_task >> control_external_run_start >> m_MOPDB_CSDB_DEBT_OU_CSDB_DEBT >> control_external_run_end >> dag_status
|
||||
|
||||
globals()[DAG_NAME] = run_dag()
|
||||
156
airflow/mopdb/LM/ADH_ADJUSTMENTS/w_MOPDB_LM_ADHOC_ADJUSTMENT.py
Normal file
156
airflow/mopdb/LM/ADH_ADJUSTMENTS/w_MOPDB_LM_ADHOC_ADJUSTMENT.py
Normal file
@@ -0,0 +1,156 @@
|
||||
import os
|
||||
from airflow.decorators import dag
|
||||
from airflow.operators.bash import BashOperator
|
||||
from airflow.operators.python import PythonOperator
|
||||
from airflow.utils.dates import days_ago
|
||||
from airflow.utils.trigger_rule import TriggerRule
|
||||
from cosmos import DbtTaskGroup, ProfileConfig, ProjectConfig, RenderConfig
|
||||
|
||||
DAG_NAME = os.path.splitext(os.path.basename(__file__))[0]
|
||||
ENV_NAME = os.getenv("MRDS_ENV", "").lower()
|
||||
|
||||
DATABASE_NAME_MAP = {
|
||||
"dev": "MOPDB",
|
||||
"test": "MOPDB_TEST",
|
||||
}
|
||||
DATABASE_NAME = DATABASE_NAME_MAP.get(ENV_NAME, "MOPDB")
|
||||
|
||||
# check cron 2
|
||||
|
||||
dbt_root_path = "/opt/dbt"
|
||||
dbt_profiles_dir = "/opt/dbt/profiles.yml"
|
||||
dbt_profiles_dir_parent = "/opt/dbt"
|
||||
|
||||
dbt_env = {
|
||||
"DBT_PROFILES_DIR": dbt_profiles_dir_parent,
|
||||
"DBT_TARGET": ENV_NAME,
|
||||
"MRDS_LOADER_DB_USER": os.getenv("MRDS_LOADER_DB_USER"),
|
||||
"MRDS_LOADER_DB_PASS": os.getenv("MRDS_LOADER_DB_PASS"),
|
||||
"MRDS_LOADER_DB_TNS": os.getenv("MRDS_LOADER_DB_TNS", "XE"),
|
||||
"MRDS_SCHEMA": os.getenv("MRDS_SCHEMA", "CT_MRDS"),
|
||||
"MRDS_PROTOCOL": os.getenv("MRDS_PROTOCOL", "tcps"),
|
||||
"MRDS_THREADS": os.getenv("MRDS_THREADS", "4"),
|
||||
|
||||
"DBT_LOG_PATH": "/opt/dbt/logs",
|
||||
"DBT_TARGET_PATH": "/opt/dbt/target",
|
||||
"PYTHONUNBUFFERED": "1",
|
||||
}
|
||||
|
||||
def retrieve_run_id(**kwargs):
    """Push the Airflow-provided run_id into XCom and return it.

    Downstream templated dbt --vars read it as orchestration_run_id.
    """
    run_id = kwargs["run_id"]
    kwargs["ti"].xcom_push(key="run_id", value=run_id)
    return run_id
|
||||
|
||||
def check_dag_status(**kwargs):
    """Raise if any OTHER task instance in this DAG run is in state 'failed'.

    Used as the final ALL_DONE task so the run itself is marked failed when
    an upstream task failed. Its own task instance is excluded from the scan.
    """
    for ti in kwargs["dag_run"].get_task_instances():
        if ti.state == "failed" and ti.task_id != kwargs["task_instance"].task_id:
            raise Exception(f"Task {ti.task_id} failed. Failing this DAG run")
|
||||
|
||||
@dag(
    dag_id=DAG_NAME,
    schedule_interval=None,  # externally triggered only
    start_date=days_ago(2),
    catchup=False,
)
def run_dag():
    """Build the w_MOPDB_LM_ADHOC_ADJUSTMENT workflow.

    Task chain:
        read_vars -> retrieve_run_id -> control_external_run_start
            -> dbt task group (tag m_MOPDB_LM_T_FC_ADH_ADJUSTMENTS_OU_LM_ADHOC_ADJUSTMENT)
            -> control_external_run_end -> dag_status
    """
    def read_vars(**context):
        # Log the bucket configuration for debugging; the values are not used further.
        BUCKET = os.getenv("INBOX_BUCKET")
        BUCKET_NAMESPACE = os.getenv("BUCKET_NAMESPACE")
        print("========= DBT ENV =========")
        print(f"BUCKET_NAMESPACE: {BUCKET_NAMESPACE}, BUCKET : {BUCKET}")
        return 1

    read_vars_task = PythonOperator(
        task_id="read_vars",
        python_callable=read_vars,
        provide_context=True,
    )

    # Captures the Airflow run_id for later dbt orchestration_run_id vars.
    retrieve_run_id_task = PythonOperator(
        task_id="retrieve_run_id",
        python_callable=retrieve_run_id,
        provide_context=True,
    )

    # Marks run start in the control tables; --vars is rendered by Jinja.
    control_external_run_start = BashOperator(
        task_id="control_external_run_start",
        params={"db": DATABASE_NAME, "wf": DAG_NAME},
        env=dbt_env,
        bash_command="""
            set -euxo pipefail
            cd /opt/dbt
            dbt --log-format json --log-level debug --debug --log-path /opt/dbt/logs \
                run-operation control_external_run_start \
                --vars '{{ {
                    "orchestration_run_id": ti.xcom_pull(task_ids="retrieve_run_id", key="run_id"),
                    "input_service_name": params.db,
                    "workflow_name": params.wf
                } | tojson }}'
        """,
    )

    # Shared cosmos configuration for the dbt task group below.
    common_profile = ProfileConfig(
        profiles_yml_filepath=dbt_profiles_dir,
        profile_name="mrds",
        target_name=ENV_NAME,
    )
    common_project = ProjectConfig(dbt_project_path=dbt_root_path)
    common_vars = {
        "orchestration_run_id": "{{ ti.xcom_pull(task_ids='retrieve_run_id', key='run_id') }}",
        "input_service_name": DATABASE_NAME,
        "workflow_name": DAG_NAME,
    }
    common_operator_args = {
        "vars": common_vars,
        "env": dbt_env,
    }

    # dbt models selected by tag for this workflow.
    m_MOPDB_LM_T_FC_ADH_ADJUSTMENTS_OU_LM_ADHOC_ADJUSTMENT = DbtTaskGroup(
        group_id="m_MOPDB_LM_T_FC_ADH_ADJUSTMENTS_OU_LM_ADHOC_ADJUSTMENT",
        project_config=common_project,
        profile_config=common_profile,
        render_config=RenderConfig(select=[
            "tag:m_MOPDB_LM_T_FC_ADH_ADJUSTMENTS_OU_LM_ADHOC_ADJUSTMENT",
        ]),
        operator_args=common_operator_args,
    )

    # Marks run end; ALL_DONE so it runs even after failures.
    control_external_run_end = BashOperator(
        task_id="control_external_run_end",
        params={"db": DATABASE_NAME, "wf": DAG_NAME},
        env=dbt_env,
        bash_command="""
            set -euxo pipefail
            cd /opt/dbt
            dbt --log-format json --log-level debug --debug --log-path /opt/dbt/logs \
                run-operation control_external_run_end \
                --vars '{{ {
                    "orchestration_run_id": ti.xcom_pull(task_ids="retrieve_run_id", key="run_id"),
                    "input_service_name": params.db,
                    "workflow_name": params.wf
                } | tojson }}'
        """,
        trigger_rule=TriggerRule.ALL_DONE,
    )

    # Runs last (ALL_DONE) and fails the run if any earlier task failed.
    dag_status = PythonOperator(
        task_id="dag_status",
        provide_context=True,
        python_callable=check_dag_status,
        trigger_rule=TriggerRule.ALL_DONE,
    )

    read_vars_task >> retrieve_run_id_task >> control_external_run_start >> m_MOPDB_LM_T_FC_ADH_ADJUSTMENTS_OU_LM_ADHOC_ADJUSTMENT >> control_external_run_end >> dag_status
|
||||
|
||||
globals()[DAG_NAME] = run_dag()
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
152
airflow/mopdb/LM/BALANCE_SHEET/w_MOPDB_LM_BALANCESHEET.py
Normal file
152
airflow/mopdb/LM/BALANCE_SHEET/w_MOPDB_LM_BALANCESHEET.py
Normal file
@@ -0,0 +1,152 @@
|
||||
import os
|
||||
from airflow.decorators import dag
|
||||
from airflow.operators.bash import BashOperator
|
||||
from airflow.operators.python import PythonOperator
|
||||
from airflow.utils.dates import days_ago
|
||||
from airflow.utils.trigger_rule import TriggerRule
|
||||
from cosmos import DbtTaskGroup, ProfileConfig, ProjectConfig, RenderConfig
|
||||
|
||||
DAG_NAME = os.path.splitext(os.path.basename(__file__))[0]
|
||||
ENV_NAME = os.getenv("MRDS_ENV", "").lower()
|
||||
|
||||
DATABASE_NAME_MAP = {
|
||||
"dev": "MOPDB",
|
||||
"test": "MOPDB_TEST",
|
||||
}
|
||||
DATABASE_NAME = DATABASE_NAME_MAP.get(ENV_NAME, "MOPDB")
|
||||
|
||||
# check cron 2
|
||||
|
||||
dbt_root_path = "/opt/dbt"
|
||||
dbt_profiles_dir = "/opt/dbt/profiles.yml"
|
||||
dbt_profiles_dir_parent = "/opt/dbt"
|
||||
|
||||
dbt_env = {
|
||||
"DBT_PROFILES_DIR": dbt_profiles_dir_parent,
|
||||
"DBT_TARGET": ENV_NAME,
|
||||
"MRDS_LOADER_DB_USER": os.getenv("MRDS_LOADER_DB_USER"),
|
||||
"MRDS_LOADER_DB_PASS": os.getenv("MRDS_LOADER_DB_PASS"),
|
||||
"MRDS_LOADER_DB_TNS": os.getenv("MRDS_LOADER_DB_TNS", "XE"),
|
||||
"MRDS_SCHEMA": os.getenv("MRDS_SCHEMA", "CT_MRDS"),
|
||||
"MRDS_PROTOCOL": os.getenv("MRDS_PROTOCOL", "tcps"),
|
||||
"MRDS_THREADS": os.getenv("MRDS_THREADS", "4"),
|
||||
|
||||
"DBT_LOG_PATH": "/opt/dbt/logs",
|
||||
"DBT_TARGET_PATH": "/opt/dbt/target",
|
||||
"PYTHONUNBUFFERED": "1",
|
||||
}
|
||||
|
||||
def retrieve_run_id(**kwargs):
    """Push the Airflow-provided run_id into XCom and return it.

    Downstream templated dbt --vars read it as orchestration_run_id.
    """
    run_id = kwargs["run_id"]
    kwargs["ti"].xcom_push(key="run_id", value=run_id)
    return run_id
|
||||
|
||||
def check_dag_status(**kwargs):
    """Raise if any OTHER task instance in this DAG run is in state 'failed'.

    Used as the final ALL_DONE task so the run itself is marked failed when
    an upstream task failed. Its own task instance is excluded from the scan.
    """
    for ti in kwargs["dag_run"].get_task_instances():
        if ti.state == "failed" and ti.task_id != kwargs["task_instance"].task_id:
            raise Exception(f"Task {ti.task_id} failed. Failing this DAG run")
|
||||
|
||||
@dag(
    dag_id=DAG_NAME,
    schedule_interval=None,  # externally triggered only
    start_date=days_ago(2),
    catchup=False,
)
def run_dag():
    """Build the w_MOPDB_LM_BALANCESHEET workflow.

    Task chain:
        read_vars -> retrieve_run_id -> control_external_run_start
            -> dbt task group (tag m_MOPDB_LM_T_BALANCE_SHEET_OU_LM_BALANCE_SHEET)
            -> control_external_run_end -> dag_status
    """
    def read_vars(**context):
        # Log the bucket configuration for debugging; the values are not used further.
        BUCKET = os.getenv("INBOX_BUCKET")
        BUCKET_NAMESPACE = os.getenv("BUCKET_NAMESPACE")
        print("========= DBT ENV =========")
        print(f"BUCKET_NAMESPACE: {BUCKET_NAMESPACE}, BUCKET : {BUCKET}")
        return 1

    read_vars_task = PythonOperator(
        task_id="read_vars",
        python_callable=read_vars,
        provide_context=True,
    )

    # Captures the Airflow run_id for later dbt orchestration_run_id vars.
    retrieve_run_id_task = PythonOperator(
        task_id="retrieve_run_id",
        python_callable=retrieve_run_id,
        provide_context=True,
    )

    # Marks run start in the control tables; --vars is rendered by Jinja.
    control_external_run_start = BashOperator(
        task_id="control_external_run_start",
        params={"db": DATABASE_NAME, "wf": DAG_NAME},
        env=dbt_env,
        bash_command="""
            set -euxo pipefail
            cd /opt/dbt
            dbt --log-format json --log-level debug --debug --log-path /opt/dbt/logs \
                run-operation control_external_run_start \
                --vars '{{ {
                    "orchestration_run_id": ti.xcom_pull(task_ids="retrieve_run_id", key="run_id"),
                    "input_service_name": params.db,
                    "workflow_name": params.wf
                } | tojson }}'
        """,
    )

    # Shared cosmos configuration for the dbt task group below.
    common_profile = ProfileConfig(
        profiles_yml_filepath=dbt_profiles_dir,
        profile_name="mrds",
        target_name=ENV_NAME,
    )
    common_project = ProjectConfig(dbt_project_path=dbt_root_path)
    common_vars = {
        "orchestration_run_id": "{{ ti.xcom_pull(task_ids='retrieve_run_id', key='run_id') }}",
        "input_service_name": DATABASE_NAME,
        "workflow_name": DAG_NAME,
    }
    common_operator_args = {
        "vars": common_vars,
        "env": dbt_env,
    }

    # dbt models selected by tag for this workflow.
    m_MOPDB_LM_T_BALANCE_SHEET_OU_LM_BALANCE_SHEET = DbtTaskGroup(
        group_id="m_MOPDB_LM_T_BALANCE_SHEET_OU_LM_BALANCE_SHEET",
        project_config=common_project,
        profile_config=common_profile,
        render_config=RenderConfig(select=[
            "tag:m_MOPDB_LM_T_BALANCE_SHEET_OU_LM_BALANCE_SHEET",
        ]),
        operator_args=common_operator_args,
    )

    # Marks run end; ALL_DONE so it runs even after failures.
    control_external_run_end = BashOperator(
        task_id="control_external_run_end",
        params={"db": DATABASE_NAME, "wf": DAG_NAME},
        env=dbt_env,
        bash_command="""
            set -euxo pipefail
            cd /opt/dbt
            dbt --log-format json --log-level debug --debug --log-path /opt/dbt/logs \
                run-operation control_external_run_end \
                --vars '{{ {
                    "orchestration_run_id": ti.xcom_pull(task_ids="retrieve_run_id", key="run_id"),
                    "input_service_name": params.db,
                    "workflow_name": params.wf
                } | tojson }}'
        """,
        trigger_rule=TriggerRule.ALL_DONE,
    )

    # Runs last (ALL_DONE) and fails the run if any earlier task failed.
    dag_status = PythonOperator(
        task_id="dag_status",
        provide_context=True,
        python_callable=check_dag_status,
        trigger_rule=TriggerRule.ALL_DONE,
    )

    read_vars_task >> retrieve_run_id_task >> control_external_run_start >> m_MOPDB_LM_T_BALANCE_SHEET_OU_LM_BALANCE_SHEET >> control_external_run_end >> dag_status
|
||||
|
||||
globals()[DAG_NAME] = run_dag()
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
147
airflow/mopdb/LM/CSM_ADJUSTMENTS/w_MOPDB_LM_CSM_ADJUSTMENT.py
Normal file
147
airflow/mopdb/LM/CSM_ADJUSTMENTS/w_MOPDB_LM_CSM_ADJUSTMENT.py
Normal file
@@ -0,0 +1,147 @@
|
||||
import os

from airflow.decorators import dag
from airflow.operators.bash import BashOperator
from airflow.operators.python import PythonOperator
from airflow.utils.dates import days_ago
from airflow.utils.trigger_rule import TriggerRule
from cosmos import DbtTaskGroup, ProfileConfig, ProjectConfig, RenderConfig

# The DAG id mirrors this file's basename (without extension), so renaming
# the file renames the DAG.
DAG_NAME = os.path.splitext(os.path.basename(__file__))[0]
ENV_NAME = os.getenv("MRDS_ENV", "").lower()

# Logical database per deployment environment; unknown environments fall
# back to the dev database name.
DATABASE_NAME_MAP = {
    "dev": "MOPDB",
    "test": "MOPDB_TEST",
}
DATABASE_NAME = DATABASE_NAME_MAP.get(ENV_NAME, "MOPDB")

# check cron 2

dbt_root_path = "/opt/dbt"
dbt_profiles_dir = "/opt/dbt/profiles.yml"
dbt_profiles_dir_parent = "/opt/dbt"

# Environment handed to every dbt invocation (both the BashOperator calls
# and the cosmos-rendered tasks). Credentials come from the worker's
# environment; nothing is hard-coded here.
dbt_env = {
    "DBT_PROFILES_DIR": dbt_profiles_dir_parent,
    "DBT_TARGET": ENV_NAME,
    "MRDS_LOADER_DB_USER": os.getenv("MRDS_LOADER_DB_USER"),
    "MRDS_LOADER_DB_PASS": os.getenv("MRDS_LOADER_DB_PASS"),
    "MRDS_LOADER_DB_TNS": os.getenv("MRDS_LOADER_DB_TNS", "XE"),
    "MRDS_SCHEMA": os.getenv("MRDS_SCHEMA", "CT_MRDS"),
    "MRDS_PROTOCOL": os.getenv("MRDS_PROTOCOL", "tcps"),
    "MRDS_THREADS": os.getenv("MRDS_THREADS", "4"),

    "DBT_LOG_PATH": "/opt/dbt/logs",
    "DBT_TARGET_PATH": "/opt/dbt/target",
    "PYTHONUNBUFFERED": "1",
}


def retrieve_run_id(**kwargs):
    """Publish the current Airflow run_id to XCom and return it."""
    run_id = kwargs["run_id"]
    kwargs["ti"].xcom_push(key="run_id", value=run_id)
    return run_id


def check_dag_status(**kwargs):
    """Raise if any other task instance in this DAG run has failed.

    Runs as a trailing task with TriggerRule.ALL_DONE so the DAG run is
    marked failed even when upstream failures were tolerated.
    """
    own_task_id = kwargs["task_instance"].task_id
    for inst in kwargs["dag_run"].get_task_instances():
        if inst.state == "failed" and inst.task_id != own_task_id:
            raise Exception(f"Task {inst.task_id} failed. Failing this DAG run")


@dag(
    dag_id=DAG_NAME,
    schedule_interval=None,
    start_date=days_ago(2),
    catchup=False,
)
def run_dag():
    """Run the CSM adjustments dbt group bracketed by control run start/end."""

    def read_vars(**context):
        # Log bucket configuration for troubleshooting; the return value is
        # not consumed by downstream tasks.
        BUCKET = os.getenv("INBOX_BUCKET")
        BUCKET_NAMESPACE = os.getenv("BUCKET_NAMESPACE")
        print("========= DBT ENV =========")
        print(f"BUCKET_NAMESPACE: {BUCKET_NAMESPACE}, BUCKET : {BUCKET}")
        return 1

    read_vars_task = PythonOperator(
        task_id="read_vars",
        python_callable=read_vars,
        provide_context=True,
    )

    retrieve_run_id_task = PythonOperator(
        task_id="retrieve_run_id",
        python_callable=retrieve_run_id,
        provide_context=True,
    )

    # Register the external run in the control tables before the models run.
    control_external_run_start = BashOperator(
        task_id="control_external_run_start",
        params={"db": DATABASE_NAME, "wf": DAG_NAME},
        env=dbt_env,
        bash_command="""
        set -euxo pipefail
        cd /opt/dbt
        dbt --log-format json --log-level debug --debug --log-path /opt/dbt/logs \
            run-operation control_external_run_start \
            --vars '{{ {
            "orchestration_run_id": ti.xcom_pull(task_ids="retrieve_run_id", key="run_id"),
            "input_service_name": params.db,
            "workflow_name": params.wf
            } | tojson }}'
        """,
    )

    # Shared cosmos configuration for the dbt task group below.
    common_profile = ProfileConfig(
        profiles_yml_filepath=dbt_profiles_dir,
        profile_name="mrds",
        target_name=ENV_NAME,
    )
    common_project = ProjectConfig(dbt_project_path=dbt_root_path)
    common_vars = {
        "orchestration_run_id": "{{ ti.xcom_pull(task_ids='retrieve_run_id', key='run_id') }}",
        "input_service_name": DATABASE_NAME,
        "workflow_name": DAG_NAME,
    }
    common_operator_args = {
        "vars": common_vars,
        "env": dbt_env,
    }

    group_tag = "m_MOPDB_LM_T_FC_CSM_ADJUSTMENTS_OU_LM_CSM_ADJUSTMENT"
    m_MOPDB_LM_T_FC_CSM_ADJUSTMENTS_OU_LM_CSM_ADJUSTMENT = DbtTaskGroup(
        group_id=group_tag,
        project_config=common_project,
        profile_config=common_profile,
        render_config=RenderConfig(select=["tag:" + group_tag]),
        operator_args=common_operator_args,
    )

    # Close the external run in the control tables; runs regardless of
    # upstream outcome (ALL_DONE).
    control_external_run_end = BashOperator(
        task_id="control_external_run_end",
        params={"db": DATABASE_NAME, "wf": DAG_NAME},
        env=dbt_env,
        bash_command="""
        set -euxo pipefail
        cd /opt/dbt
        dbt --log-format json --log-level debug --debug --log-path /opt/dbt/logs \
            run-operation control_external_run_end \
            --vars '{{ {
            "orchestration_run_id": ti.xcom_pull(task_ids="retrieve_run_id", key="run_id"),
            "input_service_name": params.db,
            "workflow_name": params.wf
            } | tojson }}'
        """,
        trigger_rule=TriggerRule.ALL_DONE,
    )

    dag_status = PythonOperator(
        task_id="dag_status",
        provide_context=True,
        python_callable=check_dag_status,
        trigger_rule=TriggerRule.ALL_DONE,
    )

    read_vars_task >> retrieve_run_id_task >> control_external_run_start \
        >> m_MOPDB_LM_T_FC_CSM_ADJUSTMENTS_OU_LM_CSM_ADJUSTMENT \
        >> control_external_run_end >> dag_status


globals()[DAG_NAME] = run_dag()
|
||||
|
||||
157
airflow/mopdb/LM/CURRENT_ACCOUNTS/w_MOPDB_LM_CURRENT_ACCOUNTS.py
Normal file
157
airflow/mopdb/LM/CURRENT_ACCOUNTS/w_MOPDB_LM_CURRENT_ACCOUNTS.py
Normal file
@@ -0,0 +1,157 @@
|
||||
import os

from airflow.decorators import dag
from airflow.operators.bash import BashOperator
from airflow.operators.python import PythonOperator
from airflow.utils.dates import days_ago
from airflow.utils.trigger_rule import TriggerRule
from cosmos import DbtTaskGroup, ProfileConfig, ProjectConfig, RenderConfig

# The DAG id mirrors this file's basename (without extension).
DAG_NAME = os.path.splitext(os.path.basename(__file__))[0]
ENV_NAME = os.getenv("MRDS_ENV", "").lower()

# Logical database per deployment environment; unknown environments fall
# back to the dev database name.
DATABASE_NAME_MAP = {
    "dev": "MOPDB",
    "test": "MOPDB_TEST",
}
DATABASE_NAME = DATABASE_NAME_MAP.get(ENV_NAME, "MOPDB")

# check cron 2

dbt_root_path = "/opt/dbt"
dbt_profiles_dir = "/opt/dbt/profiles.yml"
dbt_profiles_dir_parent = "/opt/dbt"

# Environment handed to every dbt invocation; credentials come from the
# worker's environment, nothing is hard-coded here.
dbt_env = {
    "DBT_PROFILES_DIR": dbt_profiles_dir_parent,
    "DBT_TARGET": ENV_NAME,
    "MRDS_LOADER_DB_USER": os.getenv("MRDS_LOADER_DB_USER"),
    "MRDS_LOADER_DB_PASS": os.getenv("MRDS_LOADER_DB_PASS"),
    "MRDS_LOADER_DB_TNS": os.getenv("MRDS_LOADER_DB_TNS", "XE"),
    "MRDS_SCHEMA": os.getenv("MRDS_SCHEMA", "CT_MRDS"),
    "MRDS_PROTOCOL": os.getenv("MRDS_PROTOCOL", "tcps"),
    "MRDS_THREADS": os.getenv("MRDS_THREADS", "4"),

    "DBT_LOG_PATH": "/opt/dbt/logs",
    "DBT_TARGET_PATH": "/opt/dbt/target",
    "PYTHONUNBUFFERED": "1",
}


def retrieve_run_id(**kwargs):
    """Publish the current Airflow run_id to XCom and return it."""
    run_id = kwargs["run_id"]
    kwargs["ti"].xcom_push(key="run_id", value=run_id)
    return run_id


def check_dag_status(**kwargs):
    """Raise if any other task instance in this DAG run has failed.

    Runs as a trailing task with TriggerRule.ALL_DONE so the DAG run is
    marked failed even when upstream failures were tolerated.
    """
    own_task_id = kwargs["task_instance"].task_id
    for inst in kwargs["dag_run"].get_task_instances():
        if inst.state == "failed" and inst.task_id != own_task_id:
            raise Exception(f"Task {inst.task_id} failed. Failing this DAG run")


@dag(
    dag_id=DAG_NAME,
    schedule_interval=None,
    start_date=days_ago(2),
    catchup=False,
)
def run_dag():
    """Run the current-accounts dbt group bracketed by control run start/end."""

    def read_vars(**context):
        # Log bucket configuration for troubleshooting; the return value is
        # not consumed by downstream tasks.
        BUCKET = os.getenv("INBOX_BUCKET")
        BUCKET_NAMESPACE = os.getenv("BUCKET_NAMESPACE")
        print("========= DBT ENV =========")
        print(f"BUCKET_NAMESPACE: {BUCKET_NAMESPACE}, BUCKET : {BUCKET}")
        return 1

    read_vars_task = PythonOperator(
        task_id="read_vars",
        python_callable=read_vars,
        provide_context=True,
    )

    retrieve_run_id_task = PythonOperator(
        task_id="retrieve_run_id",
        python_callable=retrieve_run_id,
        provide_context=True,
    )

    # Register the external run in the control tables before the models run.
    control_external_run_start = BashOperator(
        task_id="control_external_run_start",
        params={"db": DATABASE_NAME, "wf": DAG_NAME},
        env=dbt_env,
        bash_command="""
        set -euxo pipefail
        cd /opt/dbt
        dbt --log-format json --log-level debug --debug --log-path /opt/dbt/logs \
            run-operation control_external_run_start \
            --vars '{{ {
            "orchestration_run_id": ti.xcom_pull(task_ids="retrieve_run_id", key="run_id"),
            "input_service_name": params.db,
            "workflow_name": params.wf
            } | tojson }}'
        """,
    )

    # Shared cosmos configuration for the dbt task group below.
    common_profile = ProfileConfig(
        profiles_yml_filepath=dbt_profiles_dir,
        profile_name="mrds",
        target_name=ENV_NAME,
    )
    common_project = ProjectConfig(dbt_project_path=dbt_root_path)
    common_vars = {
        "orchestration_run_id": "{{ ti.xcom_pull(task_ids='retrieve_run_id', key='run_id') }}",
        "input_service_name": DATABASE_NAME,
        "workflow_name": DAG_NAME,
    }
    common_operator_args = {
        "vars": common_vars,
        "env": dbt_env,
    }

    group_tag = "m_MOPDB_LM_T_CURRENT_ACCOUNTS_OU_MRR_CURRENT_ACCOUNTS"
    m_MOPDB_LM_T_CURRENT_ACCOUNTS_OU_MRR_CURRENT_ACCOUNTS = DbtTaskGroup(
        group_id=group_tag,
        project_config=common_project,
        profile_config=common_profile,
        render_config=RenderConfig(select=["tag:" + group_tag]),
        operator_args=common_operator_args,
    )

    # Close the external run in the control tables; runs regardless of
    # upstream outcome (ALL_DONE).
    control_external_run_end = BashOperator(
        task_id="control_external_run_end",
        params={"db": DATABASE_NAME, "wf": DAG_NAME},
        env=dbt_env,
        bash_command="""
        set -euxo pipefail
        cd /opt/dbt
        dbt --log-format json --log-level debug --debug --log-path /opt/dbt/logs \
            run-operation control_external_run_end \
            --vars '{{ {
            "orchestration_run_id": ti.xcom_pull(task_ids="retrieve_run_id", key="run_id"),
            "input_service_name": params.db,
            "workflow_name": params.wf
            } | tojson }}'
        """,
        trigger_rule=TriggerRule.ALL_DONE,
    )

    dag_status = PythonOperator(
        task_id="dag_status",
        provide_context=True,
        python_callable=check_dag_status,
        trigger_rule=TriggerRule.ALL_DONE,
    )

    read_vars_task >> retrieve_run_id_task >> control_external_run_start \
        >> m_MOPDB_LM_T_CURRENT_ACCOUNTS_OU_MRR_CURRENT_ACCOUNTS \
        >> control_external_run_end >> dag_status


globals()[DAG_NAME] = run_dag()
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
149
airflow/mopdb/LM/FORECAST/w_MOPDB_LM_FORECAST.py
Normal file
149
airflow/mopdb/LM/FORECAST/w_MOPDB_LM_FORECAST.py
Normal file
@@ -0,0 +1,149 @@
|
||||
import os

from airflow.decorators import dag
from airflow.operators.bash import BashOperator
from airflow.operators.python import PythonOperator
from airflow.utils.dates import days_ago
from airflow.utils.trigger_rule import TriggerRule
from cosmos import DbtTaskGroup, ProfileConfig, ProjectConfig, RenderConfig

# The DAG id mirrors this file's basename (without extension).
DAG_NAME = os.path.splitext(os.path.basename(__file__))[0]
ENV_NAME = os.getenv("MRDS_ENV", "").lower()

# Logical database per deployment environment; unknown environments fall
# back to the dev database name.
DATABASE_NAME_MAP = {
    "dev": "MOPDB",
    "test": "MOPDB_TEST",
}
DATABASE_NAME = DATABASE_NAME_MAP.get(ENV_NAME, "MOPDB")

# check cron 2

dbt_root_path = "/opt/dbt"
dbt_profiles_dir = "/opt/dbt/profiles.yml"
dbt_profiles_dir_parent = "/opt/dbt"

# Environment handed to every dbt invocation; credentials come from the
# worker's environment, nothing is hard-coded here.
dbt_env = {
    "DBT_PROFILES_DIR": dbt_profiles_dir_parent,
    "DBT_TARGET": ENV_NAME,
    "MRDS_LOADER_DB_USER": os.getenv("MRDS_LOADER_DB_USER"),
    "MRDS_LOADER_DB_PASS": os.getenv("MRDS_LOADER_DB_PASS"),
    "MRDS_LOADER_DB_TNS": os.getenv("MRDS_LOADER_DB_TNS", "XE"),
    "MRDS_SCHEMA": os.getenv("MRDS_SCHEMA", "CT_MRDS"),
    "MRDS_PROTOCOL": os.getenv("MRDS_PROTOCOL", "tcps"),
    "MRDS_THREADS": os.getenv("MRDS_THREADS", "4"),

    "DBT_LOG_PATH": "/opt/dbt/logs",
    "DBT_TARGET_PATH": "/opt/dbt/target",
    "PYTHONUNBUFFERED": "1",
}


def retrieve_run_id(**kwargs):
    """Publish the current Airflow run_id to XCom and return it."""
    run_id = kwargs["run_id"]
    kwargs["ti"].xcom_push(key="run_id", value=run_id)
    return run_id


def check_dag_status(**kwargs):
    """Raise if any other task instance in this DAG run has failed.

    Runs as a trailing task with TriggerRule.ALL_DONE so the DAG run is
    marked failed even when upstream failures were tolerated.
    """
    own_task_id = kwargs["task_instance"].task_id
    for inst in kwargs["dag_run"].get_task_instances():
        if inst.state == "failed" and inst.task_id != own_task_id:
            raise Exception(f"Task {inst.task_id} failed. Failing this DAG run")


@dag(
    dag_id=DAG_NAME,
    schedule_interval=None,
    start_date=days_ago(2),
    catchup=False,
)
def run_dag():
    """Run the forecast dbt group bracketed by control run start/end."""

    def read_vars(**context):
        # Log bucket configuration for troubleshooting; the return value is
        # not consumed by downstream tasks.
        BUCKET = os.getenv("INBOX_BUCKET")
        BUCKET_NAMESPACE = os.getenv("BUCKET_NAMESPACE")
        print("========= DBT ENV =========")
        print(f"BUCKET_NAMESPACE: {BUCKET_NAMESPACE}, BUCKET : {BUCKET}")
        return 1

    read_vars_task = PythonOperator(
        task_id="read_vars",
        python_callable=read_vars,
        provide_context=True,
    )

    retrieve_run_id_task = PythonOperator(
        task_id="retrieve_run_id",
        python_callable=retrieve_run_id,
        provide_context=True,
    )

    # Register the external run in the control tables before the models run.
    control_external_run_start = BashOperator(
        task_id="control_external_run_start",
        params={"db": DATABASE_NAME, "wf": DAG_NAME},
        env=dbt_env,
        bash_command="""
        set -euxo pipefail
        cd /opt/dbt
        dbt --log-format json --log-level debug --debug --log-path /opt/dbt/logs \
            run-operation control_external_run_start \
            --vars '{{ {
            "orchestration_run_id": ti.xcom_pull(task_ids="retrieve_run_id", key="run_id"),
            "input_service_name": params.db,
            "workflow_name": params.wf
            } | tojson }}'
        """,
    )

    # Shared cosmos configuration for the dbt task group below.
    common_profile = ProfileConfig(
        profiles_yml_filepath=dbt_profiles_dir,
        profile_name="mrds",
        target_name=ENV_NAME,
    )
    common_project = ProjectConfig(dbt_project_path=dbt_root_path)
    common_vars = {
        "orchestration_run_id": "{{ ti.xcom_pull(task_ids='retrieve_run_id', key='run_id') }}",
        "input_service_name": DATABASE_NAME,
        "workflow_name": DAG_NAME,
    }
    common_operator_args = {
        "vars": common_vars,
        "env": dbt_env,
    }

    group_tag = "m_MOPDB_LM_T_FORECAST_OU_LM_FORECAST"
    m_MOPDB_LM_T_FORECAST_OU_LM_FORECAST = DbtTaskGroup(
        group_id=group_tag,
        project_config=common_project,
        profile_config=common_profile,
        render_config=RenderConfig(select=["tag:" + group_tag]),
        operator_args=common_operator_args,
    )

    # Close the external run in the control tables; runs regardless of
    # upstream outcome (ALL_DONE).
    control_external_run_end = BashOperator(
        task_id="control_external_run_end",
        params={"db": DATABASE_NAME, "wf": DAG_NAME},
        env=dbt_env,
        bash_command="""
        set -euxo pipefail
        cd /opt/dbt
        dbt --log-format json --log-level debug --debug --log-path /opt/dbt/logs \
            run-operation control_external_run_end \
            --vars '{{ {
            "orchestration_run_id": ti.xcom_pull(task_ids="retrieve_run_id", key="run_id"),
            "input_service_name": params.db,
            "workflow_name": params.wf
            } | tojson }}'
        """,
        trigger_rule=TriggerRule.ALL_DONE,
    )

    dag_status = PythonOperator(
        task_id="dag_status",
        provide_context=True,
        python_callable=check_dag_status,
        trigger_rule=TriggerRule.ALL_DONE,
    )

    read_vars_task >> retrieve_run_id_task >> control_external_run_start \
        >> m_MOPDB_LM_T_FORECAST_OU_LM_FORECAST \
        >> control_external_run_end >> dag_status


globals()[DAG_NAME] = run_dag()
|
||||
|
||||
|
||||
|
||||
155
airflow/mopdb/LM/QRE_ADJUSTMENTS/w_MOPDB_LM_QRE_ADJUSTMENT.py
Normal file
155
airflow/mopdb/LM/QRE_ADJUSTMENTS/w_MOPDB_LM_QRE_ADJUSTMENT.py
Normal file
@@ -0,0 +1,155 @@
|
||||
import os

from airflow.decorators import dag
from airflow.operators.bash import BashOperator
from airflow.operators.python import PythonOperator
from airflow.utils.dates import days_ago
from airflow.utils.trigger_rule import TriggerRule
from cosmos import DbtTaskGroup, ProfileConfig, ProjectConfig, RenderConfig

# The DAG id mirrors this file's basename (without extension).
DAG_NAME = os.path.splitext(os.path.basename(__file__))[0]
ENV_NAME = os.getenv("MRDS_ENV", "").lower()

# Logical database per deployment environment; unknown environments fall
# back to the dev database name.
DATABASE_NAME_MAP = {
    "dev": "MOPDB",
    "test": "MOPDB_TEST",
}
DATABASE_NAME = DATABASE_NAME_MAP.get(ENV_NAME, "MOPDB")

# check cron 2

dbt_root_path = "/opt/dbt"
dbt_profiles_dir = "/opt/dbt/profiles.yml"
dbt_profiles_dir_parent = "/opt/dbt"

# Environment handed to every dbt invocation; credentials come from the
# worker's environment, nothing is hard-coded here.
dbt_env = {
    "DBT_PROFILES_DIR": dbt_profiles_dir_parent,
    "DBT_TARGET": ENV_NAME,
    "MRDS_LOADER_DB_USER": os.getenv("MRDS_LOADER_DB_USER"),
    "MRDS_LOADER_DB_PASS": os.getenv("MRDS_LOADER_DB_PASS"),
    "MRDS_LOADER_DB_TNS": os.getenv("MRDS_LOADER_DB_TNS", "XE"),
    "MRDS_SCHEMA": os.getenv("MRDS_SCHEMA", "CT_MRDS"),
    "MRDS_PROTOCOL": os.getenv("MRDS_PROTOCOL", "tcps"),
    "MRDS_THREADS": os.getenv("MRDS_THREADS", "4"),

    "DBT_LOG_PATH": "/opt/dbt/logs",
    "DBT_TARGET_PATH": "/opt/dbt/target",
    "PYTHONUNBUFFERED": "1",
}


def retrieve_run_id(**kwargs):
    """Publish the current Airflow run_id to XCom and return it."""
    run_id = kwargs["run_id"]
    kwargs["ti"].xcom_push(key="run_id", value=run_id)
    return run_id


def check_dag_status(**kwargs):
    """Raise if any other task instance in this DAG run has failed.

    Runs as a trailing task with TriggerRule.ALL_DONE so the DAG run is
    marked failed even when upstream failures were tolerated.
    """
    own_task_id = kwargs["task_instance"].task_id
    for inst in kwargs["dag_run"].get_task_instances():
        if inst.state == "failed" and inst.task_id != own_task_id:
            raise Exception(f"Task {inst.task_id} failed. Failing this DAG run")


@dag(
    dag_id=DAG_NAME,
    schedule_interval=None,
    start_date=days_ago(2),
    catchup=False,
)
def run_dag():
    """Run the QRE adjustments dbt group bracketed by control run start/end."""

    def read_vars(**context):
        # Log bucket configuration for troubleshooting; the return value is
        # not consumed by downstream tasks.
        BUCKET = os.getenv("INBOX_BUCKET")
        BUCKET_NAMESPACE = os.getenv("BUCKET_NAMESPACE")
        print("========= DBT ENV =========")
        print(f"BUCKET_NAMESPACE: {BUCKET_NAMESPACE}, BUCKET : {BUCKET}")
        return 1

    read_vars_task = PythonOperator(
        task_id="read_vars",
        python_callable=read_vars,
        provide_context=True,
    )

    retrieve_run_id_task = PythonOperator(
        task_id="retrieve_run_id",
        python_callable=retrieve_run_id,
        provide_context=True,
    )

    # Register the external run in the control tables before the models run.
    control_external_run_start = BashOperator(
        task_id="control_external_run_start",
        params={"db": DATABASE_NAME, "wf": DAG_NAME},
        env=dbt_env,
        bash_command="""
        set -euxo pipefail
        cd /opt/dbt
        dbt --log-format json --log-level debug --debug --log-path /opt/dbt/logs \
            run-operation control_external_run_start \
            --vars '{{ {
            "orchestration_run_id": ti.xcom_pull(task_ids="retrieve_run_id", key="run_id"),
            "input_service_name": params.db,
            "workflow_name": params.wf
            } | tojson }}'
        """,
    )

    # Shared cosmos configuration for the dbt task group below.
    common_profile = ProfileConfig(
        profiles_yml_filepath=dbt_profiles_dir,
        profile_name="mrds",
        target_name=ENV_NAME,
    )
    common_project = ProjectConfig(dbt_project_path=dbt_root_path)
    common_vars = {
        "orchestration_run_id": "{{ ti.xcom_pull(task_ids='retrieve_run_id', key='run_id') }}",
        "input_service_name": DATABASE_NAME,
        "workflow_name": DAG_NAME,
    }
    common_operator_args = {
        "vars": common_vars,
        "env": dbt_env,
    }

    group_tag = "m_MOPDB_LM_T_FC_QRE_ADJUSTMENTS_OU_LM_QRE_ADJUSTMENT"
    m_MOPDB_LM_T_FC_QRE_ADJUSTMENTS_OU_LM_QRE_ADJUSTMENT = DbtTaskGroup(
        group_id=group_tag,
        project_config=common_project,
        profile_config=common_profile,
        render_config=RenderConfig(select=["tag:" + group_tag]),
        operator_args=common_operator_args,
    )

    # Close the external run in the control tables; runs regardless of
    # upstream outcome (ALL_DONE).
    control_external_run_end = BashOperator(
        task_id="control_external_run_end",
        params={"db": DATABASE_NAME, "wf": DAG_NAME},
        env=dbt_env,
        bash_command="""
        set -euxo pipefail
        cd /opt/dbt
        dbt --log-format json --log-level debug --debug --log-path /opt/dbt/logs \
            run-operation control_external_run_end \
            --vars '{{ {
            "orchestration_run_id": ti.xcom_pull(task_ids="retrieve_run_id", key="run_id"),
            "input_service_name": params.db,
            "workflow_name": params.wf
            } | tojson }}'
        """,
        trigger_rule=TriggerRule.ALL_DONE,
    )

    dag_status = PythonOperator(
        task_id="dag_status",
        provide_context=True,
        python_callable=check_dag_status,
        trigger_rule=TriggerRule.ALL_DONE,
    )

    read_vars_task >> retrieve_run_id_task >> control_external_run_start \
        >> m_MOPDB_LM_T_FC_QRE_ADJUSTMENTS_OU_LM_QRE_ADJUSTMENT \
        >> control_external_run_end >> dag_status


globals()[DAG_NAME] = run_dag()
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,175 @@
|
||||
import os

from airflow.decorators import dag
from airflow.operators.bash import BashOperator
from airflow.operators.python import PythonOperator
from airflow.utils.dates import days_ago
from airflow.utils.trigger_rule import TriggerRule
from cosmos import DbtTaskGroup, ProfileConfig, ProjectConfig, RenderConfig

# The DAG id mirrors this file's basename (without extension).
DAG_NAME = os.path.splitext(os.path.basename(__file__))[0]
ENV_NAME = os.getenv("MRDS_ENV", "").lower()

# Logical database per deployment environment; unknown environments fall
# back to the dev database name.
DATABASE_NAME_MAP = {
    "dev": "MOPDB",
    "test": "MOPDB_TEST",
}
DATABASE_NAME = DATABASE_NAME_MAP.get(ENV_NAME, "MOPDB")

# check cron 2

dbt_root_path = "/opt/dbt"
dbt_profiles_dir = "/opt/dbt/profiles.yml"
dbt_profiles_dir_parent = "/opt/dbt"

# Environment handed to every dbt invocation; credentials come from the
# worker's environment, nothing is hard-coded here.
dbt_env = {
    "DBT_PROFILES_DIR": dbt_profiles_dir_parent,
    "DBT_TARGET": ENV_NAME,
    "MRDS_LOADER_DB_USER": os.getenv("MRDS_LOADER_DB_USER"),
    "MRDS_LOADER_DB_PASS": os.getenv("MRDS_LOADER_DB_PASS"),
    "MRDS_LOADER_DB_TNS": os.getenv("MRDS_LOADER_DB_TNS", "XE"),
    "MRDS_SCHEMA": os.getenv("MRDS_SCHEMA", "CT_MRDS"),
    "MRDS_PROTOCOL": os.getenv("MRDS_PROTOCOL", "tcps"),
    "MRDS_THREADS": os.getenv("MRDS_THREADS", "4"),

    "DBT_LOG_PATH": "/opt/dbt/logs",
    "DBT_TARGET_PATH": "/opt/dbt/target",
    "PYTHONUNBUFFERED": "1",
}


def retrieve_run_id(**kwargs):
    """Publish the current Airflow run_id to XCom and return it."""
    run_id = kwargs["run_id"]
    kwargs["ti"].xcom_push(key="run_id", value=run_id)
    return run_id


def check_dag_status(**kwargs):
    """Raise if any other task instance in this DAG run has failed.

    Runs as a trailing task with TriggerRule.ALL_DONE so the DAG run is
    marked failed even when upstream failures were tolerated.
    """
    own_task_id = kwargs["task_instance"].task_id
    for inst in kwargs["dag_run"].get_task_instances():
        if inst.state == "failed" and inst.task_id != own_task_id:
            raise Exception(f"Task {inst.task_id} failed. Failing this DAG run")


@dag(
    dag_id=DAG_NAME,
    schedule_interval=None,
    start_date=days_ago(2),
    catchup=False,
)
def run_dag():
    """Run the standing-facilities dbt group, then trigger devo replication."""

    def read_vars(**context):
        # Log bucket configuration for troubleshooting; the return value is
        # not consumed by downstream tasks.
        BUCKET = os.getenv("INBOX_BUCKET")
        BUCKET_NAMESPACE = os.getenv("BUCKET_NAMESPACE")
        print("========= DBT ENV =========")
        print(f"BUCKET_NAMESPACE: {BUCKET_NAMESPACE}, BUCKET : {BUCKET}")
        return 1

    read_vars_task = PythonOperator(
        task_id="read_vars",
        python_callable=read_vars,
        provide_context=True,
    )

    retrieve_run_id_task = PythonOperator(
        task_id="retrieve_run_id",
        python_callable=retrieve_run_id,
        provide_context=True,
    )

    # Register the external run in the control tables before the models run.
    control_external_run_start = BashOperator(
        task_id="control_external_run_start",
        params={"db": DATABASE_NAME, "wf": DAG_NAME},
        env=dbt_env,
        bash_command="""
        set -euxo pipefail
        cd /opt/dbt
        dbt --log-format json --log-level debug --debug --log-path /opt/dbt/logs \
            run-operation control_external_run_start \
            --vars '{{ {
            "orchestration_run_id": ti.xcom_pull(task_ids="retrieve_run_id", key="run_id"),
            "input_service_name": params.db,
            "workflow_name": params.wf
            } | tojson }}'
        """,
    )

    # Shared cosmos configuration for the dbt task group below.
    common_profile = ProfileConfig(
        profiles_yml_filepath=dbt_profiles_dir,
        profile_name="mrds",
        target_name=ENV_NAME,
    )
    common_project = ProjectConfig(dbt_project_path=dbt_root_path)
    common_vars = {
        "orchestration_run_id": "{{ ti.xcom_pull(task_ids='retrieve_run_id', key='run_id') }}",
        "input_service_name": DATABASE_NAME,
        "workflow_name": DAG_NAME,
    }
    common_operator_args = {
        "vars": common_vars,
        "env": dbt_env,
    }

    m_MOPDB_LM_T_STANDING_FACILITIES_OU_LM_STANDING_FACILITIES = DbtTaskGroup(
        group_id="m_MOPDB_LM_T_STANDING_FACILITIES_OU_LM_STANDING_FACILITIES",
        project_config=common_project,
        profile_config=common_profile,
        render_config=RenderConfig(
            select=["tag:m_MOPDB_LM_T_STANDING_FACILITIES_OU_LM_STANDING_FACILITIES"],
            # The CSDB debt-daily source query is managed by another workflow.
            exclude=["m_MOPDB_CSDB_DEBT_DAILY_OU_CSDB_DEBT_DAILY_SQ"],
        ),
        operator_args=common_operator_args,
    )

    # Close the external run in the control tables; runs regardless of
    # upstream outcome (ALL_DONE).
    control_external_run_end = BashOperator(
        task_id="control_external_run_end",
        params={"db": DATABASE_NAME, "wf": DAG_NAME},
        env=dbt_env,
        bash_command="""
        set -euxo pipefail
        cd /opt/dbt
        dbt --log-format json --log-level debug --debug --log-path /opt/dbt/logs \
            run-operation control_external_run_end \
            --vars '{{ {
            "orchestration_run_id": ti.xcom_pull(task_ids="retrieve_run_id", key="run_id"),
            "input_service_name": params.db,
            "workflow_name": params.wf
            } | tojson }}'
        """,
        trigger_rule=TriggerRule.ALL_DONE,
    )

    def trigger_devo_replicator(**context):
        """Trigger the devo replicator DAG for the standing-facilities table.

        Returns the run_id of the triggered DAG run. Failures propagate and
        fail this task.
        """
        from airflow.api.common.trigger_dag import trigger_dag

        target_dag_id = 'devo_replicator_trigger_mopdb'

        # Create configuration dictionary
        trigger_conf = {
            "owner_table": "LM.T_STANDING_FACILITIES"
        }
        # Fix: the original wrapped this call in `try: ... except Exception
        # as e: raise`, a no-op handler with an unused variable. Calling
        # directly has identical failure semantics.
        dag_run = trigger_dag(
            dag_id=target_dag_id,
            conf=trigger_conf,
            execution_date=None,
            replace_microseconds=False
        )
        return dag_run.run_id

    trigger_replicator = PythonOperator(
        task_id='trigger_devo_replicator',
        python_callable=trigger_devo_replicator,
        trigger_rule=TriggerRule.ALL_DONE,
    )

    dag_status = PythonOperator(
        task_id="dag_status",
        provide_context=True,
        python_callable=check_dag_status,
        trigger_rule=TriggerRule.ALL_DONE,
    )

    read_vars_task >> retrieve_run_id_task >> control_external_run_start \
        >> m_MOPDB_LM_T_STANDING_FACILITIES_OU_LM_STANDING_FACILITIES \
        >> control_external_run_end >> trigger_replicator >> dag_status


globals()[DAG_NAME] = run_dag()
|
||||
0
airflow/mopdb/RQSD/.gitkeep
Normal file
0
airflow/mopdb/RQSD/.gitkeep
Normal file
0
airflow/mopdb/RQSD/RQSD_PROCESS/.gitkeep
Normal file
0
airflow/mopdb/RQSD/RQSD_PROCESS/.gitkeep
Normal file
316
airflow/mopdb/RQSD/RQSD_PROCESS/w_MOPDB_RQSD_PROCESS.py
Normal file
316
airflow/mopdb/RQSD/RQSD_PROCESS/w_MOPDB_RQSD_PROCESS.py
Normal file
@@ -0,0 +1,316 @@
|
||||
import os
|
||||
######
|
||||
from airflow.decorators import dag
|
||||
from airflow.operators.bash import BashOperator
|
||||
from airflow.operators.python import PythonOperator
|
||||
from airflow.utils.dates import days_ago
|
||||
from airflow.utils.trigger_rule import TriggerRule
|
||||
from airflow.api.common.trigger_dag import trigger_dag
|
||||
from cosmos import DbtTaskGroup, ProfileConfig, ProjectConfig, RenderConfig
|
||||
|
||||
from mrds.utils.security_utils import get_verified_run_id, verify_run_id
|
||||
from mrds.utils import oraconn
|
||||
|
||||
DAG_NAME = os.path.splitext(os.path.basename(__file__))[0]
|
||||
ENV_NAME = os.getenv("MRDS_ENV", "").lower()
|
||||
|
||||
DATABASE_NAME_MAP = {
|
||||
"dev": "MOPDB",
|
||||
"test": "MOPDB_TEST",
|
||||
}
|
||||
DATABASE_NAME = DATABASE_NAME_MAP.get(ENV_NAME, "MOPDB")
|
||||
|
||||
# check cron 2
|
||||
|
||||
dbt_root_path = "/opt/dbt"
|
||||
dbt_profiles_dir = "/opt/dbt/profiles.yml"
|
||||
dbt_profiles_dir_parent = "/opt/dbt"
|
||||
|
||||
dbt_env = {
|
||||
"DBT_PROFILES_DIR": dbt_profiles_dir_parent,
|
||||
"DBT_TARGET": ENV_NAME,
|
||||
"MRDS_LOADER_DB_USER": os.getenv("MRDS_LOADER_DB_USER"),
|
||||
"MRDS_LOADER_DB_PASS": os.getenv("MRDS_LOADER_DB_PASS"),
|
||||
"MRDS_LOADER_DB_TNS": os.getenv("MRDS_LOADER_DB_TNS", "XE"),
|
||||
"MRDS_SCHEMA": os.getenv("MRDS_SCHEMA", "CT_MRDS"),
|
||||
"MRDS_PROTOCOL": os.getenv("MRDS_PROTOCOL", "tcps"),
|
||||
"MRDS_THREADS": os.getenv("MRDS_THREADS", "4"),
|
||||
|
||||
"DBT_LOG_PATH": "/opt/dbt/logs",
|
||||
"DBT_TARGET_PATH": "/opt/dbt/target",
|
||||
"PYTHONUNBUFFERED": "1",
|
||||
}
|
||||
|
||||
def retrieve_run_id(**kwargs):
|
||||
# Get verified run_id using security utilities
|
||||
run_id = get_verified_run_id(kwargs)
|
||||
kwargs["ti"].xcom_push(key="run_id", value=run_id)
|
||||
return run_id
|
||||
|
||||
def check_dag_status(**kwargs):
|
||||
for ti in kwargs["dag_run"].get_task_instances():
|
||||
if ti.state == "failed" and ti.task_id != kwargs["task_instance"].task_id:
|
||||
raise Exception(f"Task {ti.task_id} failed. Failing this DAG run")
|
||||
|
||||
def get_rqsd_tables_to_replicate(**kwargs):
|
||||
"""
|
||||
Get list of RQSD tables from a_devo_replica_mgmt_rqsd,
|
||||
excluding tables ending with _COPY
|
||||
"""
|
||||
import logging
|
||||
oracle_conn = None
|
||||
try:
|
||||
oracle_conn = oraconn.connect('MRDS_LOADER')
|
||||
cursor = oracle_conn.cursor()
|
||||
|
||||
# Query to get all tables excluding _COPY versions
|
||||
sql = """
|
||||
SELECT OWNER, TABLE_NAME
|
||||
FROM CT_MRDS.a_devo_replica_mgmt_rqsd
|
||||
WHERE TABLE_NAME NOT LIKE '%_COPY'
|
||||
ORDER BY OWNER, TABLE_NAME
|
||||
"""
|
||||
|
||||
cursor.execute(sql)
|
||||
tables = cursor.fetchall()
|
||||
cursor.close()
|
||||
|
||||
logging.info(f"Found {len(tables)} RQSD tables to replicate (excluding _COPY versions)")
|
||||
|
||||
# Convert to list of owner.table_name format
|
||||
table_list = [f"{owner}.{table_name}" for owner, table_name in tables]
|
||||
|
||||
# Push to XCom for next task
|
||||
kwargs["ti"].xcom_push(key="rqsd_tables", value=table_list)
|
||||
|
||||
return table_list
|
||||
|
||||
except Exception as e:
|
||||
logging.error(f"Error getting RQSD tables: {e}")
|
||||
raise
|
||||
finally:
|
||||
if oracle_conn:
|
||||
oracle_conn.close()
|
||||
|
||||
def trigger_rqsd_replication(**kwargs):
|
||||
"""
|
||||
Trigger devo_replicator_trigger_rqsd for each table in the list
|
||||
"""
|
||||
import logging
|
||||
from datetime import datetime
|
||||
|
||||
ti = kwargs["ti"]
|
||||
table_list = ti.xcom_pull(task_ids="get_rqsd_tables", key="rqsd_tables")
|
||||
|
||||
if not table_list:
|
||||
logging.warning("No RQSD tables found to replicate")
|
||||
return
|
||||
|
||||
logging.info(f"Triggering replication for {len(table_list)} tables")
|
||||
|
||||
triggered_count = 0
|
||||
failed_triggers = []
|
||||
|
||||
for owner_table in table_list:
|
||||
try:
|
||||
conf = {
|
||||
"owner_table": owner_table
|
||||
}
|
||||
|
||||
trigger_dag(
|
||||
dag_id='devo_replicator_trigger_rqsd',
|
||||
conf=conf,
|
||||
execution_date=None,
|
||||
replace_microseconds=False
|
||||
)
|
||||
|
||||
triggered_count += 1
|
||||
logging.info(f"Successfully triggered replication for {owner_table}")
|
||||
|
||||
except Exception as e:
|
||||
logging.error(f"Failed to trigger replication for {owner_table}: {e}")
|
||||
failed_triggers.append(owner_table)
|
||||
|
||||
logging.info(f"Replication triggered for {triggered_count}/{len(table_list)} tables")
|
||||
|
||||
if failed_triggers:
|
||||
logging.warning(f"Failed to trigger replication for: {', '.join(failed_triggers)}")
|
||||
|
||||
# Push results to XCom
|
||||
ti.xcom_push(key="triggered_count", value=triggered_count)
|
||||
ti.xcom_push(key="failed_triggers", value=failed_triggers)
|
||||
|
||||
return {
|
||||
"triggered_count": triggered_count,
|
||||
"total_tables": len(table_list),
|
||||
"failed_triggers": failed_triggers
|
||||
}
|
||||
|
||||
@dag(
    dag_id=DAG_NAME,
    schedule_interval=None,
    start_date=days_ago(2),
    catchup=False,
)
def run_dag():
    """
    MOPDB RQSD processing workflow.

    Flow:
        read_vars -> retrieve_run_id -> control_external_run_start
        -> 5 parallel ANNEX dbt task groups
        -> m_MOPDB_RQSD_OUTPUT_CURR_RQSD_NCB_SUBA
        -> control_external_run_end -> get_rqsd_tables
        -> trigger_rqsd_replication -> dag_status
    """

    def read_vars(**context):
        # Log the bucket configuration for debugging; the returned value
        # is not consumed by any downstream task.
        BUCKET = os.getenv("INBOX_BUCKET")
        BUCKET_NAMESPACE = os.getenv("BUCKET_NAMESPACE")
        print("========= DBT ENV =========")
        print(f"BUCKET_NAMESPACE: {BUCKET_NAMESPACE}, BUCKET : {BUCKET}")
        return 1

    def _control_run_operator(operation, **extra):
        # Shared builder for the control_external_run_start / _end tasks.
        # Both run the same dbt run-operation with the standard vars
        # payload and differ only in the operation name (== task_id).
        return BashOperator(
            task_id=operation,
            params={"db": DATABASE_NAME, "wf": DAG_NAME},
            env=dbt_env,
            bash_command="""
        set -euxo pipefail
        cd /opt/dbt
        dbt --log-format json --log-level debug --debug --log-path /opt/dbt/logs \
            run-operation %s \
            --vars '{{ {
                "orchestration_run_id": ti.xcom_pull(task_ids="retrieve_run_id", key="run_id"),
                "input_service_name": params.db,
                "workflow_name": params.wf
            } | tojson }}'
        """ % operation,
            **extra,
        )

    def _dbt_group(tag):
        # One cosmos task group per dbt tag; group_id mirrors the tag.
        return DbtTaskGroup(
            group_id=tag,
            project_config=common_project,
            profile_config=common_profile,
            render_config=RenderConfig(select=["tag:" + tag]),
            operator_args=common_operator_args,
        )

    read_vars_task = PythonOperator(
        task_id="read_vars",
        python_callable=read_vars,
        provide_context=True,
    )

    retrieve_run_id_task = PythonOperator(
        task_id="retrieve_run_id",
        python_callable=retrieve_run_id,
        provide_context=True,
    )

    control_external_run_start = _control_run_operator("control_external_run_start")

    # Shared cosmos/dbt configuration for every task group below.
    common_profile = ProfileConfig(
        profiles_yml_filepath=dbt_profiles_dir,
        profile_name="mrds",
        target_name=ENV_NAME,
    )
    common_project = ProjectConfig(dbt_project_path=dbt_root_path)
    common_vars = {
        "orchestration_run_id": "{{ ti.xcom_pull(task_ids='retrieve_run_id', key='run_id') }}",
        "input_service_name": DATABASE_NAME,
        "workflow_name": DAG_NAME,
    }
    common_operator_args = {
        "vars": common_vars,
        "env": dbt_env,
    }

    # The five ANNEX groups run in parallel before the OUTPUT group.
    annex_groups = [
        _dbt_group(tag)
        for tag in (
            "m_MOPDB_RQSD_ANNEX_1_1_ALL_ODS_RQSD_OBSERVATIONS",
            "m_MOPDB_RQSD_ANNEX_1_2_ALL_ODS_RQSD_OBSERVATIONS",
            "m_MOPDB_RQSD_ANNEX_1_1_FIN_ALL_ODS_RQSD_OBSERVATIONS",
            "m_MOPDB_RQSD_ANNEX_1_2_FIN_ALL_ODS_RQSD_OBSERVATIONS",
            "m_MOPDB_RQSD_ANNEX_2_ALL_ODS_RQSD_OBSERVATIONS",
        )
    ]
    output_group = _dbt_group("m_MOPDB_RQSD_OUTPUT_CURR_RQSD_NCB_SUBA")

    control_external_run_end = _control_run_operator(
        "control_external_run_end",
        trigger_rule=TriggerRule.ALL_DONE,
    )

    # Get list of RQSD tables to replicate.
    get_rqsd_tables = PythonOperator(
        task_id="get_rqsd_tables",
        python_callable=get_rqsd_tables_to_replicate,
        provide_context=True,
    )

    # Trigger replication for all RQSD tables.
    trigger_rqsd_replication_task = PythonOperator(
        task_id="trigger_rqsd_replication",
        python_callable=trigger_rqsd_replication,
        provide_context=True,
    )

    dag_status = PythonOperator(
        task_id="dag_status",
        provide_context=True,
        python_callable=check_dag_status,
        trigger_rule=TriggerRule.ALL_DONE,
    )

    # Dependency chain.
    (
        read_vars_task
        >> retrieve_run_id_task
        >> control_external_run_start
        >> annex_groups
        >> output_group
        >> control_external_run_end
        >> get_rqsd_tables
        >> trigger_rqsd_replication_task
        >> dag_status
    )


globals()[DAG_NAME] = run_dag()
|
||||
216
airflow/mopdb/RQSD/RQSD_PROCESS/w_MOPDB_RQSD_PROCESS_MANUAL.py
Normal file
216
airflow/mopdb/RQSD/RQSD_PROCESS/w_MOPDB_RQSD_PROCESS_MANUAL.py
Normal file
@@ -0,0 +1,216 @@
|
||||
"""
|
||||
NEW CHANGES FOR MANUAL:
|
||||
Changed DAG_NAME from dynamic file-based to hardcoded "w_MOPDB_RQSD_PROCESS_MANUAL"
|
||||
Changed the last task group from m_MOPDB_RQSD_OUTPUT_CURR_RQSD_NCB_SUBA to m_MOPDB_RQSD_OUTPUT_CURR_RQSD_NCB_SUBA_MANUAL
|
||||
Updated the tag in the RenderConfig to "tag:m_MOPDB_RQSD_OUTPUT_CURR_RQSD_NCB_SUBA_MANUAL"
|
||||
Added tags=["MOPDB", "RQSD", "DBT", "MANUAL"] to the DAG decorator
|
||||
|
||||
KEEP IDENTICAL :
|
||||
All other task groups remain the same as the original MOPDB DAG
|
||||
Flow structure remains identical:
|
||||
read_vars → retrieve_run_id → control_external_run_start
|
||||
→ 5 parallel ANNEX tasks → MANUAL output task → control_external_run_end → dag_status
|
||||
"""
|
||||
import os
|
||||
|
||||
from airflow.decorators import dag
|
||||
from airflow.operators.bash import BashOperator
|
||||
from airflow.operators.python import PythonOperator
|
||||
from airflow.utils.dates import days_ago
|
||||
from airflow.utils.trigger_rule import TriggerRule
|
||||
from cosmos import DbtTaskGroup, ProfileConfig, ProjectConfig, RenderConfig
|
||||
|
||||
from mrds.utils.security_utils import get_verified_run_id, verify_run_id
|
||||
|
||||
# DAG identity: this is the MANUAL variant, so the id is hardcoded rather
# than derived from the file name.
DAG_NAME = "w_MOPDB_RQSD_PROCESS_MANUAL"
ENV_NAME = os.getenv("MRDS_ENV", "").lower()

# Environment name -> control database; unknown environments fall back to MOPDB.
DATABASE_NAME_MAP = {
    "dev": "MOPDB",
    "test": "MOPDB_TEST",
}
DATABASE_NAME = DATABASE_NAME_MAP.get(ENV_NAME, "MOPDB")

# dbt project locations inside the worker image.
dbt_root_path = "/opt/dbt"
dbt_profiles_dir = "/opt/dbt/profiles.yml"
dbt_profiles_dir_parent = "/opt/dbt"

# Environment passed to every dbt invocation (BashOperator and cosmos groups).
# Loader credentials are taken from the worker environment at parse time.
dbt_env = {
    "DBT_PROFILES_DIR": dbt_profiles_dir_parent,
    "DBT_TARGET": ENV_NAME,
    "MRDS_LOADER_DB_USER": os.getenv("MRDS_LOADER_DB_USER"),
    "MRDS_LOADER_DB_PASS": os.getenv("MRDS_LOADER_DB_PASS"),
    "MRDS_LOADER_DB_TNS": os.getenv("MRDS_LOADER_DB_TNS", "XE"),
    "MRDS_SCHEMA": os.getenv("MRDS_SCHEMA", "CT_MRDS"),
    "MRDS_PROTOCOL": os.getenv("MRDS_PROTOCOL", "tcps"),
    "MRDS_THREADS": os.getenv("MRDS_THREADS", "4"),
    "DBT_LOG_PATH": "/opt/dbt/logs",
    "DBT_TARGET_PATH": "/opt/dbt/target",
    "PYTHONUNBUFFERED": "1",
}
|
||||
|
||||
def retrieve_run_id(**kwargs):
    """Resolve the verified orchestration run id, publish it to XCom, return it."""
    verified_id = get_verified_run_id(kwargs)
    task_instance = kwargs["ti"]
    task_instance.xcom_push(key="run_id", value=verified_id)
    return verified_id
|
||||
|
||||
def check_dag_status(**kwargs):
    """Fail the DAG run if any other task instance in it ended up failed."""
    current_task_id = kwargs["task_instance"].task_id
    for instance in kwargs["dag_run"].get_task_instances():
        if instance.task_id == current_task_id:
            # Never judge the status task itself.
            continue
        if instance.state == "failed":
            raise Exception(f"Task {instance.task_id} failed. Failing this DAG run")
|
||||
|
||||
@dag(
    dag_id=DAG_NAME,
    schedule_interval=None,
    start_date=days_ago(2),
    catchup=False,
    tags=["MOPDB", "RQSD", "DBT", "MANUAL"],
)
def run_dag():
    """
    MANUAL variant of the MOPDB RQSD processing workflow.

    Flow:
        read_vars -> retrieve_run_id -> control_external_run_start
        -> 5 parallel ANNEX dbt task groups
        -> m_MOPDB_RQSD_OUTPUT_CURR_RQSD_NCB_SUBA_MANUAL
        -> control_external_run_end -> dag_status
    """

    def read_vars(**context):
        # Log the bucket configuration for debugging; the returned value
        # is not consumed by any downstream task.
        BUCKET = os.getenv("INBOX_BUCKET")
        BUCKET_NAMESPACE = os.getenv("BUCKET_NAMESPACE")
        print("========= DBT ENV =========")
        print(f"BUCKET_NAMESPACE: {BUCKET_NAMESPACE}, BUCKET : {BUCKET}")
        return 1

    def _control_run_operator(operation, **extra):
        # Shared builder for the control_external_run_start / _end tasks.
        # Both run the same dbt run-operation with the standard vars
        # payload and differ only in the operation name (== task_id).
        return BashOperator(
            task_id=operation,
            params={"db": DATABASE_NAME, "wf": DAG_NAME},
            env=dbt_env,
            bash_command="""
        set -euxo pipefail
        cd /opt/dbt
        dbt --log-format json --log-level debug --debug --log-path /opt/dbt/logs \
            run-operation %s \
            --vars '{{ {
                "orchestration_run_id": ti.xcom_pull(task_ids="retrieve_run_id", key="run_id"),
                "input_service_name": params.db,
                "workflow_name": params.wf
            } | tojson }}'
        """ % operation,
            **extra,
        )

    def _dbt_group(tag):
        # One cosmos task group per dbt tag; group_id mirrors the tag.
        return DbtTaskGroup(
            group_id=tag,
            project_config=common_project,
            profile_config=common_profile,
            render_config=RenderConfig(select=["tag:" + tag]),
            operator_args=common_operator_args,
        )

    read_vars_task = PythonOperator(
        task_id="read_vars",
        python_callable=read_vars,
        provide_context=True,
    )

    retrieve_run_id_task = PythonOperator(
        task_id="retrieve_run_id",
        python_callable=retrieve_run_id,
        provide_context=True,
    )

    control_external_run_start = _control_run_operator("control_external_run_start")

    # Shared cosmos/dbt configuration for every task group below.
    common_profile = ProfileConfig(
        profiles_yml_filepath=dbt_profiles_dir,
        profile_name="mrds",
        target_name=ENV_NAME,
    )
    common_project = ProjectConfig(dbt_project_path=dbt_root_path)
    common_vars = {
        "orchestration_run_id": "{{ ti.xcom_pull(task_ids='retrieve_run_id', key='run_id') }}",
        "input_service_name": DATABASE_NAME,
        "workflow_name": DAG_NAME,
    }
    common_operator_args = {
        "vars": common_vars,
        "env": dbt_env,
    }

    # The five ANNEX groups run in parallel before the MANUAL output group.
    annex_groups = [
        _dbt_group(tag)
        for tag in (
            "m_MOPDB_RQSD_ANNEX_1_1_ALL_ODS_RQSD_OBSERVATIONS",
            "m_MOPDB_RQSD_ANNEX_1_2_ALL_ODS_RQSD_OBSERVATIONS",
            "m_MOPDB_RQSD_ANNEX_1_1_FIN_ALL_ODS_RQSD_OBSERVATIONS",
            "m_MOPDB_RQSD_ANNEX_1_2_FIN_ALL_ODS_RQSD_OBSERVATIONS",
            "m_MOPDB_RQSD_ANNEX_2_ALL_ODS_RQSD_OBSERVATIONS",
        )
    ]
    output_group = _dbt_group("m_MOPDB_RQSD_OUTPUT_CURR_RQSD_NCB_SUBA_MANUAL")

    control_external_run_end = _control_run_operator(
        "control_external_run_end",
        trigger_rule=TriggerRule.ALL_DONE,
    )

    dag_status = PythonOperator(
        task_id="dag_status",
        provide_context=True,
        python_callable=check_dag_status,
        trigger_rule=TriggerRule.ALL_DONE,
    )

    # Dependency chain.
    (
        read_vars_task
        >> retrieve_run_id_task
        >> control_external_run_start
        >> annex_groups
        >> output_group
        >> control_external_run_end
        >> dag_status
    )


globals()[DAG_NAME] = run_dag()
|
||||
0
airflow/mopdb/TOP/.gitkeep
Normal file
0
airflow/mopdb/TOP/.gitkeep
Normal file
@@ -0,0 +1,156 @@
|
||||
import os
|
||||
from airflow.decorators import dag
|
||||
from airflow.operators.bash import BashOperator
|
||||
from airflow.operators.python import PythonOperator
|
||||
from airflow.utils.dates import days_ago
|
||||
from airflow.utils.trigger_rule import TriggerRule
|
||||
from cosmos import DbtTaskGroup, ProfileConfig, ProjectConfig, RenderConfig
|
||||
|
||||
# DAG id is derived from this file's basename (without extension).
DAG_NAME = os.path.splitext(os.path.basename(__file__))[0]
ENV_NAME = os.getenv("MRDS_ENV", "").lower()

# Environment name -> control database; unknown environments fall back to MOPDB.
DATABASE_NAME_MAP = {
    "dev": "MOPDB",
    "test": "MOPDB_TEST",
}
DATABASE_NAME = DATABASE_NAME_MAP.get(ENV_NAME, "MOPDB")

# dbt project locations inside the worker image.
dbt_root_path = "/opt/dbt"
dbt_profiles_dir = "/opt/dbt/profiles.yml"
dbt_profiles_dir_parent = "/opt/dbt"

# Environment passed to every dbt invocation (BashOperator and cosmos groups).
# Loader credentials are taken from the worker environment at parse time.
dbt_env = {
    "DBT_PROFILES_DIR": dbt_profiles_dir_parent,
    "DBT_TARGET": ENV_NAME,
    "MRDS_LOADER_DB_USER": os.getenv("MRDS_LOADER_DB_USER"),
    "MRDS_LOADER_DB_PASS": os.getenv("MRDS_LOADER_DB_PASS"),
    "MRDS_LOADER_DB_TNS": os.getenv("MRDS_LOADER_DB_TNS", "XE"),
    "MRDS_SCHEMA": os.getenv("MRDS_SCHEMA", "CT_MRDS"),
    "MRDS_PROTOCOL": os.getenv("MRDS_PROTOCOL", "tcps"),
    "MRDS_THREADS": os.getenv("MRDS_THREADS", "4"),
    "DBT_LOG_PATH": "/opt/dbt/logs",
    "DBT_TARGET_PATH": "/opt/dbt/target",
    "PYTHONUNBUFFERED": "1",
}
|
||||
|
||||
def retrieve_run_id(**kwargs):
    """Publish the Airflow run_id to XCom under the key 'run_id' and return it."""
    current_run = kwargs["run_id"]
    task_instance = kwargs["ti"]
    task_instance.xcom_push(key="run_id", value=current_run)
    return current_run
|
||||
|
||||
def check_dag_status(**kwargs):
    """Fail the DAG run if any other task instance in it ended up failed."""
    current_task_id = kwargs["task_instance"].task_id
    for instance in kwargs["dag_run"].get_task_instances():
        if instance.task_id == current_task_id:
            # Never judge the status task itself.
            continue
        if instance.state == "failed":
            raise Exception(f"Task {instance.task_id} failed. Failing this DAG run")
|
||||
|
||||
@dag(
    dag_id=DAG_NAME,
    schedule_interval=None,
    start_date=days_ago(2),
    catchup=False,
)
def run_dag():
    """
    MOPDB TOP aggregated-allotment workflow.

    Flow:
        read_vars -> retrieve_run_id -> control_external_run_start
        -> m_MOPDB_TOP_T_AGGREGATED_ALLOTMENT_OU_TOP_AGGREGATED_ALLOTMENT
        -> control_external_run_end -> dag_status
    """

    def read_vars(**context):
        # Log the bucket configuration for debugging; the returned value
        # is not consumed by any downstream task.
        BUCKET = os.getenv("INBOX_BUCKET")
        BUCKET_NAMESPACE = os.getenv("BUCKET_NAMESPACE")
        print("========= DBT ENV =========")
        print(f"BUCKET_NAMESPACE: {BUCKET_NAMESPACE}, BUCKET : {BUCKET}")
        return 1

    def _control_run_operator(operation, **extra):
        # Shared builder for the control_external_run_start / _end tasks.
        # Both run the same dbt run-operation with the standard vars
        # payload and differ only in the operation name (== task_id).
        return BashOperator(
            task_id=operation,
            params={"db": DATABASE_NAME, "wf": DAG_NAME},
            env=dbt_env,
            bash_command="""
        set -euxo pipefail
        cd /opt/dbt
        dbt --log-format json --log-level debug --debug --log-path /opt/dbt/logs \
            run-operation %s \
            --vars '{{ {
                "orchestration_run_id": ti.xcom_pull(task_ids="retrieve_run_id", key="run_id"),
                "input_service_name": params.db,
                "workflow_name": params.wf
            } | tojson }}'
        """ % operation,
            **extra,
        )

    read_vars_task = PythonOperator(
        task_id="read_vars",
        python_callable=read_vars,
        provide_context=True,
    )

    retrieve_run_id_task = PythonOperator(
        task_id="retrieve_run_id",
        python_callable=retrieve_run_id,
        provide_context=True,
    )

    control_external_run_start = _control_run_operator("control_external_run_start")

    # Shared cosmos/dbt configuration.
    common_profile = ProfileConfig(
        profiles_yml_filepath=dbt_profiles_dir,
        profile_name="mrds",
        target_name=ENV_NAME,
    )
    common_project = ProjectConfig(dbt_project_path=dbt_root_path)
    common_vars = {
        "orchestration_run_id": "{{ ti.xcom_pull(task_ids='retrieve_run_id', key='run_id') }}",
        "input_service_name": DATABASE_NAME,
        "workflow_name": DAG_NAME,
    }
    common_operator_args = {
        "vars": common_vars,
        "env": dbt_env,
    }

    aggregated_allotment_group = DbtTaskGroup(
        group_id="m_MOPDB_TOP_T_AGGREGATED_ALLOTMENT_OU_TOP_AGGREGATED_ALLOTMENT",
        project_config=common_project,
        profile_config=common_profile,
        render_config=RenderConfig(select=[
            "tag:m_MOPDB_TOP_T_AGGREGATED_ALLOTMENT_OU_TOP_AGGREGATED_ALLOTMENT",
        ]),
        operator_args=common_operator_args,
    )

    control_external_run_end = _control_run_operator(
        "control_external_run_end",
        trigger_rule=TriggerRule.ALL_DONE,
    )

    dag_status = PythonOperator(
        task_id="dag_status",
        provide_context=True,
        python_callable=check_dag_status,
        trigger_rule=TriggerRule.ALL_DONE,
    )

    # Dependency chain.
    (
        read_vars_task
        >> retrieve_run_id_task
        >> control_external_run_start
        >> aggregated_allotment_group
        >> control_external_run_end
        >> dag_status
    )


globals()[DAG_NAME] = run_dag()
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
156
airflow/mopdb/TOP/ALLOTMENT/w_MOPDB_TOP_ALLOTMENT.py
Normal file
156
airflow/mopdb/TOP/ALLOTMENT/w_MOPDB_TOP_ALLOTMENT.py
Normal file
@@ -0,0 +1,156 @@
|
||||
import os
|
||||
from airflow.decorators import dag
|
||||
from airflow.operators.bash import BashOperator
|
||||
from airflow.operators.python import PythonOperator
|
||||
from airflow.utils.dates import days_ago
|
||||
from airflow.utils.trigger_rule import TriggerRule
|
||||
from cosmos import DbtTaskGroup, ProfileConfig, ProjectConfig, RenderConfig
|
||||
|
||||
# DAG id is derived from this file's basename (without extension).
DAG_NAME = os.path.splitext(os.path.basename(__file__))[0]
ENV_NAME = os.getenv("MRDS_ENV", "").lower()

# Environment name -> control database; unknown environments fall back to MOPDB.
DATABASE_NAME_MAP = {
    "dev": "MOPDB",
    "test": "MOPDB_TEST",
}
DATABASE_NAME = DATABASE_NAME_MAP.get(ENV_NAME, "MOPDB")

# dbt project locations inside the worker image.
dbt_root_path = "/opt/dbt"
dbt_profiles_dir = "/opt/dbt/profiles.yml"
dbt_profiles_dir_parent = "/opt/dbt"

# Environment passed to every dbt invocation (BashOperator and cosmos groups).
# Loader credentials are taken from the worker environment at parse time.
dbt_env = {
    "DBT_PROFILES_DIR": dbt_profiles_dir_parent,
    "DBT_TARGET": ENV_NAME,
    "MRDS_LOADER_DB_USER": os.getenv("MRDS_LOADER_DB_USER"),
    "MRDS_LOADER_DB_PASS": os.getenv("MRDS_LOADER_DB_PASS"),
    "MRDS_LOADER_DB_TNS": os.getenv("MRDS_LOADER_DB_TNS", "XE"),
    "MRDS_SCHEMA": os.getenv("MRDS_SCHEMA", "CT_MRDS"),
    "MRDS_PROTOCOL": os.getenv("MRDS_PROTOCOL", "tcps"),
    "MRDS_THREADS": os.getenv("MRDS_THREADS", "4"),
    "DBT_LOG_PATH": "/opt/dbt/logs",
    "DBT_TARGET_PATH": "/opt/dbt/target",
    "PYTHONUNBUFFERED": "1",
}
|
||||
|
||||
def retrieve_run_id(**kwargs):
    """Publish the Airflow run_id to XCom under the key 'run_id' and return it."""
    current_run = kwargs["run_id"]
    task_instance = kwargs["ti"]
    task_instance.xcom_push(key="run_id", value=current_run)
    return current_run
|
||||
|
||||
def check_dag_status(**kwargs):
    """Fail the DAG run if any other task instance in it ended up failed."""
    current_task_id = kwargs["task_instance"].task_id
    for instance in kwargs["dag_run"].get_task_instances():
        if instance.task_id == current_task_id:
            # Never judge the status task itself.
            continue
        if instance.state == "failed":
            raise Exception(f"Task {instance.task_id} failed. Failing this DAG run")
|
||||
|
||||
@dag(
    dag_id=DAG_NAME,
    schedule_interval=None,
    start_date=days_ago(2),
    catchup=False,
)
def run_dag():
    """
    MOPDB TOP allotment workflow.

    Flow:
        read_vars -> retrieve_run_id -> control_external_run_start
        -> m_MOPDB_TOP_T_ALLOTMENT_OU_TOP_ALLOTMENT
        -> control_external_run_end -> dag_status
    """

    def read_vars(**context):
        # Log the bucket configuration for debugging; the returned value
        # is not consumed by any downstream task.
        BUCKET = os.getenv("INBOX_BUCKET")
        BUCKET_NAMESPACE = os.getenv("BUCKET_NAMESPACE")
        print("========= DBT ENV =========")
        print(f"BUCKET_NAMESPACE: {BUCKET_NAMESPACE}, BUCKET : {BUCKET}")
        return 1

    def _control_run_operator(operation, **extra):
        # Shared builder for the control_external_run_start / _end tasks.
        # Both run the same dbt run-operation with the standard vars
        # payload and differ only in the operation name (== task_id).
        return BashOperator(
            task_id=operation,
            params={"db": DATABASE_NAME, "wf": DAG_NAME},
            env=dbt_env,
            bash_command="""
        set -euxo pipefail
        cd /opt/dbt
        dbt --log-format json --log-level debug --debug --log-path /opt/dbt/logs \
            run-operation %s \
            --vars '{{ {
                "orchestration_run_id": ti.xcom_pull(task_ids="retrieve_run_id", key="run_id"),
                "input_service_name": params.db,
                "workflow_name": params.wf
            } | tojson }}'
        """ % operation,
            **extra,
        )

    read_vars_task = PythonOperator(
        task_id="read_vars",
        python_callable=read_vars,
        provide_context=True,
    )

    retrieve_run_id_task = PythonOperator(
        task_id="retrieve_run_id",
        python_callable=retrieve_run_id,
        provide_context=True,
    )

    control_external_run_start = _control_run_operator("control_external_run_start")

    # Shared cosmos/dbt configuration.
    common_profile = ProfileConfig(
        profiles_yml_filepath=dbt_profiles_dir,
        profile_name="mrds",
        target_name=ENV_NAME,
    )
    common_project = ProjectConfig(dbt_project_path=dbt_root_path)
    common_vars = {
        "orchestration_run_id": "{{ ti.xcom_pull(task_ids='retrieve_run_id', key='run_id') }}",
        "input_service_name": DATABASE_NAME,
        "workflow_name": DAG_NAME,
    }
    common_operator_args = {
        "vars": common_vars,
        "env": dbt_env,
    }

    allotment_group = DbtTaskGroup(
        group_id="m_MOPDB_TOP_T_ALLOTMENT_OU_TOP_ALLOTMENT",
        project_config=common_project,
        profile_config=common_profile,
        render_config=RenderConfig(select=[
            "tag:m_MOPDB_TOP_T_ALLOTMENT_OU_TOP_ALLOTMENT",
        ]),
        operator_args=common_operator_args,
    )

    control_external_run_end = _control_run_operator(
        "control_external_run_end",
        trigger_rule=TriggerRule.ALL_DONE,
    )

    dag_status = PythonOperator(
        task_id="dag_status",
        provide_context=True,
        python_callable=check_dag_status,
        trigger_rule=TriggerRule.ALL_DONE,
    )

    # Dependency chain.
    (
        read_vars_task
        >> retrieve_run_id_task
        >> control_external_run_start
        >> allotment_group
        >> control_external_run_end
        >> dag_status
    )


globals()[DAG_NAME] = run_dag()
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
1534
airflow/ods/c2d/uc/config/common_ucdb.xsd
Normal file
1534
airflow/ods/c2d/uc/config/common_ucdb.xsd
Normal file
File diff suppressed because it is too large
Load Diff
124
airflow/ods/c2d/uc/config/disseminationFile.xsd
Normal file
124
airflow/ods/c2d/uc/config/disseminationFile.xsd
Normal file
@@ -0,0 +1,124 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
||||
<!-- ***************** Schema for UCDB DisseminationFile ******************* -->
|
||||
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
|
||||
xmlns:c2d="http://c2d.escb.eu/UseOfCollateralMessage"
|
||||
targetNamespace="http://c2d.escb.eu/UseOfCollateralMessage" elementFormDefault="qualified" version="3.0">
|
||||
<!-- ***************** Include Common types ******************* -->
|
||||
<xs:include schemaLocation="common_ucdb.xsd"/>
|
||||
<!-- Definition of the root element and its structure -->
|
||||
<xs:element name="DisseminationFile">
|
||||
<xs:annotation>
|
||||
<xs:documentation>Use of Collateral message. Dissemination files hold all reported usages of a snapshot and a specific NCB.</xs:documentation>
|
||||
</xs:annotation>
|
||||
<xs:complexType>
|
||||
<xs:sequence>
|
||||
<xs:element name="MetaInformation" type="c2d:MetaInformationTypeDisseminationFile"/>
|
||||
<xs:element name="MarketableAssets" type="c2d:DisseminationMarketableAssetsType" minOccurs="0"/>
|
||||
<xs:element name="NonMarketableAssets" type="c2d:DisseminationNonMarketableAssetsType" minOccurs="0"/>
|
||||
<xs:element name="NonMarketableDECCs" type="c2d:DisseminationNonMarketableDECCsType" minOccurs="0"/>
|
||||
</xs:sequence>
|
||||
<xs:attribute name="version" type="xs:string" use="required"/>
|
||||
</xs:complexType>
|
||||
</xs:element>
|
||||
<xs:complexType name="MetaInformationTypeDisseminationFile">
|
||||
<xs:complexContent>
|
||||
<xs:extension base="c2d:AbstractMetaInformationType">
|
||||
<xs:sequence>
|
||||
<xs:element name="ReportingNCB" type="c2d:EurosystemISOCodeType">
|
||||
<xs:annotation>
|
||||
<xs:documentation>The two letter code identifying the NCB contained in the dissemination file.</xs:documentation>
|
||||
</xs:annotation>
|
||||
</xs:element>
|
||||
<xs:element name="SnapshotDate" type="xs:date">
|
||||
<xs:annotation>
|
||||
<xs:documentation>The date the snapshot of the initial file data was taken.</xs:documentation>
|
||||
</xs:annotation>
|
||||
</xs:element>
|
||||
<xs:element name="DateCreated" type="xs:dateTime">
|
||||
<xs:annotation>
|
||||
<xs:documentation>The date when the dissemination file has been created.</xs:documentation>
|
||||
</xs:annotation>
|
||||
</xs:element>
|
||||
<xs:element name="NumberOfSuspectRecords" type="xs:unsignedInt">
|
||||
<xs:annotation>
|
||||
<xs:documentation>The number of records in the dissemination file still in status suspect.</xs:documentation>
|
||||
</xs:annotation>
|
||||
</xs:element>
|
||||
</xs:sequence>
|
||||
</xs:extension>
|
||||
</xs:complexContent>
|
||||
</xs:complexType>
|
||||
<xs:complexType name="DisseminationMarketableAssetsType">
|
||||
<xs:annotation>
|
||||
<xs:documentation>Holds all marketable assets.</xs:documentation>
|
||||
</xs:annotation>
|
||||
<xs:sequence>
|
||||
<xs:element name="MarketableAsset" type="c2d:DisseminationMarketableAssetType" maxOccurs="unbounded"/>
|
||||
</xs:sequence>
|
||||
</xs:complexType>
|
||||
<xs:complexType name="DisseminationNonMarketableAssetsType">
|
||||
<xs:annotation>
|
||||
<xs:documentation>Holds all non marketable assets.</xs:documentation>
|
||||
</xs:annotation>
|
||||
<xs:sequence>
|
||||
<xs:element name="NonMarketableAsset" type="c2d:DisseminationNonMarketableAssetType" maxOccurs="unbounded"/>
|
||||
</xs:sequence>
|
||||
</xs:complexType>
|
||||
<xs:complexType name="DisseminationNonMarketableDECCsType">
|
||||
<xs:annotation>
|
||||
<xs:documentation>Holds all non marketable DECCs.</xs:documentation>
|
||||
</xs:annotation>
|
||||
<xs:sequence>
|
||||
<xs:element name="NonMarketableDECC" type="c2d:DisseminationNonMarketableDECCType" maxOccurs="unbounded"/>
|
||||
</xs:sequence>
|
||||
</xs:complexType>
|
||||
<xs:complexType name="DisseminationMarketableAssetType">
|
||||
<xs:annotation>
|
||||
<xs:documentation>Marketable Assets with suspect addon</xs:documentation>
|
||||
</xs:annotation>
|
||||
<xs:complexContent>
|
||||
<xs:extension base="c2d:MarketableAssetType">
|
||||
<xs:sequence>
|
||||
<xs:element name="SuspectInformation" type="c2d:DisseminationSuspectType" minOccurs="0"/>
|
||||
</xs:sequence>
|
||||
</xs:extension>
|
||||
</xs:complexContent>
|
||||
</xs:complexType>
|
||||
<xs:complexType name="DisseminationNonMarketableAssetType">
|
||||
<xs:annotation>
|
||||
<xs:documentation>Non Marketable Assets with suspect addon</xs:documentation>
|
||||
</xs:annotation>
|
||||
<xs:complexContent>
|
||||
<xs:extension base="c2d:NonMarketableAssetType">
|
||||
<xs:sequence>
|
||||
<xs:element name="SuspectInformation" type="c2d:DisseminationSuspectType" minOccurs="0"/>
|
||||
</xs:sequence>
|
||||
</xs:extension>
|
||||
</xs:complexContent>
|
||||
</xs:complexType>
|
||||
<xs:complexType name="DisseminationNonMarketableDECCType">
|
||||
<xs:annotation>
|
||||
<xs:documentation>Non Marketable DECCs with suspect addon</xs:documentation>
|
||||
</xs:annotation>
|
||||
<xs:complexContent>
|
||||
<xs:extension base="c2d:NonMarketableDECCType">
|
||||
<xs:sequence>
|
||||
<xs:element name="SuspectInformation" type="c2d:DisseminationSuspectType" minOccurs="0"/>
|
||||
</xs:sequence>
|
||||
</xs:extension>
|
||||
</xs:complexContent>
|
||||
</xs:complexType>
|
||||
<xs:complexType name="DisseminationSuspectType">
|
||||
<xs:annotation>
|
||||
<xs:documentation>Holds all suspect information of a reported usage.</xs:documentation>
|
||||
</xs:annotation>
|
||||
<xs:sequence>
|
||||
<xs:element name="SuspectId" type="c2d:IntegerGreaterThanZeroType">
|
||||
<xs:annotation>
|
||||
<xs:documentation>An ID created by the UCDB system identifying each single record that is stored in the UCDB system as a suspect record.</xs:documentation>
|
||||
</xs:annotation>
|
||||
</xs:element>
|
||||
<xs:element name="SuspectReasons" type="c2d:SuspectsReasonsType"/>
|
||||
</xs:sequence>
|
||||
</xs:complexType>
|
||||
</xs:schema>
|
||||
45
airflow/ods/c2d/uc/config/m_ODS_C2D_UC_DISSEM_PARSE.yml
Normal file
45
airflow/ods/c2d/uc/config/m_ODS_C2D_UC_DISSEM_PARSE.yml
Normal file
@@ -0,0 +1,45 @@
|
||||
# Global configurations
|
||||
inbox_prefix: INBOX/C2D/CollateralDataDissemination
|
||||
archive_prefix: ARCHIVE/C2D/CollateralDataDissemination
|
||||
workflow_name: w_ODS_C2D_UC_DISSEMI
|
||||
validation_schema_path: 'disseminationFile.xsd'
|
||||
file_type: xml
|
||||
|
||||
# List of tasks
|
||||
|
||||
tasks:
|
||||
|
||||
# Task 1
|
||||
- task_name: m_ODS_C2D_UC_DISSEM_METADATA_PARSE
|
||||
ods_prefix: INBOX/C2D/CollateralDataDissemination/C2D_A_UC_DISSEM_METADATA_LOADS
|
||||
output_table: C2D_A_UC_DISSEM_METADATA_LOADS
|
||||
namespaces:
|
||||
ns: 'http://c2d.escb.eu/UseOfCollateralMessage'
|
||||
output_columns:
|
||||
- type: 'a_key'
|
||||
column_header: 'A_KEY'
|
||||
- type: 'workflow_key'
|
||||
column_header: 'A_WORKFLOW_HISTORY_KEY'
|
||||
- type: 'xpath'
|
||||
value: '//ns:DisseminationFile/@version'
|
||||
column_header: 'C2D_VERSION'
|
||||
is_key: 'N'
|
||||
- type: 'xpath'
|
||||
value: '//ns:MetaInformation/ns:DateCreated'
|
||||
column_header: 'FILE_CREATION_DATE'
|
||||
is_key: 'N'
|
||||
- type: 'xpath'
|
||||
value: '//ns:MetaInformation/ns:NumberOfSuspectRecords'
|
||||
column_header: 'NO_OF_SUSPECT_RECORDS'
|
||||
is_key: 'N'
|
||||
- type: 'xpath'
|
||||
value: '//ns:MetaInformation/ns:ReportingNCB'
|
||||
column_header: 'REPORTING_NCB'
|
||||
is_key: 'N'
|
||||
- type: 'xpath'
|
||||
value: '//ns:MetaInformation/ns:SnapshotDate'
|
||||
column_header: 'SNAPSHOT_DATE'
|
||||
is_key: 'N'
|
||||
- type: 'static'
|
||||
value: ''
|
||||
column_header: 'PROCESSED_TO_DWH'
|
||||
968
airflow/ods/c2d/uc/config/ucdb_cl.xsd
Normal file
968
airflow/ods/c2d/uc/config/ucdb_cl.xsd
Normal file
@@ -0,0 +1,968 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?><schema targetNamespace="http://c2d.escb.eu/UseOfCollateralMessage" elementFormDefault="qualified" attributeFormDefault="qualified" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://www.w3.org/2001/XMLSchema">
|
||||
|
||||
|
||||
<simpleType name="cl_issuer_csd">
|
||||
<restriction base="string">
|
||||
<enumeration value="CLAT01"/>
|
||||
<enumeration value="CLBE01"/>
|
||||
<enumeration value="CLBE02"/>
|
||||
<enumeration value="CLBG01"/>
|
||||
<enumeration value="CLBL01"/>
|
||||
<enumeration value="CLCY01"/>
|
||||
<enumeration value="CLCZ01"/>
|
||||
<enumeration value="CLDE01"/>
|
||||
<enumeration value="CLDE02"/>
|
||||
<enumeration value="CLDK01"/>
|
||||
<enumeration value="CLDL01"/>
|
||||
<enumeration value="CLEE01"/>
|
||||
<enumeration value="CLES01"/>
|
||||
<enumeration value="CLEU01"/>
|
||||
<enumeration value="CLFI01"/>
|
||||
<enumeration value="CLFR01"/>
|
||||
<enumeration value="CLGR01"/>
|
||||
<enumeration value="CLHR01"/>
|
||||
<enumeration value="CLIT01"/>
|
||||
<enumeration value="CLLD01"/>
|
||||
<enumeration value="CLLT02"/>
|
||||
<enumeration value="CLLU01"/>
|
||||
<enumeration value="CLLU03"/>
|
||||
<enumeration value="CLLV02"/>
|
||||
<enumeration value="CLMT01"/>
|
||||
<enumeration value="CLNL01"/>
|
||||
<enumeration value="CLPT02"/>
|
||||
<enumeration value="CLSI01"/>
|
||||
<enumeration value="CLSK01"/>
|
||||
<enumeration value="CLSK02"/>
|
||||
</restriction>
|
||||
</simpleType>
|
||||
|
||||
<simpleType name="cl_ccb">
|
||||
<restriction base="string">
|
||||
<enumeration value="AT"/>
|
||||
<enumeration value="BE"/>
|
||||
<enumeration value="BG"/>
|
||||
<enumeration value="CY"/>
|
||||
<enumeration value="DE"/>
|
||||
<enumeration value="EE"/>
|
||||
<enumeration value="ES"/>
|
||||
<enumeration value="FI"/>
|
||||
<enumeration value="FR"/>
|
||||
<enumeration value="GR"/>
|
||||
<enumeration value="HR"/>
|
||||
<enumeration value="IE"/>
|
||||
<enumeration value="IT"/>
|
||||
<enumeration value="LT"/>
|
||||
<enumeration value="LU"/>
|
||||
<enumeration value="LV"/>
|
||||
<enumeration value="MT"/>
|
||||
<enumeration value="NL"/>
|
||||
<enumeration value="PT"/>
|
||||
<enumeration value="SI"/>
|
||||
<enumeration value="SK"/>
|
||||
</restriction>
|
||||
</simpleType>
|
||||
|
||||
<simpleType name="cl_mobilisation_channel">
|
||||
<restriction base="string">
|
||||
<enumeration value="CCBM mkt"/>
|
||||
<enumeration value="CCBM mkt with links"/>
|
||||
<enumeration value="CCBM nonmkt"/>
|
||||
<enumeration value="Direct access"/>
|
||||
<enumeration value="Direct access with links"/>
|
||||
<enumeration value="Local CSD"/>
|
||||
<enumeration value="Local CSD with links"/>
|
||||
<enumeration value="Local cb nonmkt"/>
|
||||
<enumeration value="Local dom nonmkt"/>
|
||||
</restriction>
|
||||
</simpleType>
|
||||
|
||||
<simpleType name="cl_triparty_agent">
|
||||
<restriction base="string">
|
||||
<enumeration value="CLBE02"/>
|
||||
<enumeration value="CLDE01"/>
|
||||
<enumeration value="CLFR01"/>
|
||||
<enumeration value="CLIT01"/>
|
||||
<enumeration value="CLLU01"/>
|
||||
<enumeration value="CLNL01"/>
|
||||
</restriction>
|
||||
</simpleType>
|
||||
|
||||
<simpleType name="cl_la_rating">
|
||||
<restriction base="string">
|
||||
<enumeration value="LADB01"/>
|
||||
<enumeration value="LADB02"/>
|
||||
<enumeration value="LADB03"/>
|
||||
<enumeration value="LADB04"/>
|
||||
<enumeration value="LADB05"/>
|
||||
<enumeration value="LADB06"/>
|
||||
<enumeration value="LADB07"/>
|
||||
<enumeration value="LADB08"/>
|
||||
<enumeration value="LADB09"/>
|
||||
<enumeration value="LADB10"/>
|
||||
<enumeration value="LADB11"/>
|
||||
<enumeration value="LADB12"/>
|
||||
<enumeration value="LADB13"/>
|
||||
<enumeration value="LADB14"/>
|
||||
<enumeration value="LADB15"/>
|
||||
<enumeration value="LADB16"/>
|
||||
<enumeration value="LADB17"/>
|
||||
<enumeration value="LADB18"/>
|
||||
<enumeration value="LADB19"/>
|
||||
<enumeration value="LADB20"/>
|
||||
<enumeration value="LADB21"/>
|
||||
<enumeration value="LADB22"/>
|
||||
<enumeration value="LADB23"/>
|
||||
<enumeration value="LADB24"/>
|
||||
<enumeration value="LADB25"/>
|
||||
<enumeration value="LADB26"/>
|
||||
<enumeration value="LAIA01"/>
|
||||
<enumeration value="LAIA02"/>
|
||||
<enumeration value="LAIA03"/>
|
||||
<enumeration value="LAIA04"/>
|
||||
<enumeration value="LAIA05"/>
|
||||
<enumeration value="LAIA06"/>
|
||||
<enumeration value="LAIA07"/>
|
||||
<enumeration value="LAIA08"/>
|
||||
<enumeration value="LAIA09"/>
|
||||
<enumeration value="LAIA10"/>
|
||||
<enumeration value="LAIA11"/>
|
||||
<enumeration value="LAIA12"/>
|
||||
<enumeration value="LAIA13"/>
|
||||
<enumeration value="LAIA14"/>
|
||||
<enumeration value="LAIA15"/>
|
||||
<enumeration value="LAIA16"/>
|
||||
<enumeration value="LAIA17"/>
|
||||
<enumeration value="LAIA18"/>
|
||||
<enumeration value="LAIA19"/>
|
||||
<enumeration value="LAIA20"/>
|
||||
<enumeration value="LAIA21"/>
|
||||
<enumeration value="LAMY01"/>
|
||||
<enumeration value="LAMY02"/>
|
||||
<enumeration value="LAMY03"/>
|
||||
<enumeration value="LAMY04"/>
|
||||
<enumeration value="LAMY05"/>
|
||||
<enumeration value="LAMY06"/>
|
||||
<enumeration value="LAMY07"/>
|
||||
<enumeration value="LAMY08"/>
|
||||
<enumeration value="LAMY09"/>
|
||||
<enumeration value="LAMY10"/>
|
||||
<enumeration value="LAMY11"/>
|
||||
<enumeration value="LAMY12"/>
|
||||
<enumeration value="LAMY13"/>
|
||||
<enumeration value="LAMY14"/>
|
||||
<enumeration value="LAMY15"/>
|
||||
<enumeration value="LAMY16"/>
|
||||
<enumeration value="LAMY17"/>
|
||||
<enumeration value="LAMY18"/>
|
||||
<enumeration value="LAMY19"/>
|
||||
<enumeration value="LAMY20"/>
|
||||
<enumeration value="LAMY21"/>
|
||||
<enumeration value="LASC01"/>
|
||||
<enumeration value="LASC02"/>
|
||||
<enumeration value="LASC03"/>
|
||||
<enumeration value="LASC04"/>
|
||||
<enumeration value="LASC05"/>
|
||||
<enumeration value="LASC06"/>
|
||||
<enumeration value="LASC07"/>
|
||||
<enumeration value="LASC08"/>
|
||||
<enumeration value="LASC09"/>
|
||||
<enumeration value="LASC10"/>
|
||||
<enumeration value="LASC11"/>
|
||||
<enumeration value="LASC12"/>
|
||||
<enumeration value="LASC13"/>
|
||||
<enumeration value="LASC14"/>
|
||||
<enumeration value="LASC15"/>
|
||||
<enumeration value="LASC16"/>
|
||||
<enumeration value="LASC17"/>
|
||||
<enumeration value="LASC18"/>
|
||||
<enumeration value="LASC19"/>
|
||||
<enumeration value="LASC20"/>
|
||||
<enumeration value="LASC21"/>
|
||||
<enumeration value="LASP01"/>
|
||||
<enumeration value="LASP02"/>
|
||||
<enumeration value="LASP03"/>
|
||||
<enumeration value="LASP04"/>
|
||||
<enumeration value="LASP05"/>
|
||||
<enumeration value="LASP06"/>
|
||||
<enumeration value="LASP07"/>
|
||||
<enumeration value="LASP08"/>
|
||||
<enumeration value="LASP09"/>
|
||||
<enumeration value="LASP10"/>
|
||||
<enumeration value="LASP11"/>
|
||||
<enumeration value="LASP12"/>
|
||||
<enumeration value="LASP13"/>
|
||||
<enumeration value="LASP14"/>
|
||||
<enumeration value="LASP15"/>
|
||||
<enumeration value="LASP16"/>
|
||||
<enumeration value="LASP17"/>
|
||||
<enumeration value="LASP18"/>
|
||||
<enumeration value="LASP19"/>
|
||||
<enumeration value="LASP20"/>
|
||||
<enumeration value="LASP21"/>
|
||||
<enumeration value="LASP22"/>
|
||||
<enumeration value="LPDB01"/>
|
||||
<enumeration value="LPDB02"/>
|
||||
<enumeration value="LPDB03"/>
|
||||
<enumeration value="LPDB04"/>
|
||||
<enumeration value="LPDB05"/>
|
||||
<enumeration value="LPDB06"/>
|
||||
<enumeration value="LPDB07"/>
|
||||
<enumeration value="LPDB08"/>
|
||||
<enumeration value="LPDB09"/>
|
||||
<enumeration value="LPDB10"/>
|
||||
<enumeration value="LPDB11"/>
|
||||
<enumeration value="LPDB12"/>
|
||||
<enumeration value="LPDB13"/>
|
||||
<enumeration value="LPDB14"/>
|
||||
<enumeration value="LPDB15"/>
|
||||
<enumeration value="LPDB16"/>
|
||||
<enumeration value="LPDB17"/>
|
||||
<enumeration value="LPDB18"/>
|
||||
<enumeration value="LPDB19"/>
|
||||
<enumeration value="LPDB20"/>
|
||||
<enumeration value="LPDB21"/>
|
||||
<enumeration value="LPDB22"/>
|
||||
<enumeration value="LPDB23"/>
|
||||
<enumeration value="LPDB24"/>
|
||||
<enumeration value="LPDB25"/>
|
||||
<enumeration value="LPDB26"/>
|
||||
<enumeration value="LPIA01"/>
|
||||
<enumeration value="LPIA02"/>
|
||||
<enumeration value="LPIA03"/>
|
||||
<enumeration value="LPIA04"/>
|
||||
<enumeration value="LPIA05"/>
|
||||
<enumeration value="LPIA06"/>
|
||||
<enumeration value="LPIA07"/>
|
||||
<enumeration value="LPIA08"/>
|
||||
<enumeration value="LPIA09"/>
|
||||
<enumeration value="LPIA10"/>
|
||||
<enumeration value="LPIA11"/>
|
||||
<enumeration value="LPIA12"/>
|
||||
<enumeration value="LPIA13"/>
|
||||
<enumeration value="LPIA14"/>
|
||||
<enumeration value="LPIA15"/>
|
||||
<enumeration value="LPIA16"/>
|
||||
<enumeration value="LPIA17"/>
|
||||
<enumeration value="LPIA18"/>
|
||||
<enumeration value="LPIA19"/>
|
||||
<enumeration value="LPIA20"/>
|
||||
<enumeration value="LPIA21"/>
|
||||
<enumeration value="LPMY01"/>
|
||||
<enumeration value="LPMY02"/>
|
||||
<enumeration value="LPMY03"/>
|
||||
<enumeration value="LPMY04"/>
|
||||
<enumeration value="LPMY05"/>
|
||||
<enumeration value="LPMY06"/>
|
||||
<enumeration value="LPMY07"/>
|
||||
<enumeration value="LPMY08"/>
|
||||
<enumeration value="LPMY09"/>
|
||||
<enumeration value="LPMY10"/>
|
||||
<enumeration value="LPMY11"/>
|
||||
<enumeration value="LPMY12"/>
|
||||
<enumeration value="LPMY13"/>
|
||||
<enumeration value="LPMY14"/>
|
||||
<enumeration value="LPMY15"/>
|
||||
<enumeration value="LPMY16"/>
|
||||
<enumeration value="LPMY17"/>
|
||||
<enumeration value="LPMY18"/>
|
||||
<enumeration value="LPMY19"/>
|
||||
<enumeration value="LPMY20"/>
|
||||
<enumeration value="LPMY21"/>
|
||||
<enumeration value="LPSC01"/>
|
||||
<enumeration value="LPSC02"/>
|
||||
<enumeration value="LPSC03"/>
|
||||
<enumeration value="LPSC04"/>
|
||||
<enumeration value="LPSC05"/>
|
||||
<enumeration value="LPSC06"/>
|
||||
<enumeration value="LPSC07"/>
|
||||
<enumeration value="LPSC08"/>
|
||||
<enumeration value="LPSC09"/>
|
||||
<enumeration value="LPSC10"/>
|
||||
<enumeration value="LPSC11"/>
|
||||
<enumeration value="LPSC12"/>
|
||||
<enumeration value="LPSC13"/>
|
||||
<enumeration value="LPSC14"/>
|
||||
<enumeration value="LPSC15"/>
|
||||
<enumeration value="LPSC16"/>
|
||||
<enumeration value="LPSC17"/>
|
||||
<enumeration value="LPSC18"/>
|
||||
<enumeration value="LPSC19"/>
|
||||
<enumeration value="LPSC20"/>
|
||||
<enumeration value="LPSC21"/>
|
||||
<enumeration value="LPSP01"/>
|
||||
<enumeration value="LPSP02"/>
|
||||
<enumeration value="LPSP03"/>
|
||||
<enumeration value="LPSP04"/>
|
||||
<enumeration value="LPSP05"/>
|
||||
<enumeration value="LPSP06"/>
|
||||
<enumeration value="LPSP07"/>
|
||||
<enumeration value="LPSP08"/>
|
||||
<enumeration value="LPSP09"/>
|
||||
<enumeration value="LPSP10"/>
|
||||
<enumeration value="LPSP11"/>
|
||||
<enumeration value="LPSP12"/>
|
||||
<enumeration value="LPSP13"/>
|
||||
<enumeration value="LPSP14"/>
|
||||
<enumeration value="LPSP15"/>
|
||||
<enumeration value="LPSP16"/>
|
||||
<enumeration value="LPSP17"/>
|
||||
<enumeration value="LPSP18"/>
|
||||
<enumeration value="LPSP19"/>
|
||||
<enumeration value="LPSP20"/>
|
||||
<enumeration value="LPSP21"/>
|
||||
<enumeration value="LPSP22"/>
|
||||
</restriction>
|
||||
</simpleType>
|
||||
|
||||
<simpleType name="cl_si_rating">
|
||||
<restriction base="string">
|
||||
<enumeration value="SIDB01"/>
|
||||
<enumeration value="SIDB02"/>
|
||||
<enumeration value="SIDB03"/>
|
||||
<enumeration value="SIDB04"/>
|
||||
<enumeration value="SIDB05"/>
|
||||
<enumeration value="SIDB06"/>
|
||||
<enumeration value="SIDB07"/>
|
||||
<enumeration value="SIDB08"/>
|
||||
<enumeration value="SIDB09"/>
|
||||
<enumeration value="SIIA01"/>
|
||||
<enumeration value="SIIA02"/>
|
||||
<enumeration value="SIIA03"/>
|
||||
<enumeration value="SIIA04"/>
|
||||
<enumeration value="SIIA05"/>
|
||||
<enumeration value="SIIA06"/>
|
||||
<enumeration value="SIIA07"/>
|
||||
<enumeration value="SIMY01"/>
|
||||
<enumeration value="SIMY02"/>
|
||||
<enumeration value="SIMY03"/>
|
||||
<enumeration value="SIMY04"/>
|
||||
<enumeration value="SISC01"/>
|
||||
<enumeration value="SISC02"/>
|
||||
<enumeration value="SISC03"/>
|
||||
<enumeration value="SISC04"/>
|
||||
<enumeration value="SISC05"/>
|
||||
<enumeration value="SISC06"/>
|
||||
<enumeration value="SISC07"/>
|
||||
<enumeration value="SISP01"/>
|
||||
<enumeration value="SISP02"/>
|
||||
<enumeration value="SISP03"/>
|
||||
<enumeration value="SISP04"/>
|
||||
<enumeration value="SISP05"/>
|
||||
<enumeration value="SISP06"/>
|
||||
<enumeration value="SISP07"/>
|
||||
</restriction>
|
||||
</simpleType>
|
||||
|
||||
<simpleType name="cl_li_rating">
|
||||
<restriction base="string">
|
||||
<enumeration value="LIDB01"/>
|
||||
<enumeration value="LIDB02"/>
|
||||
<enumeration value="LIDB03"/>
|
||||
<enumeration value="LIDB04"/>
|
||||
<enumeration value="LIDB05"/>
|
||||
<enumeration value="LIDB06"/>
|
||||
<enumeration value="LIDB07"/>
|
||||
<enumeration value="LIDB08"/>
|
||||
<enumeration value="LIDB09"/>
|
||||
<enumeration value="LIDB10"/>
|
||||
<enumeration value="LIDB11"/>
|
||||
<enumeration value="LIDB12"/>
|
||||
<enumeration value="LIDB13"/>
|
||||
<enumeration value="LIDB14"/>
|
||||
<enumeration value="LIDB15"/>
|
||||
<enumeration value="LIDB16"/>
|
||||
<enumeration value="LIDB17"/>
|
||||
<enumeration value="LIDB18"/>
|
||||
<enumeration value="LIDB19"/>
|
||||
<enumeration value="LIDB20"/>
|
||||
<enumeration value="LIDB21"/>
|
||||
<enumeration value="LIDB22"/>
|
||||
<enumeration value="LIDB23"/>
|
||||
<enumeration value="LIDB24"/>
|
||||
<enumeration value="LIDB25"/>
|
||||
<enumeration value="LIDB26"/>
|
||||
<enumeration value="LIIA01"/>
|
||||
<enumeration value="LIIA02"/>
|
||||
<enumeration value="LIIA03"/>
|
||||
<enumeration value="LIIA04"/>
|
||||
<enumeration value="LIIA05"/>
|
||||
<enumeration value="LIIA06"/>
|
||||
<enumeration value="LIIA07"/>
|
||||
<enumeration value="LIIA08"/>
|
||||
<enumeration value="LIIA09"/>
|
||||
<enumeration value="LIIA10"/>
|
||||
<enumeration value="LIIA11"/>
|
||||
<enumeration value="LIIA12"/>
|
||||
<enumeration value="LIIA13"/>
|
||||
<enumeration value="LIIA14"/>
|
||||
<enumeration value="LIIA15"/>
|
||||
<enumeration value="LIIA16"/>
|
||||
<enumeration value="LIIA17"/>
|
||||
<enumeration value="LIIA18"/>
|
||||
<enumeration value="LIIA19"/>
|
||||
<enumeration value="LIIA20"/>
|
||||
<enumeration value="LIIA21"/>
|
||||
<enumeration value="LIIA22"/>
|
||||
<enumeration value="LIMY01"/>
|
||||
<enumeration value="LIMY02"/>
|
||||
<enumeration value="LIMY03"/>
|
||||
<enumeration value="LIMY04"/>
|
||||
<enumeration value="LIMY05"/>
|
||||
<enumeration value="LIMY06"/>
|
||||
<enumeration value="LIMY07"/>
|
||||
<enumeration value="LIMY08"/>
|
||||
<enumeration value="LIMY09"/>
|
||||
<enumeration value="LIMY10"/>
|
||||
<enumeration value="LIMY11"/>
|
||||
<enumeration value="LIMY12"/>
|
||||
<enumeration value="LIMY13"/>
|
||||
<enumeration value="LIMY14"/>
|
||||
<enumeration value="LIMY15"/>
|
||||
<enumeration value="LIMY16"/>
|
||||
<enumeration value="LIMY17"/>
|
||||
<enumeration value="LIMY18"/>
|
||||
<enumeration value="LIMY19"/>
|
||||
<enumeration value="LIMY20"/>
|
||||
<enumeration value="LIMY21"/>
|
||||
<enumeration value="LISC01"/>
|
||||
<enumeration value="LISC02"/>
|
||||
<enumeration value="LISC03"/>
|
||||
<enumeration value="LISC04"/>
|
||||
<enumeration value="LISC05"/>
|
||||
<enumeration value="LISC06"/>
|
||||
<enumeration value="LISC07"/>
|
||||
<enumeration value="LISC08"/>
|
||||
<enumeration value="LISC09"/>
|
||||
<enumeration value="LISC10"/>
|
||||
<enumeration value="LISC11"/>
|
||||
<enumeration value="LISC12"/>
|
||||
<enumeration value="LISC13"/>
|
||||
<enumeration value="LISC14"/>
|
||||
<enumeration value="LISC15"/>
|
||||
<enumeration value="LISC16"/>
|
||||
<enumeration value="LISC17"/>
|
||||
<enumeration value="LISC18"/>
|
||||
<enumeration value="LISC19"/>
|
||||
<enumeration value="LISC20"/>
|
||||
<enumeration value="LISC21"/>
|
||||
<enumeration value="LISP01"/>
|
||||
<enumeration value="LISP02"/>
|
||||
<enumeration value="LISP03"/>
|
||||
<enumeration value="LISP04"/>
|
||||
<enumeration value="LISP05"/>
|
||||
<enumeration value="LISP06"/>
|
||||
<enumeration value="LISP07"/>
|
||||
<enumeration value="LISP08"/>
|
||||
<enumeration value="LISP09"/>
|
||||
<enumeration value="LISP10"/>
|
||||
<enumeration value="LISP11"/>
|
||||
<enumeration value="LISP12"/>
|
||||
<enumeration value="LISP13"/>
|
||||
<enumeration value="LISP14"/>
|
||||
<enumeration value="LISP15"/>
|
||||
<enumeration value="LISP16"/>
|
||||
<enumeration value="LISP17"/>
|
||||
<enumeration value="LISP18"/>
|
||||
<enumeration value="LISP19"/>
|
||||
<enumeration value="LISP20"/>
|
||||
<enumeration value="LISP21"/>
|
||||
<enumeration value="LISP22"/>
|
||||
</restriction>
|
||||
</simpleType>
|
||||
|
||||
<simpleType name="cl_lg_rating">
|
||||
<restriction base="string">
|
||||
<enumeration value="LGDB01"/>
|
||||
<enumeration value="LGDB02"/>
|
||||
<enumeration value="LGDB03"/>
|
||||
<enumeration value="LGDB04"/>
|
||||
<enumeration value="LGDB05"/>
|
||||
<enumeration value="LGDB06"/>
|
||||
<enumeration value="LGDB07"/>
|
||||
<enumeration value="LGDB08"/>
|
||||
<enumeration value="LGDB09"/>
|
||||
<enumeration value="LGDB10"/>
|
||||
<enumeration value="LGDB11"/>
|
||||
<enumeration value="LGDB12"/>
|
||||
<enumeration value="LGDB13"/>
|
||||
<enumeration value="LGDB14"/>
|
||||
<enumeration value="LGDB15"/>
|
||||
<enumeration value="LGDB16"/>
|
||||
<enumeration value="LGDB17"/>
|
||||
<enumeration value="LGDB18"/>
|
||||
<enumeration value="LGDB19"/>
|
||||
<enumeration value="LGDB20"/>
|
||||
<enumeration value="LGDB21"/>
|
||||
<enumeration value="LGDB22"/>
|
||||
<enumeration value="LGDB23"/>
|
||||
<enumeration value="LGDB24"/>
|
||||
<enumeration value="LGDB25"/>
|
||||
<enumeration value="LGDB26"/>
|
||||
<enumeration value="LGIA01"/>
|
||||
<enumeration value="LGIA02"/>
|
||||
<enumeration value="LGIA03"/>
|
||||
<enumeration value="LGIA04"/>
|
||||
<enumeration value="LGIA05"/>
|
||||
<enumeration value="LGIA06"/>
|
||||
<enumeration value="LGIA07"/>
|
||||
<enumeration value="LGIA08"/>
|
||||
<enumeration value="LGIA09"/>
|
||||
<enumeration value="LGIA10"/>
|
||||
<enumeration value="LGIA11"/>
|
||||
<enumeration value="LGIA12"/>
|
||||
<enumeration value="LGIA13"/>
|
||||
<enumeration value="LGIA14"/>
|
||||
<enumeration value="LGIA15"/>
|
||||
<enumeration value="LGIA16"/>
|
||||
<enumeration value="LGIA17"/>
|
||||
<enumeration value="LGIA18"/>
|
||||
<enumeration value="LGIA19"/>
|
||||
<enumeration value="LGIA20"/>
|
||||
<enumeration value="LGIA21"/>
|
||||
<enumeration value="LGIA22"/>
|
||||
<enumeration value="LGMY01"/>
|
||||
<enumeration value="LGMY02"/>
|
||||
<enumeration value="LGMY03"/>
|
||||
<enumeration value="LGMY04"/>
|
||||
<enumeration value="LGMY05"/>
|
||||
<enumeration value="LGMY06"/>
|
||||
<enumeration value="LGMY07"/>
|
||||
<enumeration value="LGMY08"/>
|
||||
<enumeration value="LGMY09"/>
|
||||
<enumeration value="LGMY10"/>
|
||||
<enumeration value="LGMY11"/>
|
||||
<enumeration value="LGMY12"/>
|
||||
<enumeration value="LGMY13"/>
|
||||
<enumeration value="LGMY14"/>
|
||||
<enumeration value="LGMY15"/>
|
||||
<enumeration value="LGMY16"/>
|
||||
<enumeration value="LGMY17"/>
|
||||
<enumeration value="LGMY18"/>
|
||||
<enumeration value="LGMY19"/>
|
||||
<enumeration value="LGMY20"/>
|
||||
<enumeration value="LGMY21"/>
|
||||
<enumeration value="LGSC01"/>
|
||||
<enumeration value="LGSC02"/>
|
||||
<enumeration value="LGSC03"/>
|
||||
<enumeration value="LGSC04"/>
|
||||
<enumeration value="LGSC05"/>
|
||||
<enumeration value="LGSC06"/>
|
||||
<enumeration value="LGSC07"/>
|
||||
<enumeration value="LGSC08"/>
|
||||
<enumeration value="LGSC09"/>
|
||||
<enumeration value="LGSC10"/>
|
||||
<enumeration value="LGSC11"/>
|
||||
<enumeration value="LGSC12"/>
|
||||
<enumeration value="LGSC13"/>
|
||||
<enumeration value="LGSC14"/>
|
||||
<enumeration value="LGSC15"/>
|
||||
<enumeration value="LGSC16"/>
|
||||
<enumeration value="LGSC17"/>
|
||||
<enumeration value="LGSC18"/>
|
||||
<enumeration value="LGSC19"/>
|
||||
<enumeration value="LGSC20"/>
|
||||
<enumeration value="LGSC21"/>
|
||||
<enumeration value="LGSP01"/>
|
||||
<enumeration value="LGSP02"/>
|
||||
<enumeration value="LGSP03"/>
|
||||
<enumeration value="LGSP04"/>
|
||||
<enumeration value="LGSP05"/>
|
||||
<enumeration value="LGSP06"/>
|
||||
<enumeration value="LGSP07"/>
|
||||
<enumeration value="LGSP08"/>
|
||||
<enumeration value="LGSP09"/>
|
||||
<enumeration value="LGSP10"/>
|
||||
<enumeration value="LGSP11"/>
|
||||
<enumeration value="LGSP12"/>
|
||||
<enumeration value="LGSP13"/>
|
||||
<enumeration value="LGSP14"/>
|
||||
<enumeration value="LGSP15"/>
|
||||
<enumeration value="LGSP16"/>
|
||||
<enumeration value="LGSP17"/>
|
||||
<enumeration value="LGSP18"/>
|
||||
<enumeration value="LGSP19"/>
|
||||
<enumeration value="LGSP20"/>
|
||||
<enumeration value="LGSP21"/>
|
||||
<enumeration value="LGSP22"/>
|
||||
</restriction>
|
||||
</simpleType>
|
||||
|
||||
<simpleType name="cl_sa_rating">
|
||||
<restriction base="string">
|
||||
<enumeration value="SADB01"/>
|
||||
<enumeration value="SADB02"/>
|
||||
<enumeration value="SADB03"/>
|
||||
<enumeration value="SADB04"/>
|
||||
<enumeration value="SADB05"/>
|
||||
<enumeration value="SADB06"/>
|
||||
<enumeration value="SADB07"/>
|
||||
<enumeration value="SADB08"/>
|
||||
<enumeration value="SADB09"/>
|
||||
<enumeration value="SAIA01"/>
|
||||
<enumeration value="SAIA02"/>
|
||||
<enumeration value="SAIA03"/>
|
||||
<enumeration value="SAIA04"/>
|
||||
<enumeration value="SAIA05"/>
|
||||
<enumeration value="SAIA06"/>
|
||||
<enumeration value="SAIA07"/>
|
||||
<enumeration value="SAMY01"/>
|
||||
<enumeration value="SAMY02"/>
|
||||
<enumeration value="SAMY03"/>
|
||||
<enumeration value="SAMY04"/>
|
||||
<enumeration value="SASC01"/>
|
||||
<enumeration value="SASC02"/>
|
||||
<enumeration value="SASC03"/>
|
||||
<enumeration value="SASC04"/>
|
||||
<enumeration value="SASC05"/>
|
||||
<enumeration value="SASC06"/>
|
||||
<enumeration value="SASC07"/>
|
||||
<enumeration value="SASP01"/>
|
||||
<enumeration value="SASP02"/>
|
||||
<enumeration value="SASP03"/>
|
||||
<enumeration value="SASP04"/>
|
||||
<enumeration value="SASP05"/>
|
||||
<enumeration value="SASP06"/>
|
||||
<enumeration value="SASP07"/>
|
||||
<enumeration value="SPDB01"/>
|
||||
<enumeration value="SPDB02"/>
|
||||
<enumeration value="SPDB03"/>
|
||||
<enumeration value="SPDB04"/>
|
||||
<enumeration value="SPDB05"/>
|
||||
<enumeration value="SPDB06"/>
|
||||
<enumeration value="SPDB07"/>
|
||||
<enumeration value="SPDB08"/>
|
||||
<enumeration value="SPDB09"/>
|
||||
<enumeration value="SPIA01"/>
|
||||
<enumeration value="SPIA02"/>
|
||||
<enumeration value="SPIA03"/>
|
||||
<enumeration value="SPIA04"/>
|
||||
<enumeration value="SPIA05"/>
|
||||
<enumeration value="SPIA06"/>
|
||||
<enumeration value="SPIA07"/>
|
||||
<enumeration value="SPMY01"/>
|
||||
<enumeration value="SPMY02"/>
|
||||
<enumeration value="SPMY03"/>
|
||||
<enumeration value="SPMY04"/>
|
||||
<enumeration value="SPSC01"/>
|
||||
<enumeration value="SPSC02"/>
|
||||
<enumeration value="SPSC03"/>
|
||||
<enumeration value="SPSC04"/>
|
||||
<enumeration value="SPSC05"/>
|
||||
<enumeration value="SPSC06"/>
|
||||
<enumeration value="SPSC07"/>
|
||||
<enumeration value="SPSP01"/>
|
||||
<enumeration value="SPSP02"/>
|
||||
<enumeration value="SPSP03"/>
|
||||
<enumeration value="SPSP04"/>
|
||||
<enumeration value="SPSP05"/>
|
||||
<enumeration value="SPSP06"/>
|
||||
<enumeration value="SPSP07"/>
|
||||
</restriction>
|
||||
</simpleType>
|
||||
|
||||
<simpleType name="cl_cgr_rating">
|
||||
<restriction base="string">
|
||||
<enumeration value="10"/>
|
||||
<enumeration value="109"/>
|
||||
<enumeration value="20"/>
|
||||
<enumeration value="30"/>
|
||||
<enumeration value="40"/>
|
||||
<enumeration value="99"/>
|
||||
</restriction>
|
||||
</simpleType>
|
||||
|
||||
<simpleType name="cl_reference_rate">
|
||||
<restriction base="string">
|
||||
<enumeration value="10MEUBOR"/>
|
||||
<enumeration value="10YEUIRS"/>
|
||||
<enumeration value="10YGOTTEX"/>
|
||||
<enumeration value="10YICAP"/>
|
||||
<enumeration value="10YICES"/>
|
||||
<enumeration value="10YOLO"/>
|
||||
<enumeration value="11MEUBOR"/>
|
||||
<enumeration value="11YEUIRS"/>
|
||||
<enumeration value="11YICAP"/>
|
||||
<enumeration value="11YOLO"/>
|
||||
<enumeration value="12YEUIRS"/>
|
||||
<enumeration value="12YGOTTEX"/>
|
||||
<enumeration value="12YICAP"/>
|
||||
<enumeration value="12YICES"/>
|
||||
<enumeration value="12YOLO"/>
|
||||
<enumeration value="13YEUIRS"/>
|
||||
<enumeration value="13YICAP"/>
|
||||
<enumeration value="13YOLO"/>
|
||||
<enumeration value="14YEUIRS"/>
|
||||
<enumeration value="14YICAP"/>
|
||||
<enumeration value="14YOLO"/>
|
||||
<enumeration value="15YEUIRS"/>
|
||||
<enumeration value="15YGOTTEX"/>
|
||||
<enumeration value="15YICAP"/>
|
||||
<enumeration value="15YICES"/>
|
||||
<enumeration value="15YOLO"/>
|
||||
<enumeration value="16YICAP"/>
|
||||
<enumeration value="16YOLO"/>
|
||||
<enumeration value="17YICAP"/>
|
||||
<enumeration value="17YOLO"/>
|
||||
<enumeration value="18YICAP"/>
|
||||
<enumeration value="18YOLO"/>
|
||||
<enumeration value="19YICAP"/>
|
||||
<enumeration value="19YOLO"/>
|
||||
<enumeration value="1MEUBOR"/>
|
||||
<enumeration value="1MEUCMS"/>
|
||||
<enumeration value="1MLIBOR"/>
|
||||
<enumeration value="1MLICMS"/>
|
||||
<enumeration value="1WEUBOR"/>
|
||||
<enumeration value="1WEUCMS"/>
|
||||
<enumeration value="1WLIBOR"/>
|
||||
<enumeration value="1WLICMS"/>
|
||||
<enumeration value="1YEUBOR"/>
|
||||
<enumeration value="1YEUCMS"/>
|
||||
<enumeration value="1YEUIRS"/>
|
||||
<enumeration value="1YICAP"/>
|
||||
<enumeration value="1YICES"/>
|
||||
<enumeration value="1YLIBOR"/>
|
||||
<enumeration value="1YLICMS"/>
|
||||
<enumeration value="1YOLO"/>
|
||||
<enumeration value="20YEUIRS"/>
|
||||
<enumeration value="20YGOTTEX"/>
|
||||
<enumeration value="20YICAP"/>
|
||||
<enumeration value="20YICES"/>
|
||||
<enumeration value="20YOLO"/>
|
||||
<enumeration value="21YICAP"/>
|
||||
<enumeration value="21YOLO"/>
|
||||
<enumeration value="22YICAP"/>
|
||||
<enumeration value="22YOLO"/>
|
||||
<enumeration value="23YICAP"/>
|
||||
<enumeration value="23YOLO"/>
|
||||
<enumeration value="24YICAP"/>
|
||||
<enumeration value="24YOLO"/>
|
||||
<enumeration value="25YEUIRS"/>
|
||||
<enumeration value="25YICAP"/>
|
||||
<enumeration value="25YICES"/>
|
||||
<enumeration value="25YOLO"/>
|
||||
<enumeration value="26YICAP"/>
|
||||
<enumeration value="26YOLO"/>
|
||||
<enumeration value="27YICAP"/>
|
||||
<enumeration value="27YOLO"/>
|
||||
<enumeration value="28YICAP"/>
|
||||
<enumeration value="28YOLO"/>
|
||||
<enumeration value="29YICAP"/>
|
||||
<enumeration value="29YOLO"/>
|
||||
<enumeration value="2MEUBOR"/>
|
||||
<enumeration value="2MEUCMS"/>
|
||||
<enumeration value="2MLIBOR"/>
|
||||
<enumeration value="2MLICMS"/>
|
||||
<enumeration value="2WEUBOR"/>
|
||||
<enumeration value="2WEUCMS"/>
|
||||
<enumeration value="2WLIBOR"/>
|
||||
<enumeration value="2WLICMS"/>
|
||||
<enumeration value="2YEUIRS"/>
|
||||
<enumeration value="2YGOTTEX"/>
|
||||
<enumeration value="2YICAP"/>
|
||||
<enumeration value="2YICES"/>
|
||||
<enumeration value="2YOLO"/>
|
||||
<enumeration value="30YEUIRS"/>
|
||||
<enumeration value="30YGOTTEX"/>
|
||||
<enumeration value="30YICAP"/>
|
||||
<enumeration value="30YICES"/>
|
||||
<enumeration value="30YOLO"/>
|
||||
<enumeration value="35YICAP"/>
|
||||
<enumeration value="3MEUBOR"/>
|
||||
<enumeration value="3MEUCMS"/>
|
||||
<enumeration value="3MLIBOR"/>
|
||||
<enumeration value="3MLICMS"/>
|
||||
<enumeration value="3WEUBOR"/>
|
||||
<enumeration value="3YEUIRS"/>
|
||||
<enumeration value="3YGOTTEX"/>
|
||||
<enumeration value="3YICAP"/>
|
||||
<enumeration value="3YICES"/>
|
||||
<enumeration value="3YOLO"/>
|
||||
<enumeration value="40YICAP"/>
|
||||
<enumeration value="4MEUBOR"/>
|
||||
<enumeration value="4YEUIRS"/>
|
||||
<enumeration value="4YGOTTEX"/>
|
||||
<enumeration value="4YICAP"/>
|
||||
<enumeration value="4YICES"/>
|
||||
<enumeration value="4YOLO"/>
|
||||
<enumeration value="50YICAP"/>
|
||||
<enumeration value="5MEUBOR"/>
|
||||
<enumeration value="5YEUIRS"/>
|
||||
<enumeration value="5YGOTTEX"/>
|
||||
<enumeration value="5YICAP"/>
|
||||
<enumeration value="5YICES"/>
|
||||
<enumeration value="5YOLO"/>
|
||||
<enumeration value="6MEUBOR"/>
|
||||
<enumeration value="6MEUCMS"/>
|
||||
<enumeration value="6MLIBOR"/>
|
||||
<enumeration value="6MLICMS"/>
|
||||
<enumeration value="6YEUIRS"/>
|
||||
<enumeration value="6YGOTTEX"/>
|
||||
<enumeration value="6YICAP"/>
|
||||
<enumeration value="6YICES"/>
|
||||
<enumeration value="6YOLO"/>
|
||||
<enumeration value="7MEUBOR"/>
|
||||
<enumeration value="7YEUIRS"/>
|
||||
<enumeration value="7YGOTTEX"/>
|
||||
<enumeration value="7YICAP"/>
|
||||
<enumeration value="7YICES"/>
|
||||
<enumeration value="7YOLO"/>
|
||||
<enumeration value="8MEUBOR"/>
|
||||
<enumeration value="8YEUIRS"/>
|
||||
<enumeration value="8YGOTTEX"/>
|
||||
<enumeration value="8YICAP"/>
|
||||
<enumeration value="8YICES"/>
|
||||
<enumeration value="8YOLO"/>
|
||||
<enumeration value="9MEUBOR"/>
|
||||
<enumeration value="9MEUCMS"/>
|
||||
<enumeration value="9MLIBOR"/>
|
||||
<enumeration value="9MLICMS"/>
|
||||
<enumeration value="9YEUIRS"/>
|
||||
<enumeration value="9YGOTTEX"/>
|
||||
<enumeration value="9YICAP"/>
|
||||
<enumeration value="9YICES"/>
|
||||
<enumeration value="9YOLO"/>
|
||||
<enumeration value="A10YEUIRS"/>
|
||||
<enumeration value="A11YEUIRS"/>
|
||||
<enumeration value="A12YEUIRS"/>
|
||||
<enumeration value="A13YEUIRS"/>
|
||||
<enumeration value="A14YEUIRS"/>
|
||||
<enumeration value="A15YEUIRS"/>
|
||||
<enumeration value="A1MEUBOR"/>
|
||||
<enumeration value="A1MEUCMS"/>
|
||||
<enumeration value="A1MLIBOR"/>
|
||||
<enumeration value="A1MLICMS"/>
|
||||
<enumeration value="A1WEUBOR"/>
|
||||
<enumeration value="A1WEUCMS"/>
|
||||
<enumeration value="A1WLIBOR"/>
|
||||
<enumeration value="A1WLICMS"/>
|
||||
<enumeration value="A1YEUBOR"/>
|
||||
<enumeration value="A1YEUCMS"/>
|
||||
<enumeration value="A1YEUIRS"/>
|
||||
<enumeration value="A1YLIBOR"/>
|
||||
<enumeration value="A1YLICMS"/>
|
||||
<enumeration value="A20YEUIRS"/>
|
||||
<enumeration value="A25YEUIRS"/>
|
||||
<enumeration value="A2MEUBOR"/>
|
||||
<enumeration value="A2MEUCMS"/>
|
||||
<enumeration value="A2MLIBOR"/>
|
||||
<enumeration value="A2MLICMS"/>
|
||||
<enumeration value="A2WEUBOR"/>
|
||||
<enumeration value="A2WEUCMS"/>
|
||||
<enumeration value="A2WLIBOR"/>
|
||||
<enumeration value="A2WLICMS"/>
|
||||
<enumeration value="A2YEUIRS"/>
|
||||
<enumeration value="A30YEUIRS"/>
|
||||
<enumeration value="A3MEUBOR"/>
|
||||
<enumeration value="A3MEUCMS"/>
|
||||
<enumeration value="A3MLIBOR"/>
|
||||
<enumeration value="A3MLICMS"/>
|
||||
<enumeration value="A3YEUIRS"/>
|
||||
<enumeration value="A4YEUIRS"/>
|
||||
<enumeration value="A5YEUIRS"/>
|
||||
<enumeration value="A6MEUBOR"/>
|
||||
<enumeration value="A6MEUCMS"/>
|
||||
<enumeration value="A6MLIBOR"/>
|
||||
<enumeration value="A6MLICMS"/>
|
||||
<enumeration value="A6YEUIRS"/>
|
||||
<enumeration value="A7YEUIRS"/>
|
||||
<enumeration value="A8YEUIRS"/>
|
||||
<enumeration value="A9MEUBOR"/>
|
||||
<enumeration value="A9MEUCMS"/>
|
||||
<enumeration value="A9MLIBOR"/>
|
||||
<enumeration value="A9MLICMS"/>
|
||||
<enumeration value="A9YEUIRS"/>
|
||||
<enumeration value="ATG815"/>
|
||||
<enumeration value="ATG8WBG"/>
|
||||
<enumeration value="ATGMIN10"/>
|
||||
<enumeration value="ATGMIN8"/>
|
||||
<enumeration value="CNOTEC10"/>
|
||||
<enumeration value="EONIA"/>
|
||||
<enumeration value="ESBond"/>
|
||||
<enumeration value="ESTR"/>
|
||||
<enumeration value="EURR002W"/>
|
||||
<enumeration value="EUSA10M"/>
|
||||
<enumeration value="EUSA10Y"/>
|
||||
<enumeration value="EUSA11M"/>
|
||||
<enumeration value="EUSA11Y"/>
|
||||
<enumeration value="EUSA12M"/>
|
||||
<enumeration value="EUSA12Y"/>
|
||||
<enumeration value="EUSA13Y"/>
|
||||
<enumeration value="EUSA14Y"/>
|
||||
<enumeration value="EUSA15M"/>
|
||||
<enumeration value="EUSA15Y"/>
|
||||
<enumeration value="EUSA16Y"/>
|
||||
<enumeration value="EUSA17Y"/>
|
||||
<enumeration value="EUSA18M"/>
|
||||
<enumeration value="EUSA18Y"/>
|
||||
<enumeration value="EUSA19Y"/>
|
||||
<enumeration value="EUSA1D"/>
|
||||
<enumeration value="EUSA1M"/>
|
||||
<enumeration value="EUSA1W"/>
|
||||
<enumeration value="EUSA20Y"/>
|
||||
<enumeration value="EUSA21M"/>
|
||||
<enumeration value="EUSA27M"/>
|
||||
<enumeration value="EUSA2M"/>
|
||||
<enumeration value="EUSA2Y"/>
|
||||
<enumeration value="EUSA30M"/>
|
||||
<enumeration value="EUSA33M"/>
|
||||
<enumeration value="EUSA3M"/>
|
||||
<enumeration value="EUSA3Y"/>
|
||||
<enumeration value="EUSA4M"/>
|
||||
<enumeration value="EUSA4Y"/>
|
||||
<enumeration value="EUSA5M"/>
|
||||
<enumeration value="EUSA5Y"/>
|
||||
<enumeration value="EUSA6M"/>
|
||||
<enumeration value="EUSA6Y"/>
|
||||
<enumeration value="EUSA7M"/>
|
||||
<enumeration value="EUSA7Y"/>
|
||||
<enumeration value="EUSA8M"/>
|
||||
<enumeration value="EUSA8Y"/>
|
||||
<enumeration value="EUSA9M"/>
|
||||
<enumeration value="EUSA9Y"/>
|
||||
<enumeration value="MUDRB"/>
|
||||
<enumeration value="OTHER"/>
|
||||
<enumeration value="OTHER_NS"/>
|
||||
<enumeration value="QMUDRB"/>
|
||||
<enumeration value="QUDRB"/>
|
||||
<enumeration value="RENDSTATO"/>
|
||||
<enumeration value="SMUDRB"/>
|
||||
<enumeration value="SUDRB"/>
|
||||
<enumeration value="T4M"/>
|
||||
<enumeration value="TAG"/>
|
||||
<enumeration value="TAM"/>
|
||||
<enumeration value="TME"/>
|
||||
<enumeration value="UDRB"/>
|
||||
<enumeration value="UDRBQWBG"/>
|
||||
<enumeration value="YUDRB"/>
|
||||
</restriction>
|
||||
</simpleType>
|
||||
|
||||
<simpleType name="cl_eurosystem_iso_code">
|
||||
<restriction base="string">
|
||||
<enumeration value="AT"/>
|
||||
<enumeration value="BE"/>
|
||||
<enumeration value="BG"/>
|
||||
<enumeration value="CY"/>
|
||||
<enumeration value="DE"/>
|
||||
<enumeration value="EE"/>
|
||||
<enumeration value="ES"/>
|
||||
<enumeration value="EU"/>
|
||||
<enumeration value="FI"/>
|
||||
<enumeration value="FR"/>
|
||||
<enumeration value="GR"/>
|
||||
<enumeration value="HR"/>
|
||||
<enumeration value="IE"/>
|
||||
<enumeration value="IT"/>
|
||||
<enumeration value="LT"/>
|
||||
<enumeration value="LU"/>
|
||||
<enumeration value="LV"/>
|
||||
<enumeration value="MT"/>
|
||||
<enumeration value="NL"/>
|
||||
<enumeration value="PT"/>
|
||||
<enumeration value="SI"/>
|
||||
<enumeration value="SK"/>
|
||||
</restriction>
|
||||
</simpleType>
|
||||
|
||||
|
||||
</schema>
|
||||
2412
airflow/ods/c2d/uc/config/xsd/UseOfCollateralMessage.xsd
Normal file
2412
airflow/ods/c2d/uc/config/xsd/UseOfCollateralMessage.xsd
Normal file
File diff suppressed because it is too large
Load Diff
386
airflow/ods/c2d/uc/config/yaml/c2d_uc_dissem.yaml
Normal file
386
airflow/ods/c2d/uc/config/yaml/c2d_uc_dissem.yaml
Normal file
@@ -0,0 +1,386 @@
|
||||
# Global configurations
|
||||
tmpdir: /tmp
|
||||
inbox_prefix: INBOX/C2D/UC_DISSEM
|
||||
workflow_name: w_OU_C2D_UC_DISSEM
|
||||
validation_schema_path: '/opt/airflow/src/airflow/ods/c2d/uc/config/xsd/UseOfCollateralMessage.xsd'
|
||||
bucket: mrds_inbox_tst
|
||||
file_type: xml
|
||||
|
||||
# List of tasks
|
||||
tasks:
|
||||
- task_name: t_ODS_C2D_UC_DISSEM_create_metadata_file
|
||||
ods_prefix: INBOX/C2D/UC_DISSEM/A_UC_DISSEM_METADATA_LOADS
|
||||
output_table: A_UC_DISSEM_METADATA_LOADS
|
||||
namespaces:
|
||||
ns: 'http://c2d.escb.eu/UseOfCollateralMessage'
|
||||
output_columns:
|
||||
- type: 'a_key'
|
||||
column_header: 'A_KEY'
|
||||
- type: 'workflow_key'
|
||||
column_header: 'A_WORKFLOW_HISTORY_KEY'
|
||||
- type: 'xpath'
|
||||
value: '//ns:DisseminationFile/@version'
|
||||
column_header: 'C2D_VERSION'
|
||||
is_key: 'N'
|
||||
- type: 'xpath'
|
||||
value: '//ns:MetaInformation/ns:DateCreated'
|
||||
column_header: 'FILE_CREATION_DATE'
|
||||
is_key: 'N'
|
||||
- type: 'xpath'
|
||||
value: '//ns:MetaInformation/ns:NumberOfSuspectRecords'
|
||||
column_header: 'NO_OF_SUSPECT_RECORDS'
|
||||
is_key: 'N'
|
||||
- type: 'xpath'
|
||||
value: '//ns:MetaInformation/ns:ReportingNCB'
|
||||
column_header: 'REPORTING_NCB'
|
||||
is_key: 'N'
|
||||
- type: 'xpath'
|
||||
value: '//ns:MetaInformation/ns:SnapshotDate'
|
||||
column_header: 'SNAPSHOT_DATE'
|
||||
is_key: 'N'
|
||||
- type: 'static'
|
||||
value: 'N'
|
||||
column_header: 'PROCESSED_TO_DWH'
|
||||
|
||||
- task_name: ou_C2D_UC_DISSEM_create_marketable_assets_file
|
||||
ods_prefix: INBOX/C2D/UC_DISSEM/UC_MA_DISSEM
|
||||
output_table: UC_MA_DISSEM
|
||||
namespaces:
|
||||
ns: 'http://c2d.escb.eu/UseOfCollateralMessage'
|
||||
output_columns:
|
||||
- type: 'a_key'
|
||||
column_header: 'A_KEY'
|
||||
- type: 'workflow_key'
|
||||
column_header: 'A_WORKFLOW_HISTORY_KEY'
|
||||
- type: 'xpath'
|
||||
value: '//ns:MetaInformation/ns:ReportingNCB'
|
||||
is_key: 'Y'
|
||||
column_header: 'REPORTING_NCB'
|
||||
- type: 'xpath'
|
||||
value: '//ns:MetaInformation/ns:SnapshotDate'
|
||||
is_key: 'Y'
|
||||
column_header: 'SNAPSHOT_DATE'
|
||||
- type: 'xpath'
|
||||
value: '//ns:MetaInformation/ns:DateCreated'
|
||||
column_header: 'FILE_CREATION_DATE'
|
||||
is_key: 'Y'
|
||||
- type: 'xpath'
|
||||
value: '//ns:MarketableAssets/ns:MarketableAsset/ns:MFIId'
|
||||
column_header: 'MFI_ID'
|
||||
is_key: 'N'
|
||||
- type: 'xpath'
|
||||
value: '//ns:MarketableAssets/ns:MarketableAsset/ns:RegistrationCode/ns:ISINCode'
|
||||
column_header: 'ISIN_CODE'
|
||||
is_key: 'N'
|
||||
- type: 'static'
|
||||
value: ''
|
||||
column_header: 'OTHER_REG_NO'
|
||||
- type: 'xpath'
|
||||
value: '//ns:MarketableAssets/ns:MarketableAsset/ns:NominalAmountSubmitted'
|
||||
column_header: 'NOM_AMT_SUBMITTED'
|
||||
is_key: 'N'
|
||||
- type: 'xpath'
|
||||
value: '//ns:MarketableAssets/ns:MarketableAsset/ns:CollateralValueBeforeHaircuts'
|
||||
column_header: 'COLL_BEFORE_HAIRCUTS'
|
||||
is_key: 'N'
|
||||
- type: 'xpath'
|
||||
value: '//ns:MarketableAssets/ns:MarketableAsset/ns:CollateralValueAfterHaircuts'
|
||||
column_header: 'COLL_AFTER_HAIRCUTS'
|
||||
is_key: 'N'
|
||||
- type: 'xpath'
|
||||
value: '//ns:MarketableAssets/ns:MarketableAsset/ns:TypeOfSystem'
|
||||
column_header: 'TYPE_OF_SYSTEM'
|
||||
is_key: 'N'
|
||||
- type: 'static'
|
||||
value: ''
|
||||
column_header: 'TYPE_OF_OPERATION'
|
||||
- type: 'xpath'
|
||||
value: '//ns:MarketableAssets/ns:MarketableAsset/ns:DomesticOrXborder'
|
||||
column_header: 'DOM_OR_XBORDER'
|
||||
is_key: 'N'
|
||||
- type: 'static'
|
||||
value: ''
|
||||
column_header: 'ISSUER_CAS'
|
||||
- type: 'static'
|
||||
value: ''
|
||||
column_header: 'ISSUER_CRED_PROVIDER'
|
||||
- type: 'static'
|
||||
value: ''
|
||||
column_header: 'ISSUER_CLASS'
|
||||
- type: 'static'
|
||||
value: ''
|
||||
column_header: 'ISSUER_RATING_ENUM_VALUE'
|
||||
- type: 'static'
|
||||
value: ''
|
||||
column_header: 'ISSUER_RATING_NUMBER_VALUE'
|
||||
- type: 'static'
|
||||
value: ''
|
||||
column_header: 'NCB_COMMENT'
|
||||
- type: 'xpath'
|
||||
value: '//ns:MarketableAssets/ns:MarketableAsset/ns:MobilisationChannel'
|
||||
column_header: 'MOBILISATION_CHANNEL'
|
||||
is_key: 'N'
|
||||
- type: 'xpath'
|
||||
value: '//ns:MarketableAssets/ns:MarketableAsset/ns:CCB'
|
||||
column_header: 'CCB'
|
||||
is_key: 'N'
|
||||
- type: 'xpath'
|
||||
value: '//ns:MarketableAssets/ns:MarketableAsset/ns:InvestorSSS'
|
||||
column_header: 'INVESTOR_SSS'
|
||||
is_key: 'N'
|
||||
- type: 'static'
|
||||
value: ''
|
||||
column_header: 'INTERMEDIARY_SSS'
|
||||
- type: 'xpath'
|
||||
value: '//ns:MarketableAssets/ns:MarketableAsset/ns:IssuerSSS'
|
||||
column_header: 'ISSUER_SSS'
|
||||
is_key: 'N'
|
||||
- type: 'static'
|
||||
value: ''
|
||||
column_header: 'TRIPARTY_AGENT'
|
||||
- type: 'static'
|
||||
value: ''
|
||||
column_header: 'SUSPECT_ID'
|
||||
- type: 'static'
|
||||
value: ''
|
||||
column_header: 'QUALITY_CHECK_STATUS'
|
||||
- type: 'static'
|
||||
value: ''
|
||||
column_header: 'ERROR_CODE'
|
||||
- type: 'static'
|
||||
value: ''
|
||||
column_header: 'ERROR_MESSAGE'
|
||||
- type: 'static'
|
||||
value: ''
|
||||
column_header: 'ERROR_POSITION_IN_FILE'
|
||||
|
||||
- task_name: ou_C2D_UC_DISSEM_create_nonmarketable_assets_file
|
||||
ods_prefix: INBOX/C2D/UC_DISSEM/UC_NMA_DISSEM
|
||||
output_table: UC_NMA_DISSEM
|
||||
namespaces:
|
||||
ns: 'http://c2d.escb.eu/UseOfCollateralMessage'
|
||||
xsi: 'http://www.w3.org/2001/XMLSchema-instance'
|
||||
output_columns:
|
||||
- type: 'a_key'
|
||||
column_header: 'A_KEY'
|
||||
- type: 'workflow_key'
|
||||
column_header: 'A_WORKFLOW_HISTORY_KEY'
|
||||
- type: 'xpath'
|
||||
value: '//ns:MetaInformation/ns:ReportingNCB'
|
||||
is_key: 'Y'
|
||||
column_header: 'REPORTING_NCB'
|
||||
- type: 'xpath'
|
||||
value: '//ns:MetaInformation/ns:SnapshotDate'
|
||||
is_key: 'Y'
|
||||
column_header: 'SNAPSHOT_DATE'
|
||||
- type: 'xpath'
|
||||
value: '//ns:MetaInformation/ns:DateCreated'
|
||||
column_header: 'FILE_CREATION_DATE'
|
||||
is_key: 'Y'
|
||||
- type: 'xpath'
|
||||
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:MFIId'
|
||||
column_header: 'MFI_ID'
|
||||
is_key: 'N'
|
||||
- type: 'xpath'
|
||||
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:OtherRegistrationNumber'
|
||||
column_header: 'OTHER_REG_NO'
|
||||
is_key: 'N'
|
||||
- type: 'xpath'
|
||||
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:TypeOfSystem'
|
||||
column_header: 'TYPE_OF_SYSTEM'
|
||||
is_key: 'N'
|
||||
- type: 'static'
|
||||
value: ''
|
||||
column_header: 'TYPE_OF_OPERATION'
|
||||
- type: 'xpath'
|
||||
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:DomesticOrXborder'
|
||||
column_header: 'DOM_OR_XBORDER'
|
||||
is_key: 'N'
|
||||
- type: 'xpath'
|
||||
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:NonMktAssetType'
|
||||
column_header: 'NON_MKT_ASSET_TYPE'
|
||||
is_key: 'N'
|
||||
- type: 'xpath'
|
||||
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:DateOfMaturity'
|
||||
column_header: 'MATURITY_DATE'
|
||||
is_key: 'N'
|
||||
- type: 'xpath'
|
||||
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:InterestPaymentType'
|
||||
column_header: 'INTEREST_PAYMENT_TYPE'
|
||||
is_key: 'N'
|
||||
- type: 'xpath'
|
||||
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:Cap'
|
||||
column_header: 'CAP'
|
||||
is_key: 'N'
|
||||
- type: 'xpath'
|
||||
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:ReferenceRate'
|
||||
column_header: 'REFERENCE_RATE'
|
||||
is_key: 'N'
|
||||
- type: 'static'
|
||||
value: ''
|
||||
column_header: 'REFERENCE_RATE_COMMENT'
|
||||
- type: 'xpath'
|
||||
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:CollateralValueBeforeHaircuts'
|
||||
column_header: 'COLL_BEFORE_HAIRCUTS'
|
||||
is_key: 'N'
|
||||
- type: 'xpath'
|
||||
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:CollateralValueAfterHaircuts'
|
||||
column_header: 'COLL_AFTER_HAIRCUTS'
|
||||
is_key: 'N'
|
||||
- type: 'xpath'
|
||||
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:NumberOfAggregatedDebtors'
|
||||
column_header: 'NO_AGGR_DEBTORS'
|
||||
is_key: 'N'
|
||||
- type: 'xpath'
|
||||
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:EligibleViaGuarantor'
|
||||
column_header: 'ELIGIBLE_VIA_GUAR'
|
||||
is_key: 'N'
|
||||
- type: 'xpath'
|
||||
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:Debtor/@xsi:type'
|
||||
column_header: 'DEBTOR_TYPE'
|
||||
is_key: 'N'
|
||||
- type: 'xpath'
|
||||
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:Debtor/ns:Name'
|
||||
column_header: 'DEBTOR_NAME'
|
||||
is_key: 'N'
|
||||
- type: 'xpath'
|
||||
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:Debtor/ns:ID/@xsi:type'
|
||||
column_header: 'DEBTOR_ID_TYPE'
|
||||
is_key: 'N'
|
||||
- type: 'xpath'
|
||||
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:Debtor/ns:ID/ns:value'
|
||||
column_header: 'DEBTOR_ID'
|
||||
is_key: 'N'
|
||||
- type: 'xpath'
|
||||
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:Debtor/ns:Class'
|
||||
column_header: 'DEBTOR_CLASS'
|
||||
is_key: 'N'
|
||||
- type: 'xpath'
|
||||
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:Debtor/ns:Residence'
|
||||
column_header: 'DEBTOR_RESIDENCE'
|
||||
is_key: 'N'
|
||||
- type: 'xpath'
|
||||
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:Debtor/ns:CreditAssessmentSource'
|
||||
column_header: 'DEBTOR_CAS'
|
||||
is_key: 'N'
|
||||
- type: 'xpath'
|
||||
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:Debtor/ns:CredAssessSysProvider'
|
||||
column_header: 'DEBTOR_CRED_PROV'
|
||||
is_key: 'N'
|
||||
- type: 'static'
|
||||
value: ''
|
||||
column_header: 'DEBTOR_RATING_ENUM_VALUE'
|
||||
- type: 'xpath'
|
||||
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:Debtor/ns:Rating/ns:NumberValue'
|
||||
column_header: 'DEBTOR_RATING_NUMBER_VALUE'
|
||||
is_key: 'N'
|
||||
- type: 'static'
|
||||
value: ''
|
||||
column_header: 'GUAR_TYPE'
|
||||
- type: 'static'
|
||||
value: ''
|
||||
column_header: 'GUAR_NAME'
|
||||
- type: 'static'
|
||||
value: ''
|
||||
column_header: 'GUAR_ID_TYPE'
|
||||
- type: 'static'
|
||||
value: ''
|
||||
column_header: 'GUAR_ID'
|
||||
- type: 'static'
|
||||
value: ''
|
||||
column_header: 'GUAR_CLASS'
|
||||
- type: 'static'
|
||||
value: ''
|
||||
column_header: 'GUAR_RESIDENCE'
|
||||
- type: 'static'
|
||||
value: ''
|
||||
column_header: 'GUAR_CRED_CAS'
|
||||
- type: 'static'
|
||||
value: ''
|
||||
column_header: 'GUAR_CRED_PROV'
|
||||
- type: 'static'
|
||||
value: ''
|
||||
column_header: 'GUAR_RATING_ENUM_VALUE'
|
||||
- type: 'static'
|
||||
value: ''
|
||||
column_header: 'GUAR_RATING_NUMBER_VALUE'
|
||||
- type: 'xpath'
|
||||
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:NumberOfAggregatedAssets'
|
||||
column_header: 'NO_AGGR_ASSETS'
|
||||
is_key: 'N'
|
||||
- type: 'xpath'
|
||||
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:Denomination'
|
||||
column_header: 'DENOMINATION'
|
||||
is_key: 'N'
|
||||
- type: 'xpath'
|
||||
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:Secured'
|
||||
column_header: 'SECURED_FLAG'
|
||||
is_key: 'N'
|
||||
- type: 'xpath'
|
||||
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:ResidualMaturity'
|
||||
column_header: 'RESIDUAL_MATURITY'
|
||||
is_key: 'N'
|
||||
- type: 'xpath'
|
||||
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:BucketSize'
|
||||
column_header: 'BUCKET_SIZE'
|
||||
is_key: 'N'
|
||||
- type: 'xpath'
|
||||
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:NCBComment'
|
||||
column_header: 'NCB_COMMENT'
|
||||
is_key: 'N'
|
||||
- type: 'xpath'
|
||||
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:ValuationMethodology'
|
||||
column_header: 'VALUATION_METHODOLOGY'
|
||||
is_key: 'N'
|
||||
- type: 'xpath'
|
||||
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:NominalAmountSubmitted'
|
||||
column_header: 'NOM_AMT_SUBMITTED'
|
||||
is_key: 'N'
|
||||
- type: 'xpath'
|
||||
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:ResettingPeriodMoreThanOneYear'
|
||||
column_header: 'RESET_PERIOD_MORE_ONE_YEAR'
|
||||
is_key: 'N'
|
||||
- type: 'xpath'
|
||||
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:MobilisationChannel'
|
||||
column_header: 'MOBILISATION_CHANNEL'
|
||||
is_key: 'N'
|
||||
- type: 'xpath'
|
||||
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:CCB'
|
||||
column_header: 'CCB'
|
||||
is_key: 'N'
|
||||
- type: 'static'
|
||||
value: ''
|
||||
column_header: 'INVESTOR_SSS'
|
||||
- type: 'static'
|
||||
value: ''
|
||||
column_header: 'INTERMEDIARY_SSS'
|
||||
- type: 'static'
|
||||
value: ''
|
||||
column_header: 'ISSUER_SSS'
|
||||
- type: 'static'
|
||||
value: ''
|
||||
column_header: 'SUSPECT_ID'
|
||||
- type: 'static'
|
||||
value: ''
|
||||
column_header: 'QUALITY_CHECK_STATUS'
|
||||
- type: 'static'
|
||||
value: ''
|
||||
column_header: 'ERROR_CODE'
|
||||
- type: 'static'
|
||||
value: ''
|
||||
column_header: 'ERROR_MESSAGE'
|
||||
- type: 'static'
|
||||
value: ''
|
||||
column_header: 'ERROR_POSITION_IN_FILE'
|
||||
- type: 'xpath'
|
||||
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:OaId'
|
||||
column_header: 'OA_ID'
|
||||
is_key: 'N'
|
||||
- type: 'xpath'
|
||||
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:ContractId'
|
||||
column_header: 'CONTRACT_ID'
|
||||
is_key: 'N'
|
||||
- type: 'xpath'
|
||||
value: '//ns:NonMarketableAssets/ns:NonMarketableAsset/ns:InstrmntId'
|
||||
column_header: 'INSTRMNT_ID'
|
||||
is_key: 'N'
|
||||
@@ -0,0 +1,179 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
||||
<DisseminationFile version="R14" xmlns="http://c2d.escb.eu/UseOfCollateralMessage">
|
||||
<MetaInformation>
|
||||
<ReportingNCB>FR</ReportingNCB>
|
||||
<SnapshotDate>2023-02-16</SnapshotDate>
|
||||
<DateCreated>2023-02-23T11:00:35</DateCreated>
|
||||
<NumberOfSuspectRecords>0</NumberOfSuspectRecords>
|
||||
</MetaInformation>
|
||||
<MarketableAssets>
|
||||
<MarketableAsset>
|
||||
<MFIId>FR10107</MFIId>
|
||||
<RegistrationCode xsi:type="isin" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
|
||||
<ISINCode>BE6302866973</ISINCode>
|
||||
</RegistrationCode>
|
||||
<NominalAmountSubmitted>40</NominalAmountSubmitted>
|
||||
<CollateralValueBeforeHaircuts>41.92566012</CollateralValueBeforeHaircuts>
|
||||
<CollateralValueAfterHaircuts>40.75174164</CollateralValueAfterHaircuts>
|
||||
<TypeOfSystem>pool</TypeOfSystem>
|
||||
<DomesticOrXborder>cross-border</DomesticOrXborder>
|
||||
<MobilisationChannel>CCBM mkt</MobilisationChannel>
|
||||
<CCB>BE</CCB>
|
||||
<InvestorSSS>CLBE01</InvestorSSS>
|
||||
<IssuerSSS>CLBE01</IssuerSSS>
|
||||
</MarketableAsset>
|
||||
<MarketableAsset>
|
||||
<MFIId>FR10107</MFIId>
|
||||
<RegistrationCode xsi:type="isin" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
|
||||
<ISINCode>DE000A1RQCP0</ISINCode>
|
||||
</RegistrationCode>
|
||||
<NominalAmountSubmitted>10</NominalAmountSubmitted>
|
||||
<CollateralValueBeforeHaircuts>10.2664863</CollateralValueBeforeHaircuts>
|
||||
<CollateralValueAfterHaircuts>10.06115657</CollateralValueAfterHaircuts>
|
||||
<TypeOfSystem>pool</TypeOfSystem>
|
||||
<DomesticOrXborder>cross-border</DomesticOrXborder>
|
||||
<MobilisationChannel>CCBM mkt</MobilisationChannel>
|
||||
<CCB>DE</CCB>
|
||||
<InvestorSSS>CLDE01</InvestorSSS>
|
||||
<IssuerSSS>CLDE01</IssuerSSS>
|
||||
</MarketableAsset>
|
||||
<MarketableAsset>
|
||||
<MFIId>FR10107</MFIId>
|
||||
<RegistrationCode xsi:type="isin" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
|
||||
<ISINCode>ES0305248009</ISINCode>
|
||||
</RegistrationCode>
|
||||
<NominalAmountSubmitted>7.5753425</NominalAmountSubmitted>
|
||||
<CollateralValueBeforeHaircuts>7.31191527</CollateralValueBeforeHaircuts>
|
||||
<CollateralValueAfterHaircuts>6.78545737</CollateralValueAfterHaircuts>
|
||||
<TypeOfSystem>pool</TypeOfSystem>
|
||||
<DomesticOrXborder>cross-border</DomesticOrXborder>
|
||||
<MobilisationChannel>CCBM mkt</MobilisationChannel>
|
||||
<CCB>ES</CCB>
|
||||
<InvestorSSS>CLES01</InvestorSSS>
|
||||
<IssuerSSS>CLES01</IssuerSSS>
|
||||
</MarketableAsset>
|
||||
</MarketableAssets>
|
||||
<NonMarketableAssets>
|
||||
<NonMarketableAsset>
|
||||
<MFIId>FR10107</MFIId>
|
||||
<OtherRegistrationNumber>FRCPACAGGREGATE</OtherRegistrationNumber>
|
||||
<TypeOfSystem>pool</TypeOfSystem>
|
||||
<DomesticOrXborder>domestic</DomesticOrXborder>
|
||||
<NonMktAssetType>Credit claim</NonMktAssetType>
|
||||
<DateOfMaturity>2023-07-08</DateOfMaturity>
|
||||
<InterestPaymentType>Fixed</InterestPaymentType>
|
||||
<CollateralValueBeforeHaircuts>100</CollateralValueBeforeHaircuts>
|
||||
<CollateralValueAfterHaircuts>88.5</CollateralValueAfterHaircuts>
|
||||
<NumberOfAggregatedDebtors>2</NumberOfAggregatedDebtors>
|
||||
<EligibleViaGuarantor>N</EligibleViaGuarantor>
|
||||
<Debtor xsi:type="DG3" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
|
||||
<Name>Aggregate</Name>
|
||||
<ID xsi:type="NCB">
|
||||
<value>Aggregate</value>
|
||||
</ID>
|
||||
<Class>NFC-PSE3</Class>
|
||||
<Residence>FR</Residence>
|
||||
<CreditAssessmentSource>ICAS</CreditAssessmentSource>
|
||||
<CredAssessSysProvider>Banque de France</CredAssessSysProvider>
|
||||
<Rating>
|
||||
<NumberValue>0.0003</NumberValue>
|
||||
</Rating>
|
||||
</Debtor>
|
||||
<NumberOfAggregatedAssets>2</NumberOfAggregatedAssets>
|
||||
<Denomination>EUR</Denomination>
|
||||
<Secured>N</Secured>
|
||||
<ResidualMaturity>1-3</ResidualMaturity>
|
||||
<BucketSize>0_to_100</BucketSize>
|
||||
<NCBComment>10107</NCBComment>
|
||||
<ValuationMethodology>Outstanding</ValuationMethodology>
|
||||
<NominalAmountSubmitted>10</NominalAmountSubmitted>
|
||||
<MobilisationChannel>Local dom nonmkt</MobilisationChannel>
|
||||
<CCB>FR</CCB>
|
||||
<OaId>10107</OaId>
|
||||
<ContractId>1549493</ContractId>
|
||||
<InstrmntId>1549493</InstrmntId>
|
||||
</NonMarketableAsset>
|
||||
<NonMarketableAsset>
|
||||
<MFIId>FR11188</MFIId>
|
||||
<OtherRegistrationNumber>FRC000748968616</OtherRegistrationNumber>
|
||||
<TypeOfSystem>pool</TypeOfSystem>
|
||||
<DomesticOrXborder>domestic</DomesticOrXborder>
|
||||
<NonMktAssetType>ACC</NonMktAssetType>
|
||||
<DateOfMaturity>2023-09-30</DateOfMaturity>
|
||||
<InterestPaymentType>Floating</InterestPaymentType>
|
||||
<Cap>N</Cap>
|
||||
<ReferenceRate>3MEUBOR</ReferenceRate>
|
||||
<CollateralValueBeforeHaircuts>200</CollateralValueBeforeHaircuts>
|
||||
<CollateralValueAfterHaircuts>160</CollateralValueAfterHaircuts>
|
||||
<NumberOfAggregatedDebtors>1</NumberOfAggregatedDebtors>
|
||||
<EligibleViaGuarantor>N</EligibleViaGuarantor>
|
||||
<Debtor xsi:type="DG3" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
|
||||
<Name>SAS UNIP SAINT MALO AUTOMOBILES DISTRIBUTION</Name>
|
||||
<ID xsi:type="NCB">
|
||||
<value>FR895780419</value>
|
||||
</ID>
|
||||
<Class>Other</Class>
|
||||
<Residence>FR</Residence>
|
||||
<CreditAssessmentSource>ICAS</CreditAssessmentSource>
|
||||
<CredAssessSysProvider>Banque de France</CredAssessSysProvider>
|
||||
<Rating>
|
||||
<NumberValue>0.0051</NumberValue>
|
||||
</Rating>
|
||||
</Debtor>
|
||||
<NumberOfAggregatedAssets>1</NumberOfAggregatedAssets>
|
||||
<Denomination>EUR</Denomination>
|
||||
<Secured>N</Secured>
|
||||
<ResidualMaturity>10-15</ResidualMaturity>
|
||||
<NCBComment>11188</NCBComment>
|
||||
<ValuationMethodology>Outstanding</ValuationMethodology>
|
||||
<NominalAmountSubmitted>999999.99999999</NominalAmountSubmitted>
|
||||
<ResettingPeriodMoreThanOneYear>N</ResettingPeriodMoreThanOneYear>
|
||||
<MobilisationChannel>Local dom nonmkt</MobilisationChannel>
|
||||
<CCB>FR</CCB>
|
||||
<OaId>11188</OaId>
|
||||
<ContractId>R05020ETC</ContractId>
|
||||
<InstrmntId>202095459110</InstrmntId>
|
||||
</NonMarketableAsset>
|
||||
<NonMarketableAsset>
|
||||
<MFIId>FR11188</MFIId>
|
||||
<OtherRegistrationNumber>FRC000748968732</OtherRegistrationNumber>
|
||||
<TypeOfSystem>pool</TypeOfSystem>
|
||||
<DomesticOrXborder>domestic</DomesticOrXborder>
|
||||
<NonMktAssetType>ACC</NonMktAssetType>
|
||||
<DateOfMaturity>2023-09-30</DateOfMaturity>
|
||||
<InterestPaymentType>Floating</InterestPaymentType>
|
||||
<Cap>N</Cap>
|
||||
<ReferenceRate>3MEUBOR</ReferenceRate>
|
||||
<CollateralValueBeforeHaircuts>300</CollateralValueBeforeHaircuts>
|
||||
<CollateralValueAfterHaircuts>201</CollateralValueAfterHaircuts>
|
||||
<NumberOfAggregatedDebtors>1</NumberOfAggregatedDebtors>
|
||||
<EligibleViaGuarantor>N</EligibleViaGuarantor>
|
||||
<Debtor xsi:type="DG3" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
|
||||
<Name>ALLIANCE E.S.D.B</Name>
|
||||
<ID xsi:type="NCB">
|
||||
<value>FR347861981</value>
|
||||
</ID>
|
||||
<Class>Other</Class>
|
||||
<Residence>FR</Residence>
|
||||
<CreditAssessmentSource>ICAS</CreditAssessmentSource>
|
||||
<CredAssessSysProvider>Banque de France</CredAssessSysProvider>
|
||||
<Rating>
|
||||
<NumberValue>0.0051</NumberValue>
|
||||
</Rating>
|
||||
</Debtor>
|
||||
<NumberOfAggregatedAssets>1</NumberOfAggregatedAssets>
|
||||
<Denomination>EUR</Denomination>
|
||||
<Secured>N</Secured>
|
||||
<ResidualMaturity>+30</ResidualMaturity>
|
||||
<NCBComment>11188</NCBComment>
|
||||
<ValuationMethodology>Outstanding</ValuationMethodology>
|
||||
<NominalAmountSubmitted>0</NominalAmountSubmitted>
|
||||
<ResettingPeriodMoreThanOneYear>N</ResettingPeriodMoreThanOneYear>
|
||||
<MobilisationChannel>Local dom nonmkt</MobilisationChannel>
|
||||
<CCB>FR</CCB>
|
||||
<OaId>11188</OaId>
|
||||
<ContractId>R05320ETC</ContractId>
|
||||
<InstrmntId>202095459010</InstrmntId>
|
||||
</NonMarketableAsset>
|
||||
</NonMarketableAssets>
|
||||
</DisseminationFile>
|
||||
0
airflow/ods/csdb/debt/.gitkeep
Normal file
0
airflow/ods/csdb/debt/.gitkeep
Normal file
0
airflow/ods/csdb/debt/config/.gitkeep
Normal file
0
airflow/ods/csdb/debt/config/.gitkeep
Normal file
398
airflow/ods/csdb/debt/config/m_ODS_CSDB_DEBT_PARSE.yaml
Normal file
398
airflow/ods/csdb/debt/config/m_ODS_CSDB_DEBT_PARSE.yaml
Normal file
@@ -0,0 +1,398 @@
|
||||
# App configurations
|
||||
encoding_type: latin1
|
||||
|
||||
# Global configurations
|
||||
tmpdir: /tmp
|
||||
inbox_prefix: INBOX/CSDB/CentralizedSecuritiesDissemination
|
||||
archive_prefix: ARCHIVE/CSDB/CentralizedSecuritiesDissemination
|
||||
workflow_name: w_ODS_CSDB_DEBT
|
||||
validation_schema_path: None
|
||||
file_type: csv
|
||||
|
||||
tasks:
|
||||
- task_name: m_ODS_CSDB_DEBT_PARSE
|
||||
ods_prefix: INBOX/CSDB/CentralizedSecuritiesDissemination/CSDB_DEBT
|
||||
output_table: CSDB_DEBT
|
||||
output_columns:
|
||||
- type: 'a_key'
|
||||
column_header: 'A_KEY'
|
||||
- type: 'workflow_key'
|
||||
column_header: 'A_WORKFLOW_HISTORY_KEY'
|
||||
- type: 'csv_header'
|
||||
value: 'Date last modified'
|
||||
column_header: 'NEWUPDATED'
|
||||
- type: 'csv_header'
|
||||
value: 'Extraction date'
|
||||
column_header: 'IDLOADDATE_DIM'
|
||||
- type: 'csv_header'
|
||||
value: 'ISIN code'
|
||||
column_header: 'EXTERNALCODE_ISIN'
|
||||
- type: 'csv_header'
|
||||
value: 'National instrument code type'
|
||||
column_header: 'EXTERNALCODETYPE_NC'
|
||||
- type: 'csv_header'
|
||||
value: 'National instrument code'
|
||||
column_header: 'EXTERNALCODE_NATIONAL'
|
||||
- type: 'csv_header'
|
||||
value: 'Internal instrument code'
|
||||
column_header: 'IDIRINSTRUMENT'
|
||||
- type: 'csv_header'
|
||||
value: 'Short name'
|
||||
column_header: 'SHORTNAME'
|
||||
- type: 'csv_header'
|
||||
value: 'Bond duration'
|
||||
column_header: 'VA_BONDDURATION'
|
||||
- type: 'csv_header'
|
||||
value: 'Debt type'
|
||||
column_header: 'IDIRDEBTTYPE'
|
||||
- type: 'csv_header'
|
||||
value: 'Asset securitisation type'
|
||||
column_header: 'IDIRASSETSECTYPE'
|
||||
- type: 'csv_header'
|
||||
value: 'CFI classification'
|
||||
column_header: 'IDIRCLASSIFICATIONCODE_CFI'
|
||||
- type: 'csv_header'
|
||||
value: 'Instrument ESA 95 class'
|
||||
column_header: 'IDIRCLASSIFICATIONCODE_ESAI'
|
||||
- type: 'csv_header'
|
||||
value: 'Instrument ESA 95 class - value type'
|
||||
column_header: 'IDIRCLASSIFICATIONCODE_ESAI_DM'
|
||||
- type: 'csv_header'
|
||||
value: 'Nominal currency'
|
||||
column_header: 'IDIRCURRENCY_NOMINAL'
|
||||
- type: 'csv_header'
|
||||
value: 'Amount issued'
|
||||
column_header: 'AMOUNTISSUED'
|
||||
- type: 'csv_header'
|
||||
value: 'Amount outstanding'
|
||||
column_header: 'AMOUNTOUTSTANDING'
|
||||
- type: 'csv_header'
|
||||
value: 'Amount outstanding in EUR'
|
||||
column_header: 'AMOUNTOUTSTANDING_EUR'
|
||||
- type: 'csv_header'
|
||||
value: 'Pool factor'
|
||||
column_header: 'POOLFACTOR'
|
||||
- type: 'csv_header'
|
||||
value: 'Issue price'
|
||||
column_header: 'ISSUEPRICE'
|
||||
- type: 'csv_header'
|
||||
value: 'Issue date'
|
||||
column_header: 'IDISSUEDATE'
|
||||
- type: 'csv_header'
|
||||
value: 'Coupon type'
|
||||
column_header: 'IDIRCOUPONTYPE'
|
||||
- type: 'csv_header'
|
||||
value: 'Last Coupon frequency'
|
||||
column_header: 'IDIRCOUPONFREQUENCY'
|
||||
- type: 'csv_header'
|
||||
value: 'Coupon currency'
|
||||
column_header: 'IDIRCURRENCY_COUPON'
|
||||
- type: 'csv_header'
|
||||
value: 'Last Coupon rate'
|
||||
column_header: 'COUPONRATE'
|
||||
- type: 'csv_header'
|
||||
value: 'Last Coupon date'
|
||||
column_header: 'COUPONDATE'
|
||||
- type: 'csv_header'
|
||||
value: 'Redemption type'
|
||||
column_header: 'IDIRREDEMPTIONTYPE'
|
||||
- type: 'csv_header'
|
||||
value: 'Redemption frequency'
|
||||
column_header: 'IDIRREDEMPTIONFREQUENCY'
|
||||
- type: 'csv_header'
|
||||
value: 'Redemption currency'
|
||||
column_header: 'IDIRCURRENCY_REDEMPTION'
|
||||
- type: 'csv_header'
|
||||
value: 'Redemption price'
|
||||
column_header: 'REDEMPTIONPRICE'
|
||||
- type: 'csv_header'
|
||||
value: 'Maturity date'
|
||||
column_header: 'IDMATURITYDATE'
|
||||
- type: 'csv_header'
|
||||
value: 'Issuer organisation alias type'
|
||||
column_header: 'IDIRORGANISATIONALIASTYPE_IS'
|
||||
- type: 'csv_header'
|
||||
value: 'Issuer organisation alias code'
|
||||
column_header: 'ISSUERSOURCECODE'
|
||||
- type: 'csv_header'
|
||||
value: 'Issuer MFI code'
|
||||
column_header: 'ISSUEREXTERNALCODE_MFI'
|
||||
- type: 'csv_header'
|
||||
value: 'Issuer BIC code'
|
||||
column_header: 'ISSUEREXTERNALCODE_BIC'
|
||||
- type: 'csv_header'
|
||||
value: 'Issuer BEI code'
|
||||
column_header: 'ISSUEREXTERNALCODE_BEI'
|
||||
- type: 'csv_header'
|
||||
value: 'Internal organisation code'
|
||||
column_header: 'IDIRORGANISATION_ISSUER'
|
||||
- type: 'csv_header'
|
||||
value: 'Issuer name'
|
||||
column_header: 'ISSUERNAME'
|
||||
- type: 'csv_header'
|
||||
value: 'Issuer domicile country'
|
||||
column_header: 'IDIRCOUNTRY'
|
||||
- type: 'csv_header'
|
||||
value: 'Issuer domicile country - value type'
|
||||
column_header: 'IDIRCOUNTRY_DM'
|
||||
- type: 'csv_header'
|
||||
value: 'Issuer ESA 95 sector'
|
||||
column_header: 'IDIRCLASSIFICATIONCODE_ESAO'
|
||||
- type: 'csv_header'
|
||||
value: 'Issuer ESA 95 sector - value type'
|
||||
column_header: 'IDIRCLASSIFICATIONCODE_ESAO_DM'
|
||||
- type: 'csv_header'
|
||||
value: 'Issuer NACE sector'
|
||||
column_header: 'IDIRCLASSIFICATIONCODE_NACE'
|
||||
- type: 'csv_header'
|
||||
value: 'Price date'
|
||||
column_header: 'PUBLICATIONPRICEDATE'
|
||||
- type: 'csv_header'
|
||||
value: 'Price value'
|
||||
column_header: 'PUBLICATIONPRICE'
|
||||
- type: 'csv_header'
|
||||
value: 'Price value - type'
|
||||
column_header: 'PUBLICATIONPRICETYPE'
|
||||
- type: 'csv_header'
|
||||
value: 'Quotation basis'
|
||||
column_header: 'PUBLICATIONPRICEQUOTATIONBASIS'
|
||||
- type: 'csv_header'
|
||||
value: 'Monthly average price'
|
||||
column_header: 'MONTHLYAVERAGEPRICE'
|
||||
- type: 'csv_header'
|
||||
value: 'Accrual start date'
|
||||
column_header: 'ACCRUALSTARTDATE'
|
||||
- type: 'csv_header'
|
||||
value: 'Accrued income factor'
|
||||
column_header: 'DEBTACCRUALDEBTOR'
|
||||
- type: 'csv_header'
|
||||
value: 'Accrued income factor - value type'
|
||||
column_header: 'DEBTACCRUALDEBTOR_DM'
|
||||
- type: 'csv_header'
|
||||
value: 'Accrued income (Creditor)'
|
||||
column_header: 'DEBTACCRUALCREDITOR'
|
||||
- type: 'csv_header'
|
||||
value: 'Accrued income (Creditor) - value type'
|
||||
column_header: 'DEBTACCRUALCREDITOR_TYP'
|
||||
- type: 'csv_header'
|
||||
value: 'Accrued interest'
|
||||
column_header: 'ACCRUEDINTEREST'
|
||||
- type: 'csv_header'
|
||||
value: 'Yield to maturity'
|
||||
column_header: 'YTMNONOPTIONADJUSTED'
|
||||
- type: 'csv_header'
|
||||
value: 'ESCB issuer identifier'
|
||||
column_header: 'ESCB_ISSUER_IDENT'
|
||||
- type: 'csv_header'
|
||||
value: 'ESCB issuer identifier type'
|
||||
column_header: 'VA_ESCBCODETYPE'
|
||||
- type: 'csv_header'
|
||||
value: 'Issuer compound ID'
|
||||
column_header: 'IDUDCMPPARTY'
|
||||
- type: 'csv_header'
|
||||
value: 'Amount Oustanding type'
|
||||
column_header: 'AMOUNTOUTSTANDINGTYPE'
|
||||
- type: 'csv_header'
|
||||
value: 'Market Capitalisation'
|
||||
column_header: 'MARKETCAPITALISATION'
|
||||
- type: 'csv_header'
|
||||
value: 'Market Capitalisation in euro'
|
||||
column_header: 'MARKETCAPITALISATION_EUR'
|
||||
- type: 'csv_header'
|
||||
value: 'Security Status'
|
||||
column_header: 'VA_SECURITYSTATUS'
|
||||
- type: 'csv_header'
|
||||
value: 'Instrument suppl class'
|
||||
column_header: 'VA_INSTRSUPPLEMENTARYCLASS'
|
||||
- type: 'csv_header'
|
||||
value: 'Residual maturity class'
|
||||
column_header: 'VA_RESIDUALMATURITYCLASS'
|
||||
- type: 'csv_header'
|
||||
value: 'Is In SEC'
|
||||
column_header: 'VA_ISINSEC'
|
||||
- type: 'csv_header'
|
||||
value: 'Is In EADB'
|
||||
column_header: 'VA_ISELIGIBLEFOREADB'
|
||||
- type: 'csv_header'
|
||||
value: 'Instrument ESA 2010 class'
|
||||
column_header: 'IDIRCLASSIFICATIONCODE_ESAI10'
|
||||
- type: 'csv_header'
|
||||
value: 'Issuer ESA 2010 sector'
|
||||
column_header: 'IDIRCLASSIFICATIONCODE_ESAO10'
|
||||
- type: 'csv_header'
|
||||
value: 'Primary asset classification 2'
|
||||
column_header: 'IDIRDEBTTYPE_N'
|
||||
- type: 'csv_header'
|
||||
value: 'Instruments seniority type'
|
||||
column_header: 'SENIORITY'
|
||||
- type: 'csv_header'
|
||||
value: 'Issuer LEI code'
|
||||
column_header: 'ISSUEREXTERNALCODE_LEI'
|
||||
- type: 'csv_header'
|
||||
value: 'Instrument ESA 2010 class - value type'
|
||||
column_header: 'INSTR_ESA2010_CLASS_VALUETYPE'
|
||||
- type: 'csv_header'
|
||||
value: 'Issuer ESA 2010 class - value type'
|
||||
column_header: 'ISS_ESA2010_CLASS_VALUETYPE'
|
||||
- type: 'csv_header'
|
||||
value: 'Security status date'
|
||||
column_header: 'VA_SECURITYSTATUSDATE'
|
||||
- type: 'csv_header'
|
||||
value: 'Group type'
|
||||
column_header: 'GROUP_TYPE'
|
||||
- type: 'csv_header'
|
||||
value: 'Has embedded option'
|
||||
column_header: 'HASEMBEDDEDOPTION'
|
||||
- type: 'csv_header'
|
||||
value: 'Volume traded'
|
||||
column_header: 'VOLUMETRADED'
|
||||
- type: 'csv_header'
|
||||
value: 'Primary listing name'
|
||||
column_header: 'PRIMARYLISTINGNAME'
|
||||
- type: 'csv_header'
|
||||
value: 'Primary listing residency country'
|
||||
column_header: 'PRIMARYLISTINGCOUNTRY'
|
||||
- type: 'csv_header'
|
||||
value: 'Instrument portfolio flags'
|
||||
column_header: 'VA_INSTRPORTFLAGS'
|
||||
- type: 'csv_header'
|
||||
value: 'Residual maturity'
|
||||
column_header: 'RESIDUALMATURITY'
|
||||
- type: 'csv_header'
|
||||
value: 'Original maturity'
|
||||
column_header: 'ORIGINAL_MATURITY'
|
||||
- type: 'csv_header'
|
||||
value: 'CFIN classification'
|
||||
column_header: 'IDIRCLASSIFICATIONCODE_CFIN'
|
||||
- type: 'csv_header'
|
||||
value: 'First scheduled Coupon date'
|
||||
column_header: 'COUPONFIRSTPAYMENTDATE'
|
||||
- type: 'csv_header'
|
||||
value: 'Last scheduled Coupon date'
|
||||
column_header: 'COUPONLASTPAYMENTDATE'
|
||||
- type: 'csv_header'
|
||||
value: 'Coupon rate underlying ISIN'
|
||||
column_header: 'COUPONRATEUNDERLYINGCODE_ISIN'
|
||||
- type: 'csv_header'
|
||||
value: 'Coupon rate spread'
|
||||
column_header: 'COUPONRATESPREAD'
|
||||
- type: 'csv_header'
|
||||
value: 'Coupon rate multiplier'
|
||||
column_header: 'COUPONRATEMULTIPLIER'
|
||||
- type: 'csv_header'
|
||||
value: 'Coupon rate cap'
|
||||
column_header: 'COUPONRATECAP'
|
||||
- type: 'csv_header'
|
||||
value: 'Coupon rate floor'
|
||||
column_header: 'COUPONRATEFLOOR'
|
||||
- type: 'csv_header'
|
||||
value: 'Issue date tranche'
|
||||
column_header: 'IDISSUEDATE_TRANCHE'
|
||||
- type: 'csv_header'
|
||||
value: 'Issue price tranche'
|
||||
column_header: 'ISSUEPRICE_TRANCHE'
|
||||
- type: 'csv_header'
|
||||
value: 'Is private placement'
|
||||
column_header: 'VA_ISPRIVATEPLACEMENT'
|
||||
- type: 'csv_header'
|
||||
value: 'RIAD code'
|
||||
column_header: 'RIAD_CODE'
|
||||
- type: 'csv_header'
|
||||
value: 'RIAD OUID'
|
||||
column_header: 'RIAD_OUID'
|
||||
- type: 'csv_header'
|
||||
value: 'ESG Flag 1'
|
||||
column_header: 'ESG1'
|
||||
- type: 'csv_header'
|
||||
value: 'ESG Flag 2'
|
||||
column_header: 'ESG2'
|
||||
- type: 'csv_header'
|
||||
value: 'ESG Flag 3'
|
||||
column_header: 'ESG3'
|
||||
- type: 'csv_header'
|
||||
value: 'Strip'
|
||||
column_header: 'STRIP'
|
||||
- type: 'csv_header'
|
||||
value: 'Depository receipt'
|
||||
column_header: 'DEPOSITORY_RECEIPT'
|
||||
- type: 'csv_header'
|
||||
value: 'Rule 144A'
|
||||
column_header: 'RULE_144A'
|
||||
- type: 'csv_header'
|
||||
value: 'Reg S'
|
||||
column_header: 'REG_S'
|
||||
- type: 'csv_header'
|
||||
value: 'Warrant'
|
||||
column_header: 'WARRANT'
|
||||
- type: 'csv_header'
|
||||
value: 'CSEC Relevance - stock'
|
||||
column_header: 'CSEC_RELEVANCE_STOCK'
|
||||
- type: 'csv_header'
|
||||
value: 'CSEC relevance - gross issuance'
|
||||
column_header: 'CSEC_RELEVANCE_GROSS_ISSUANCE'
|
||||
- type: 'csv_header'
|
||||
value: 'CSEC relevance - redemption'
|
||||
column_header: 'CSEC_RELEVANCE_REDEMPTION'
|
||||
- type: 'csv_header'
|
||||
value: 'Accruing coupon'
|
||||
column_header: 'ACCRUING_COUPON'
|
||||
- type: 'csv_header'
|
||||
value: 'Accruing discount'
|
||||
column_header: 'ACCRUING_DISCOUNT'
|
||||
- type: 'csv_header'
|
||||
value: 'STEP Id'
|
||||
column_header: 'STEPID'
|
||||
- type: 'csv_header'
|
||||
value: 'Program Name'
|
||||
column_header: 'PROGRAMNAME'
|
||||
- type: 'csv_header'
|
||||
value: 'Program Ceiling'
|
||||
column_header: 'PROGRAMCEILING'
|
||||
- type: 'csv_header'
|
||||
value: 'Program Status'
|
||||
column_header: 'PROGRAMSTATUS'
|
||||
- type: 'csv_header'
|
||||
value: 'Issuer NACE21 sector'
|
||||
column_header: 'ISSUERNACE21SECTOR'
|
||||
- type: 'csv_header'
|
||||
value: 'Instrument quotation basis'
|
||||
column_header: 'INSTRUMENTQUOTATIONBASIS'
|
||||
- type: 'csv_header'
|
||||
value: 'placeholder 38'
|
||||
column_header: 'PLACEHOLDER38'
|
||||
- type: 'csv_header'
|
||||
value: 'placeholder 39'
|
||||
column_header: 'PLACEHOLDER39'
|
||||
- type: 'csv_header'
|
||||
value: 'placeholder 40'
|
||||
column_header: 'PLACEHOLDER40'
|
||||
- type: 'csv_header'
|
||||
value: 'placeholder 41'
|
||||
column_header: 'PLACEHOLDER41'
|
||||
- type: 'csv_header'
|
||||
value: 'placeholder 42'
|
||||
column_header: 'PLACEHOLDER42'
|
||||
- type: 'csv_header'
|
||||
value: 'placeholder 43'
|
||||
column_header: 'PLACEHOLDER43'
|
||||
- type: 'csv_header'
|
||||
value: 'placeholder 44'
|
||||
column_header: 'PLACEHOLDER44'
|
||||
- type: 'csv_header'
|
||||
value: 'placeholder 45'
|
||||
column_header: 'PLACEHOLDER45'
|
||||
- type: 'csv_header'
|
||||
value: 'placeholder 46'
|
||||
column_header: 'PLACEHOLDER46'
|
||||
- type: 'csv_header'
|
||||
value: 'placeholder 47'
|
||||
column_header: 'PLACEHOLDER47'
|
||||
- type: 'csv_header'
|
||||
value: 'placeholder 48'
|
||||
column_header: 'PLACEHOLDER48'
|
||||
- type: 'csv_header'
|
||||
value: 'placeholder 49'
|
||||
column_header: 'PLACEHOLDER49'
|
||||
- type: 'csv_header'
|
||||
value: 'placeholder 50'
|
||||
column_header: 'PLACEHOLDER50'
|
||||
@@ -0,0 +1,400 @@
|
||||
# App configurations
|
||||
encoding_type: latin1
|
||||
# Global configurations
|
||||
tmpdir: /tmp
|
||||
inbox_prefix: INBOX/CSDB/CentralizedSecuritiesDailyReferenceDataDissemination
|
||||
archive_prefix: ARCHIVE/CSDB/CentralizedSecuritiesDailyReferenceDataDissemination
|
||||
workflow_name: w_ODS_CSDB_DEBT_DAILY
|
||||
validation_schema_path: None
|
||||
file_type: csv
|
||||
|
||||
tasks:
|
||||
- task_name: m_ODS_CSDB_DEBT_DAILY_PARSE
|
||||
ods_prefix: INBOX/CSDB/CentralizedSecuritiesDailyReferenceDataDissemination/CSDB_DEBT_DAILY
|
||||
output_table: CSDB_DEBT_DAILY
|
||||
output_columns:
|
||||
- type: 'a_key'
|
||||
column_header: 'A_KEY'
|
||||
- type: 'workflow_key'
|
||||
column_header: 'A_WORKFLOW_HISTORY_KEY'
|
||||
- type: 'csv_header'
|
||||
value: 'Date last modified'
|
||||
column_header: 'Date last modified'
|
||||
- type: 'csv_header'
|
||||
value: 'Extraction date'
|
||||
column_header: 'Extraction date'
|
||||
- type: 'csv_header'
|
||||
value: 'ISIN code'
|
||||
column_header: 'ISIN code'
|
||||
- type: 'csv_header'
|
||||
value: 'National instrument code type'
|
||||
column_header: 'National instrument code type'
|
||||
- type: 'csv_header'
|
||||
value: 'National instrument code'
|
||||
column_header: 'National instrument code'
|
||||
- type: 'csv_header'
|
||||
value: 'Internal instrument code'
|
||||
column_header: 'Internal instrument code'
|
||||
- type: 'csv_header'
|
||||
value: 'Short name'
|
||||
column_header: 'Short name'
|
||||
- type: 'csv_header'
|
||||
value: 'Bond duration'
|
||||
column_header: 'Bond duration'
|
||||
- type: 'csv_header'
|
||||
value: 'Debt type'
|
||||
column_header: 'Debt type'
|
||||
- type: 'csv_header'
|
||||
value: 'Asset securitisation type'
|
||||
column_header: 'Asset securitisation type'
|
||||
- type: 'csv_header'
|
||||
value: 'CFI classification'
|
||||
column_header: 'CFI classification'
|
||||
- type: 'csv_header'
|
||||
value: 'Instrument ESA 95 class'
|
||||
column_header: 'Instrument ESA 95 class'
|
||||
- type: 'csv_header'
|
||||
value: 'Instrument ESA 95 class - value type'
|
||||
column_header: 'Instrument ESA 95 class - value type'
|
||||
- type: 'csv_header'
|
||||
value: 'Nominal currency'
|
||||
column_header: 'Nominal currency'
|
||||
- type: 'csv_header'
|
||||
value: 'Amount issued'
|
||||
column_header: 'Amount issued'
|
||||
- type: 'csv_header'
|
||||
value: 'Amount outstanding'
|
||||
column_header: 'Amount outstanding'
|
||||
- type: 'csv_header'
|
||||
value: 'Amount outstanding in EUR'
|
||||
column_header: 'Amount outstanding in EUR'
|
||||
- type: 'csv_header'
|
||||
value: 'Pool factor'
|
||||
column_header: 'Pool factor'
|
||||
- type: 'csv_header'
|
||||
value: 'Issue price'
|
||||
column_header: 'Issue price'
|
||||
- type: 'csv_header'
|
||||
value: 'Issue date'
|
||||
column_header: 'Issue date'
|
||||
- type: 'csv_header'
|
||||
value: 'Coupon type'
|
||||
column_header: 'Coupon type'
|
||||
- type: 'csv_header'
|
||||
value: 'Last Coupon frequency'
|
||||
column_header: 'Last Coupon frequency'
|
||||
- type: 'csv_header'
|
||||
value: 'Coupon currency'
|
||||
column_header: 'Coupon currency'
|
||||
- type: 'csv_header'
|
||||
value: 'Last Coupon rate'
|
||||
column_header: 'Last Coupon rate'
|
||||
- type: 'csv_header'
|
||||
value: 'Last Coupon date'
|
||||
column_header: 'Last Coupon date'
|
||||
- type: 'csv_header'
|
||||
value: 'Redemption type'
|
||||
column_header: 'Redemption type'
|
||||
- type: 'csv_header'
|
||||
value: 'Redemption frequency'
|
||||
column_header: 'Redemption frequency'
|
||||
- type: 'csv_header'
|
||||
value: 'Redemption currency'
|
||||
column_header: 'Redemption currency'
|
||||
- type: 'csv_header'
|
||||
value: 'Redemption price'
|
||||
column_header: 'Redemption price'
|
||||
- type: 'csv_header'
|
||||
value: 'Maturity date'
|
||||
column_header: 'Maturity date'
|
||||
- type: 'csv_header'
|
||||
value: 'Issuer organisation alias type'
|
||||
column_header: 'Issuer organisation alias type'
|
||||
- type: 'csv_header'
|
||||
value: 'Issuer organisation alias code'
|
||||
column_header: 'Issuer organisation alias code'
|
||||
- type: 'csv_header'
|
||||
value: 'Issuer MFI code'
|
||||
column_header: 'Issuer MFI code'
|
||||
- type: 'csv_header'
|
||||
value: 'Issuer BIC code'
|
||||
column_header: 'Issuer BIC code'
|
||||
- type: 'csv_header'
|
||||
value: 'Issuer BEI code'
|
||||
column_header: 'Issuer BEI code'
|
||||
- type: 'csv_header'
|
||||
value: 'Internal organisation code'
|
||||
column_header: 'Internal organisation code'
|
||||
- type: 'csv_header'
|
||||
value: 'Issuer name'
|
||||
column_header: 'Issuer name'
|
||||
- type: 'csv_header'
|
||||
value: 'Issuer domicile country'
|
||||
column_header: 'Issuer domicile country'
|
||||
- type: 'csv_header'
|
||||
value: 'Issuer domicile country - value type'
|
||||
column_header: 'Issuer domicile country - value type'
|
||||
- type: 'csv_header'
|
||||
value: 'Issuer ESA 95 sector'
|
||||
column_header: 'Issuer ESA 95 sector'
|
||||
- type: 'csv_header'
|
||||
value: 'Issuer ESA 95 sector - value type'
|
||||
column_header: 'Issuer ESA 95 sector - value type'
|
||||
- type: 'csv_header'
|
||||
value: 'Issuer NACE sector'
|
||||
column_header: 'Issuer NACE sector'
|
||||
- type: 'csv_header'
|
||||
value: 'Price date'
|
||||
column_header: 'Price date'
|
||||
- type: 'csv_header'
|
||||
value: 'Price value'
|
||||
column_header: 'Price value'
|
||||
- type: 'csv_header'
|
||||
value: 'Price value - type'
|
||||
column_header: 'Price value - type'
|
||||
- type: 'csv_header'
|
||||
value: 'Quotation basis'
|
||||
column_header: 'Quotation basis'
|
||||
- type: 'csv_header'
|
||||
value: 'Monthly average price'
|
||||
column_header: 'Monthly average price'
|
||||
- type: 'csv_header'
|
||||
value: 'Accrual start date'
|
||||
column_header: 'Accrual start date'
|
||||
- type: 'csv_header'
|
||||
value: 'Accrued income factor'
|
||||
column_header: 'Accrued income factor'
|
||||
- type: 'csv_header'
|
||||
value: 'Accrued income factor - value type'
|
||||
column_header: 'Accrued income factor - value type'
|
||||
- type: 'csv_header'
|
||||
value: 'Accrued income (Creditor)'
|
||||
column_header: 'Accrued income (Creditor)'
|
||||
- type: 'csv_header'
|
||||
value: 'Accrued income (Creditor) - value type'
|
||||
column_header: 'Accrued income (Creditor) - value type'
|
||||
- type: 'csv_header'
|
||||
value: 'Accrued interest'
|
||||
column_header: 'Accrued interest'
|
||||
- type: 'csv_header'
|
||||
value: 'Yield to maturity'
|
||||
column_header: 'Yield to maturity'
|
||||
- type: 'csv_header'
|
||||
value: 'ESCB issuer identifier'
|
||||
column_header: 'ESCB issuer identifier'
|
||||
- type: 'csv_header'
|
||||
value: 'ESCB issuer identifier type'
|
||||
column_header: 'ESCB issuer identifier type'
|
||||
- type: 'csv_header'
|
||||
value: 'Issuer compound ID'
|
||||
column_header: 'Issuer compound ID'
|
||||
- type: 'csv_header'
|
||||
value: 'Amount Oustanding type'
|
||||
column_header: 'Amount Oustanding type'
|
||||
- type: 'csv_header'
|
||||
value: 'Market Capitalisation'
|
||||
column_header: 'Market Capitalisation'
|
||||
- type: 'csv_header'
|
||||
value: 'Market Capitalisation in euro'
|
||||
column_header: 'Market Capitalisation in euro'
|
||||
- type: 'csv_header'
|
||||
value: 'Security Status'
|
||||
column_header: 'Security Status'
|
||||
- type: 'csv_header'
|
||||
value: 'Instrument suppl class'
|
||||
column_header: 'Instrument suppl class'
|
||||
- type: 'csv_header'
|
||||
value: 'Residual maturity class'
|
||||
column_header: 'Residual maturity class'
|
||||
- type: 'csv_header'
|
||||
value: 'Is In SEC'
|
||||
column_header: 'Is In SEC'
|
||||
- type: 'csv_header'
|
||||
value: 'Is In EADB'
|
||||
column_header: 'Is In EADB'
|
||||
- type: 'csv_header'
|
||||
value: 'Instrument ESA 2010 class'
|
||||
column_header: 'Instrument ESA 2010 class'
|
||||
- type: 'csv_header'
|
||||
value: 'Issuer ESA 2010 sector'
|
||||
column_header: 'Issuer ESA 2010 sector'
|
||||
- type: 'csv_header'
|
||||
value: 'Primary asset classification 2'
|
||||
column_header: 'Primary asset classification 2'
|
||||
- type: 'csv_header'
|
||||
value: 'Instruments seniority type'
|
||||
column_header: 'Instruments seniority type'
|
||||
- type: 'csv_header'
|
||||
value: 'Issuer LEI code'
|
||||
column_header: 'Issuer LEI code'
|
||||
- type: 'csv_header'
|
||||
value: 'Instrument ESA 2010 class - value type'
|
||||
column_header: 'Instrument ESA 2010 class - value type'
|
||||
- type: 'csv_header'
|
||||
value: 'Issuer ESA 2010 class - value type'
|
||||
column_header: 'Issuer ESA 2010 class - value type'
|
||||
- type: 'csv_header'
|
||||
value: 'Security status date'
|
||||
column_header: 'Security status date'
|
||||
- type: 'csv_header'
|
||||
value: 'Group type'
|
||||
column_header: 'Group type'
|
||||
- type: 'csv_header'
|
||||
value: 'Has embedded option'
|
||||
column_header: 'Has embedded option'
|
||||
- type: 'csv_header'
|
||||
value: 'Volume traded'
|
||||
column_header: 'Volume traded'
|
||||
- type: 'csv_header'
|
||||
value: 'Primary listing name'
|
||||
column_header: 'Primary listing name'
|
||||
- type: 'csv_header'
|
||||
value: 'Primary listing residency country'
|
||||
column_header: 'Primary listing residency country'
|
||||
- type: 'csv_header'
|
||||
value: 'Instrument portfolio flags'
|
||||
column_header: 'Instrument portfolio flags'
|
||||
- type: 'static'
|
||||
value: ''
|
||||
column_header: 'BOND_DURATION'
|
||||
- type: 'csv_header'
|
||||
value: 'Residual maturity'
|
||||
column_header: 'Residual maturity'
|
||||
- type: 'csv_header'
|
||||
value: 'Original maturity'
|
||||
column_header: 'Original maturity'
|
||||
- type: 'csv_header'
|
||||
value: 'CFIN classification'
|
||||
column_header: 'CFIN classification'
|
||||
- type: 'csv_header'
|
||||
value: 'First scheduled Coupon date'
|
||||
column_header: 'First scheduled Coupon date'
|
||||
- type: 'csv_header'
|
||||
value: 'Last scheduled Coupon date'
|
||||
column_header: 'Last scheduled Coupon date'
|
||||
- type: 'csv_header'
|
||||
value: 'Coupon rate underlying ISIN'
|
||||
column_header: 'Coupon rate underlying ISIN'
|
||||
- type: 'csv_header'
|
||||
value: 'Coupon rate spread'
|
||||
column_header: 'Coupon rate spread'
|
||||
- type: 'csv_header'
|
||||
value: 'Coupon rate multiplier'
|
||||
column_header: 'Coupon rate multiplier'
|
||||
- type: 'csv_header'
|
||||
value: 'Coupon rate cap'
|
||||
column_header: 'Coupon rate cap'
|
||||
- type: 'csv_header'
|
||||
value: 'Coupon rate floor'
|
||||
column_header: 'Coupon rate floor'
|
||||
- type: 'csv_header'
|
||||
value: 'Issue date tranche'
|
||||
column_header: 'Issue date tranche'
|
||||
- type: 'csv_header'
|
||||
value: 'Issue price tranche'
|
||||
column_header: 'Issue price tranche'
|
||||
- type: 'csv_header'
|
||||
value: 'Is private placement'
|
||||
column_header: 'Is private placement'
|
||||
- type: 'csv_header'
|
||||
value: 'RIAD code'
|
||||
column_header: 'RIAD code'
|
||||
- type: 'csv_header'
|
||||
value: 'RIAD OUID'
|
||||
column_header: 'RIAD OUID'
|
||||
- type: 'csv_header'
|
||||
value: 'ESG Flag 1'
|
||||
column_header: 'ESG Flag 1'
|
||||
- type: 'csv_header'
|
||||
value: 'ESG Flag 2'
|
||||
column_header: 'ESG Flag 2'
|
||||
- type: 'csv_header'
|
||||
value: 'ESG Flag 3'
|
||||
column_header: 'ESG Flag 3'
|
||||
- type: 'csv_header'
|
||||
value: 'Strip'
|
||||
column_header: 'Strip'
|
||||
- type: 'csv_header'
|
||||
value: 'Depository receipt'
|
||||
column_header: 'Depository receipt'
|
||||
- type: 'csv_header'
|
||||
value: 'Rule 144A'
|
||||
column_header: 'Rule 144A'
|
||||
- type: 'csv_header'
|
||||
value: 'Reg S'
|
||||
column_header: 'Reg S'
|
||||
- type: 'csv_header'
|
||||
value: 'Warrant'
|
||||
column_header: 'Warrant'
|
||||
- type: 'csv_header'
|
||||
value: 'CSEC Relevance - stock'
|
||||
column_header: 'CSEC Relevance - stock'
|
||||
- type: 'csv_header'
|
||||
value: 'CSEC relevance - gross issuance'
|
||||
column_header: 'CSEC relevance - gross issuance'
|
||||
- type: 'csv_header'
|
||||
value: 'CSEC relevance - redemption'
|
||||
column_header: 'CSEC relevance - redemption'
|
||||
- type: 'csv_header'
|
||||
value: 'Accruing coupon'
|
||||
column_header: 'Accruing coupon'
|
||||
- type: 'csv_header'
|
||||
value: 'Accruing discount'
|
||||
column_header: 'Accruing discount'
|
||||
- type: 'csv_header'
|
||||
value: 'STEP Id'
|
||||
column_header: 'STEP Id'
|
||||
- type: 'csv_header'
|
||||
value: 'Program Name'
|
||||
column_header: 'Program Name'
|
||||
- type: 'csv_header'
|
||||
value: 'Program Ceiling'
|
||||
column_header: 'Program Ceiling'
|
||||
- type: 'csv_header'
|
||||
value: 'Program Status'
|
||||
column_header: 'Program Status'
|
||||
- type: 'csv_header'
|
||||
value: 'Issuer NACE21 sector'
|
||||
column_header: 'Issuer NACE21 sector'
|
||||
- type: 'csv_header'
|
||||
value: 'Instrument quotation basis'
|
||||
column_header: 'Instrument quotation basis'
|
||||
- type: 'csv_header'
|
||||
value: 'placeholder 38'
|
||||
column_header: 'placeholder 38'
|
||||
- type: 'csv_header'
|
||||
value: 'placeholder 39'
|
||||
column_header: 'placeholder 39'
|
||||
- type: 'csv_header'
|
||||
value: 'placeholder 40'
|
||||
column_header: 'placeholder 40'
|
||||
- type: 'csv_header'
|
||||
value: 'placeholder 41'
|
||||
column_header: 'placeholder 41'
|
||||
- type: 'csv_header'
|
||||
value: 'placeholder 42'
|
||||
column_header: 'placeholder 42'
|
||||
- type: 'csv_header'
|
||||
value: 'placeholder 43'
|
||||
column_header: 'placeholder 43'
|
||||
- type: 'csv_header'
|
||||
value: 'placeholder 44'
|
||||
column_header: 'placeholder 44'
|
||||
- type: 'csv_header'
|
||||
value: 'placeholder 45'
|
||||
column_header: 'placeholder 45'
|
||||
- type: 'csv_header'
|
||||
value: 'placeholder 46'
|
||||
column_header: 'placeholder 46'
|
||||
- type: 'csv_header'
|
||||
value: 'placeholder 47'
|
||||
column_header: 'placeholder 47'
|
||||
- type: 'csv_header'
|
||||
value: 'placeholder 48'
|
||||
column_header: 'placeholder 48'
|
||||
- type: 'csv_header'
|
||||
value: 'placeholder 49'
|
||||
column_header: 'placeholder 49'
|
||||
- type: 'csv_header'
|
||||
value: 'placeholder 50'
|
||||
column_header: 'placeholder 50'
|
||||
0
airflow/ods/csdb/full_ratings/.gitkeep
Normal file
0
airflow/ods/csdb/full_ratings/.gitkeep
Normal file
0
airflow/ods/csdb/full_ratings/config/.gitkeep
Normal file
0
airflow/ods/csdb/full_ratings/config/.gitkeep
Normal file
@@ -0,0 +1,103 @@
|
||||
encoding_type: latin1
|
||||
tmpdir: /tmp
|
||||
inbox_prefix: INBOX/CSDB/FullRatingsDissemination
|
||||
archive_prefix: ARCHIVE/CSDB/FullRatingsDissemination
|
||||
workflow_name: w_ODS_CSDB_RATINGS_FULL
|
||||
##file format
|
||||
validation_schema_path: None
|
||||
file_type: csv
|
||||
|
||||
tasks:
|
||||
- task_name: m_ODS_CSDB_INSTR_DESC_FULL_PARSE
|
||||
ods_prefix: INBOX/CSDB/FullRatingsDissemination/CSDB_INSTR_DESC_FULL
|
||||
output_table: CSDB_INSTR_DESC_FULL
|
||||
output_columns:
|
||||
- type: 'a_key'
|
||||
column_header: 'A_KEY'
|
||||
- type: 'workflow_key'
|
||||
column_header: 'A_WORKFLOW_HISTORY_KEY'
|
||||
- type: 'csv_header'
|
||||
value: 'RDB_INSTR_ID'
|
||||
column_header: 'IDIRINSTRUMENT'
|
||||
- type: 'csv_header'
|
||||
value: 'ISIN'
|
||||
column_header: 'ISIN'
|
||||
- type: 'csv_header'
|
||||
value: 'MOO_INSTR_ID'
|
||||
column_header: 'MOO_INSTR_ID'
|
||||
- type: 'csv_header'
|
||||
value: 'SNP_INSTR_ID'
|
||||
column_header: 'SNP_INSTR_ID'
|
||||
- type: 'csv_header'
|
||||
value: 'FTC_INSTR_ID'
|
||||
column_header: 'FITCH_IDENTIFIER'
|
||||
- type: 'csv_header'
|
||||
value: 'DBR_INSTR_ID'
|
||||
column_header: 'DBRS_IDENTIFIER'
|
||||
- type: 'csv_header'
|
||||
value: 'EA_STATUS'
|
||||
column_header: 'EA_STATUS'
|
||||
- type: 'csv_header'
|
||||
value: 'IS_TMS'
|
||||
column_header: 'IS_TMS'
|
||||
- type: 'csv_header'
|
||||
value: 'DBRS_COVERED_BOND_PROGRAM_ID'
|
||||
column_header: 'DBRS_COVERED_BOND_PROGRAM'
|
||||
- type: 'csv_header'
|
||||
value: 'FITCH_PROGRAM_ID'
|
||||
column_header: 'FITCH_PRG_IDENTIFIER'
|
||||
- type: 'csv_header'
|
||||
value: 'MOO_DEAL_NUMBER'
|
||||
column_header: 'MOO_DEAL_NUMBER'
|
||||
- type: 'csv_header'
|
||||
value: 'SNP_PROGRAM_ID'
|
||||
column_header: 'SNP_PROGRAM_ID'
|
||||
- type: 'csv_header'
|
||||
value: 'DBRS_DEBT_TYPE'
|
||||
column_header: 'IDIRDEBTTYPE'
|
||||
- type: 'csv_header'
|
||||
value: 'SNP_DEBT_TYPE'
|
||||
column_header: 'SNP_DEBT_TYPE'
|
||||
- type: 'csv_header'
|
||||
value: 'MOODY_SENIORITY'
|
||||
column_header: 'MOODY_SENIORITY'
|
||||
- type: 'csv_header'
|
||||
value: 'FITCH_DEBT_LEVEL_CODE'
|
||||
column_header: 'FITCH_DEBT_LEVEL_CODE'
|
||||
- type: 'csv_header'
|
||||
value: 'DBRS_RANK_TYPE'
|
||||
column_header: 'DBRS_RANK_TYPE'
|
||||
- type: 'csv_header'
|
||||
value: 'DBRS_SECURITY_TYPE'
|
||||
column_header: 'DBRS_SECURITY_TYPE'
|
||||
- type: 'csv_header'
|
||||
value: 'SCO_DEBT_TYPE'
|
||||
column_header: 'SCO_DEBT_TYPE'
|
||||
- type: 'csv_header'
|
||||
value: 'SCO_INSTR_ID'
|
||||
column_header: 'SCO_INSTR_ID'
|
||||
- type: 'csv_header'
|
||||
value: 'SCO_COVERED_BOND_PROGRAM'
|
||||
column_header: 'SCO_COVERED_BOND_PROGRAM'
|
||||
- type: 'csv_header'
|
||||
value: 'SCO_CATEGORY'
|
||||
column_header: 'SCO_CATEGORY'
|
||||
- type: 'csv_header'
|
||||
value: 'PLACEHOLDER15'
|
||||
column_header: 'PLACEHOLDER15'
|
||||
- type: 'csv_header'
|
||||
value: 'PLACEHOLDER16'
|
||||
column_header: 'PLACEHOLDER16'
|
||||
- type: 'csv_header'
|
||||
value: 'PLACEHOLDER17'
|
||||
column_header: 'PLACEHOLDER17'
|
||||
- type: 'csv_header'
|
||||
value: 'PLACEHOLDER18'
|
||||
column_header: 'PLACEHOLDER18'
|
||||
- type: 'csv_header'
|
||||
value: 'PLACEHOLDER19'
|
||||
column_header: 'PLACEHOLDER19'
|
||||
- type: 'csv_header'
|
||||
value: 'PLACEHOLDER20'
|
||||
column_header: 'PLACEHOLDER20'
|
||||
|
||||
@@ -0,0 +1,130 @@
|
||||
encoding_type: latin1
|
||||
# Global configurations
|
||||
tmpdir: /tmp
|
||||
inbox_prefix: INBOX/CSDB/FullRatingsDissemination
|
||||
archive_prefix: ARCHIVE/CSDB/FullRatingsDissemination
|
||||
workflow_name: w_ODS_CSDB_RATINGS_FULL
|
||||
validation_schema_path: None
|
||||
file_type: csv
|
||||
|
||||
tasks:
|
||||
- task_name: m_ODS_CSDB_INSTR_RAT_FULL_PARSE
|
||||
ods_prefix: INBOX/CSDB/FullRatingsDissemination/CSDB_INSTR_RAT_FULL
|
||||
output_table: CSDB_INSTR_RAT_FULL
|
||||
output_columns:
|
||||
- type: 'a_key'
|
||||
column_header: 'A_KEY'
|
||||
- type: 'workflow_key'
|
||||
column_header: 'A_WORKFLOW_HISTORY_KEY'
|
||||
- type: 'csv_header'
|
||||
value: 'RDB_INSTR_ID'
|
||||
column_header: 'RDB_INSTR_ID'
|
||||
- type: 'csv_header'
|
||||
value: 'SOURCE'
|
||||
column_header: 'SOURCE'
|
||||
- type: 'csv_header'
|
||||
value: 'RATING_SCHEME'
|
||||
column_header: 'RATING_SCHEME'
|
||||
- type: 'csv_header'
|
||||
value: 'RATING'
|
||||
column_header: 'RATING'
|
||||
- type: 'csv_header'
|
||||
value: 'RATING_DATE'
|
||||
column_header: 'RATING_DATE'
|
||||
- type: 'csv_header'
|
||||
value: 'TIME_HORIZON'
|
||||
column_header: 'TIME_HORIZON'
|
||||
- type: 'csv_header'
|
||||
value: 'CURRENCY_TYPE'
|
||||
column_header: 'CURRENCY_TYPE'
|
||||
- type: 'csv_header'
|
||||
value: 'NOTES'
|
||||
column_header: 'NOTES'
|
||||
- type: 'csv_header'
|
||||
value: 'VALID_FROM'
|
||||
column_header: 'VALID_FROM'
|
||||
- type: 'csv_header'
|
||||
value: 'VALID_UNTIL'
|
||||
column_header: 'VALID_UNTIL'
|
||||
- type: 'csv_header'
|
||||
value: 'RDB_RATINGS_ID'
|
||||
column_header: 'RDB_RATINGS_ID'
|
||||
- type: 'csv_header'
|
||||
value: 'OUTLOOK'
|
||||
column_header: 'WATCHLIST'
|
||||
- type: 'csv_header'
|
||||
value: 'OUTLOOK_DATE'
|
||||
column_header: 'WATCHLIST_DATE'
|
||||
- type: 'csv_header'
|
||||
value: 'WATCHLIST'
|
||||
column_header: 'OUTLOOK'
|
||||
- type: 'csv_header'
|
||||
value: 'WATCHLIST_DATE'
|
||||
column_header: 'OUTLOOK_DATE'
|
||||
- type: 'csv_header'
|
||||
value: 'RATING_ACTION'
|
||||
column_header: 'RATING_ACTION'
|
||||
- type: 'csv_header'
|
||||
value: 'RATING_ACTION_DATE'
|
||||
column_header: 'RATING_ACTION_DATE'
|
||||
- type: 'csv_header'
|
||||
value: 'IS_PRELIMINARY'
|
||||
column_header: 'IS_PRELIMINARY'
|
||||
- type: 'csv_header'
|
||||
value: 'RATING_RAW'
|
||||
column_header: 'RATING_RAW'
|
||||
- type: 'csv_header'
|
||||
value: 'RATING_TYPE'
|
||||
column_header: 'RATING_TYPE'
|
||||
- type: 'csv_header'
|
||||
value: 'ENDORSEMENT_INDICATOR'
|
||||
column_header: 'ENDORSEMENT_INDICATOR'
|
||||
- type: 'csv_header'
|
||||
value: 'LAST_REVIEW_DATE'
|
||||
column_header: 'LAST_REVIEW_DATE'
|
||||
- type: 'csv_header'
|
||||
value: 'PLACEHOLDER6'
|
||||
column_header: 'PLACEHOLDER6'
|
||||
- type: 'csv_header'
|
||||
value: 'PLACEHOLDER7'
|
||||
column_header: 'PLACEHOLDER7'
|
||||
- type: 'csv_header'
|
||||
value: 'PLACEHOLDER8'
|
||||
column_header: 'PLACEHOLDER8'
|
||||
- type: 'csv_header'
|
||||
value: 'PLACEHOLDER9'
|
||||
column_header: 'PLACEHOLDER9'
|
||||
- type: 'csv_header'
|
||||
value: 'PLACEHOLDER10'
|
||||
column_header: 'PLACEHOLDER10'
|
||||
- type: 'csv_header'
|
||||
value: 'PLACEHOLDER11'
|
||||
column_header: 'PLACEHOLDER11'
|
||||
- type: 'csv_header'
|
||||
value: 'PLACEHOLDER12'
|
||||
column_header: 'PLACEHOLDER12'
|
||||
- type: 'csv_header'
|
||||
value: 'PLACEHOLDER13'
|
||||
column_header: 'PLACEHOLDER13'
|
||||
- type: 'csv_header'
|
||||
value: 'PLACEHOLDER14'
|
||||
column_header: 'PLACEHOLDER14'
|
||||
- type: 'csv_header'
|
||||
value: 'PLACEHOLDER15'
|
||||
column_header: 'PLACEHOLDER15'
|
||||
- type: 'csv_header'
|
||||
value: 'PLACEHOLDER16'
|
||||
column_header: 'PLACEHOLDER16'
|
||||
- type: 'csv_header'
|
||||
value: 'PLACEHOLDER17'
|
||||
column_header: 'PLACEHOLDER17'
|
||||
- type: 'csv_header'
|
||||
value: 'PLACEHOLDER18'
|
||||
column_header: 'PLACEHOLDER18'
|
||||
- type: 'csv_header'
|
||||
value: 'PLACEHOLDER19'
|
||||
column_header: 'PLACEHOLDER19'
|
||||
- type: 'csv_header'
|
||||
value: 'PLACEHOLDER20'
|
||||
column_header: 'PLACEHOLDER20'
|
||||
|
||||
@@ -0,0 +1,106 @@
|
||||
encoding_type: latin1
|
||||
# Global configurations
|
||||
tmpdir: /tmp
|
||||
inbox_prefix: INBOX/CSDB/FullRatingsDissemination
|
||||
archive_prefix: ARCHIVE/CSDB/FullRatingsDissemination
|
||||
workflow_name: w_ODS_CSDB_RATINGS_FULL
|
||||
validation_schema_path: None
|
||||
file_type: csv
|
||||
|
||||
tasks:
|
||||
- task_name: m_ODS_CSDB_ISSUER_DESC_FULL_PARSE
|
||||
ods_prefix: INBOX/CSDB/FullRatingsDissemination/CSDB_ISSUER_DESC_FULL
|
||||
output_table: CSDB_ISSUER_DESC_FULL
|
||||
output_columns:
|
||||
- type: 'a_key'
|
||||
column_header: 'A_KEY'
|
||||
- type: 'workflow_key'
|
||||
column_header: 'A_WORKFLOW_HISTORY_KEY'
|
||||
- type: 'csv_header'
|
||||
value: 'RDB_ISSUER_ID'
|
||||
column_header: 'RDB_ISSUER_ID'
|
||||
- type: 'csv_header'
|
||||
value: 'ISSUER_NAME'
|
||||
column_header: 'ISSUERNAME'
|
||||
- type: 'csv_header'
|
||||
value: 'COUNTRY_DOMICILE'
|
||||
column_header: 'COUNTRY_DOMICILE'
|
||||
- type: 'csv_header'
|
||||
value: 'IS_SOVEREIGN'
|
||||
column_header: 'IS_SOVEREIGN'
|
||||
- type: 'csv_header'
|
||||
value: 'MOO_ISSUER_ID'
|
||||
column_header: 'MOODY_IDENTIFIER'
|
||||
- type: 'csv_header'
|
||||
value: 'SNP_ISSUER_ID'
|
||||
column_header: 'SNP_ISSUER_ID'
|
||||
- type: 'csv_header'
|
||||
value: 'FTC_ISSUER_ID'
|
||||
column_header: 'FITCH_IDENTIFIER'
|
||||
- type: 'csv_header'
|
||||
value: 'DBR_ISSUER_ID'
|
||||
column_header: 'DBRS_IDENTIFIER'
|
||||
- type: 'csv_header'
|
||||
value: 'LEI_ISSUER_ID'
|
||||
column_header: 'LEI_ISSUER_ID'
|
||||
- type: 'csv_header'
|
||||
value: 'RIAD_CODE'
|
||||
column_header: 'RIAD_CODE'
|
||||
- type: 'csv_header'
|
||||
value: 'RIAD_OUID'
|
||||
column_header: 'RIAD_OUID'
|
||||
- type: 'csv_header'
|
||||
value: 'CLASH_GROUP_STATUS'
|
||||
column_header: 'CLASH_GROUP_STATUS'
|
||||
- type: 'csv_header'
|
||||
value: 'SCO_ISSUER_ID'
|
||||
column_header: 'SCO_ISSUER_ID'
|
||||
- type: 'csv_header'
|
||||
value: 'PLACEHOLDER5'
|
||||
column_header: 'PLACEHOLDER5'
|
||||
- type: 'csv_header'
|
||||
value: 'PLACEHOLDER6'
|
||||
column_header: 'PLACEHOLDER6'
|
||||
- type: 'csv_header'
|
||||
value: 'PLACEHOLDER7'
|
||||
column_header: 'PLACEHOLDER7'
|
||||
- type: 'csv_header'
|
||||
value: 'PLACEHOLDER8'
|
||||
column_header: 'PLACEHOLDER8'
|
||||
- type: 'csv_header'
|
||||
value: 'PLACEHOLDER9'
|
||||
column_header: 'PLACEHOLDER9'
|
||||
- type: 'csv_header'
|
||||
value: 'PLACEHOLDER10'
|
||||
column_header: 'PLACEHOLDER10'
|
||||
- type: 'csv_header'
|
||||
value: 'PLACEHOLDER11'
|
||||
column_header: 'PLACEHOLDER11'
|
||||
- type: 'csv_header'
|
||||
value: 'PLACEHOLDER12'
|
||||
column_header: 'PLACEHOLDER12'
|
||||
- type: 'csv_header'
|
||||
value: 'PLACEHOLDER13'
|
||||
column_header: 'PLACEHOLDER13'
|
||||
- type: 'csv_header'
|
||||
value: 'PLACEHOLDER14'
|
||||
column_header: 'PLACEHOLDER14'
|
||||
- type: 'csv_header'
|
||||
value: 'PLACEHOLDER15'
|
||||
column_header: 'PLACEHOLDER15'
|
||||
- type: 'csv_header'
|
||||
value: 'PLACEHOLDER16'
|
||||
column_header: 'PLACEHOLDER16'
|
||||
- type: 'csv_header'
|
||||
value: 'PLACEHOLDER17'
|
||||
column_header: 'PLACEHOLDER17'
|
||||
- type: 'csv_header'
|
||||
value: 'PLACEHOLDER18'
|
||||
column_header: 'PLACEHOLDER18'
|
||||
- type: 'csv_header'
|
||||
value: 'PLACEHOLDER19'
|
||||
column_header: 'PLACEHOLDER19'
|
||||
- type: 'csv_header'
|
||||
value: 'PLACEHOLDER20'
|
||||
column_header: 'PLACEHOLDER20'
|
||||
|
||||
@@ -0,0 +1,131 @@
|
||||
encoding_type: latin1
|
||||
# Global configurations
|
||||
tmpdir: /tmp
|
||||
inbox_prefix: INBOX/CSDB/FullRatingsDissemination
|
||||
archive_prefix: ARCHIVE/CSDB/FullRatingsDissemination
|
||||
workflow_name: w_ODS_CSDB_RATINGS_FULL
|
||||
validation_schema_path: None
|
||||
file_type: csv
|
||||
|
||||
tasks:
|
||||
- task_name: m_ODS_CSDB_ISSUER_RAT_FULL_PARSE
|
||||
ods_prefix: INBOX/CSDB/FullRatingsDissemination/CSDB_ISSUER_RAT_FULL
|
||||
output_table: CSDB_ISSUER_RAT_FULL
|
||||
output_columns:
|
||||
- type: 'a_key'
|
||||
column_header: 'A_KEY'
|
||||
- type: 'workflow_key'
|
||||
column_header: 'A_WORKFLOW_HISTORY_KEY'
|
||||
- type: 'csv_header'
|
||||
value: 'RDB_ISSUER_ID'
|
||||
column_header: 'RDB_ISSUER_ID'
|
||||
- type: 'csv_header'
|
||||
value: 'SOURCE'
|
||||
column_header: 'SOURCE'
|
||||
- type: 'csv_header'
|
||||
value: 'RATING_SCHEME'
|
||||
column_header: 'RATING_SCHEME'
|
||||
- type: 'csv_header'
|
||||
value: 'RATING'
|
||||
column_header: 'RATING'
|
||||
- type: 'csv_header'
|
||||
value: 'RATING_DATE'
|
||||
column_header: 'RATING_DATE'
|
||||
- type: 'csv_header'
|
||||
value: 'TIME_HORIZON'
|
||||
column_header: 'TIME_HORIZON'
|
||||
- type: 'csv_header'
|
||||
value: 'CURRENCY_TYPE'
|
||||
column_header: 'CURRENCY_TYPE'
|
||||
- type: 'csv_header'
|
||||
value: 'NOTES'
|
||||
column_header: 'NOTES'
|
||||
- type: 'csv_header'
|
||||
value: 'VALID_FROM'
|
||||
column_header: 'VALID_FROM'
|
||||
- type: 'csv_header'
|
||||
value: 'VALID_UNTIL'
|
||||
column_header: 'VALID_UNTIL'
|
||||
- type: 'csv_header'
|
||||
value: 'RDB_RATINGS_ID'
|
||||
column_header: 'RDB_RATINGS_ID'
|
||||
- type: 'csv_header'
|
||||
value: 'OUTLOOK'
|
||||
column_header: 'OUTLOOK'
|
||||
- type: 'csv_header'
|
||||
value: 'OUTLOOK_DATE'
|
||||
column_header: 'OUTLOOK_DATE'
|
||||
- type: 'csv_header'
|
||||
value: 'WATCHLIST'
|
||||
column_header: 'WATCHLIST'
|
||||
- type: 'csv_header'
|
||||
value: 'WATCHLIST_DATE'
|
||||
column_header: 'WATCHLIST_DATE'
|
||||
- type: 'csv_header'
|
||||
value: 'RATING_ACTION'
|
||||
column_header: 'RATING_ACTION'
|
||||
- type: 'csv_header'
|
||||
value: 'RATING_ACTION_DATE'
|
||||
column_header: 'RATING_ACTION_DATE'
|
||||
- type: 'csv_header'
|
||||
value: 'IS_PRELIMINARY'
|
||||
column_header: 'IS_PRELIMINARY'
|
||||
- type: 'csv_header'
|
||||
value: 'RATING_RAW'
|
||||
column_header: 'RATING_RAW'
|
||||
- type: 'csv_header'
|
||||
value: 'RATING_TYPE'
|
||||
column_header: 'RATING_TYPE'
|
||||
- type: 'csv_header'
|
||||
value: 'ENDORSEMENT_INDICATOR'
|
||||
column_header: 'ENDORSEMENT_INDICATOR'
|
||||
- type: 'csv_header'
|
||||
value: 'LAST_REVIEW_DATE'
|
||||
column_header: 'LAST_REVIEW_DATE'
|
||||
- type: 'csv_header'
|
||||
value: 'PLACEHOLDER6'
|
||||
column_header: 'PLACEHOLDER6'
|
||||
- type: 'csv_header'
|
||||
value: 'PLACEHOLDER7'
|
||||
column_header: 'PLACEHOLDER7'
|
||||
- type: 'csv_header'
|
||||
value: 'PLACEHOLDER8'
|
||||
column_header: 'PLACEHOLDER8'
|
||||
- type: 'csv_header'
|
||||
value: 'PLACEHOLDER9'
|
||||
column_header: 'PLACEHOLDER9'
|
||||
- type: 'csv_header'
|
||||
value: 'PLACEHOLDER10'
|
||||
column_header: 'PLACEHOLDER10'
|
||||
- type: 'csv_header'
|
||||
value: 'PLACEHOLDER11'
|
||||
column_header: 'PLACEHOLDER11'
|
||||
- type: 'csv_header'
|
||||
value: 'PLACEHOLDER12'
|
||||
column_header: 'PLACEHOLDER12'
|
||||
- type: 'csv_header'
|
||||
value: 'PLACEHOLDER13'
|
||||
column_header: 'PLACEHOLDER13'
|
||||
- type: 'csv_header'
|
||||
value: 'PLACEHOLDER14'
|
||||
column_header: 'PLACEHOLDER14'
|
||||
- type: 'csv_header'
|
||||
value: 'PLACEHOLDER15'
|
||||
column_header: 'PLACEHOLDER15'
|
||||
- type: 'csv_header'
|
||||
value: 'PLACEHOLDER16'
|
||||
column_header: 'PLACEHOLDER16'
|
||||
- type: 'csv_header'
|
||||
value: 'PLACEHOLDER17'
|
||||
column_header: 'PLACEHOLDER17'
|
||||
- type: 'csv_header'
|
||||
value: 'PLACEHOLDER18'
|
||||
column_header: 'PLACEHOLDER18'
|
||||
- type: 'csv_header'
|
||||
value: 'PLACEHOLDER19'
|
||||
column_header: 'PLACEHOLDER19'
|
||||
- type: 'csv_header'
|
||||
value: 'PLACEHOLDER20'
|
||||
column_header: 'PLACEHOLDER20'
|
||||
|
||||
|
||||
@@ -0,0 +1,420 @@
|
||||
import sys
|
||||
import os
|
||||
import json
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from datetime import timedelta, datetime, timezone
|
||||
from email.utils import parsedate_to_datetime
|
||||
|
||||
from airflow import DAG
|
||||
from airflow.models import Variable
|
||||
from airflow.decorators import task as af_task
|
||||
from airflow.operators.trigger_dagrun import TriggerDagRunOperator
|
||||
from airflow.utils.dates import days_ago
|
||||
from airflow.utils.trigger_rule import TriggerRule
|
||||
from airflow.operators.python import get_current_context
|
||||
|
||||
try:
|
||||
from airflow.exceptions import AirflowFailException, AirflowSkipException
|
||||
except Exception:
|
||||
from airflow.exceptions import AirflowException as AirflowFailException
|
||||
from airflow.exceptions import AirflowSkipException
|
||||
|
||||
dag_id = "w_ODS_CSDB_RATINGS_FULL_COORDINATOR"
|
||||
|
||||
default_args = {
|
||||
'owner': 'airflow',
|
||||
'depends_on_past': False,
|
||||
'start_date': days_ago(1),
|
||||
'email_on_failure': False,
|
||||
'email_on_retry': False,
|
||||
'retries': 1,
|
||||
'retry_delay': timedelta(minutes=5),
|
||||
}
|
||||
|
||||
OCI_NAMESPACE = os.getenv("BUCKET_NAMESPACE")
|
||||
OCI_BUCKET = os.getenv("INBOX_BUCKET")
|
||||
OBJECT_PREFIX = os.getenv("OBJECT_PREFIX", "csdb/ratings/full/")
|
||||
REPROCESS = (os.getenv("CSDB_REPROCESS", "false").lower() in ("1", "true", "yes"))
|
||||
LAST_TS_VAR = f"{dag_id}__last_seen_ts"
|
||||
PROCESSED_TS_VAR = f"{dag_id}__processed_objects_ts"
|
||||
|
||||
|
||||
def _oci_client():
|
||||
import oci
|
||||
region = os.getenv("OCI_REGION") or os.getenv("OCI_RESOURCE_PRINCIPAL_REGION") or "eu-frankfurt-1"
|
||||
try:
|
||||
rp_signer = oci.auth.signers.get_resource_principals_signer()
|
||||
cfg = {"region": region} if region else {}
|
||||
logging.info("Using OCI Resource Principals signer (region=%s).", cfg.get("region"))
|
||||
return oci.object_storage.ObjectStorageClient(cfg, signer=rp_signer)
|
||||
except Exception as e:
|
||||
logging.info("RP not available: %s", e)
|
||||
try:
|
||||
ip_signer = oci.auth.signers.InstancePrincipalsSecurityTokenSigner()
|
||||
cfg = {"region": region} if region else {}
|
||||
logging.info("Using OCI Instance Principals signer (region=%s).", cfg.get("region"))
|
||||
return oci.object_storage.ObjectStorageClient(cfg, signer=ip_signer)
|
||||
except Exception as e:
|
||||
logging.info("IP not available: %s", e)
|
||||
logging.error("Neither Resource Principals nor Instance Principals authentication found.")
|
||||
raise RuntimeError("Failed to create OCI client")
|
||||
|
||||
|
||||
def _load_processed_map() -> dict[str, float]:
|
||||
try:
|
||||
raw = Variable.get(PROCESSED_TS_VAR, default_var="{}")
|
||||
m = json.loads(raw) or {}
|
||||
if isinstance(m, dict):
|
||||
return {k: float(v) for k, v in m.items()}
|
||||
except Exception:
|
||||
pass
|
||||
return {}
|
||||
|
||||
|
||||
def _list_all_zip_objects(include_processed: bool = False) -> list[dict]:
|
||||
"""List all zip files in the bucket"""
|
||||
if not OCI_NAMESPACE or not OCI_BUCKET:
|
||||
raise AirflowFailException("BUCKET_NAMESPACE and INBOX_BUCKET must be set")
|
||||
|
||||
client = _oci_client()
|
||||
processed_map = _load_processed_map() if not include_processed else {}
|
||||
|
||||
resp = client.list_objects(OCI_NAMESPACE, OCI_BUCKET, prefix=OBJECT_PREFIX)
|
||||
all_items: list[dict] = []
|
||||
|
||||
for o in (resp.data.objects or []):
|
||||
name = (o.name or "").strip()
|
||||
base = name.rsplit("/", 1)[-1] if name else ""
|
||||
|
||||
if not name or name.endswith('/') or not base:
|
||||
continue
|
||||
|
||||
if not ("STC-FullRatingsDissemination" in base and base.lower().endswith(".zip")):
|
||||
continue
|
||||
|
||||
# Get timestamp
|
||||
ts = None
|
||||
t = getattr(o, "time_created", None)
|
||||
if t:
|
||||
try:
|
||||
ts = t.timestamp() if hasattr(t, "timestamp") else float(t) / 1000.0
|
||||
except Exception:
|
||||
ts = None
|
||||
|
||||
if ts is None:
|
||||
try:
|
||||
head = client.head_object(OCI_NAMESPACE, OCI_BUCKET, name)
|
||||
lm = head.headers.get("last-modified") or head.headers.get("Last-Modified")
|
||||
if lm:
|
||||
dt = parsedate_to_datetime(lm)
|
||||
if dt.tzinfo is None:
|
||||
dt = dt.replace(tzinfo=timezone.utc)
|
||||
ts = dt.timestamp()
|
||||
except Exception as e:
|
||||
logging.warning("head_object failed for %s: %s", name, e)
|
||||
|
||||
if ts is None:
|
||||
ts = datetime.now(timezone.utc).timestamp()
|
||||
|
||||
# Check if already processed
|
||||
last_proc_ts = float(processed_map.get(name, 0.0))
|
||||
is_processed = (ts <= last_proc_ts) if processed_map else False
|
||||
|
||||
item = {
|
||||
"name": name,
|
||||
"base": base,
|
||||
"mtime": ts,
|
||||
"is_processed": is_processed
|
||||
}
|
||||
all_items.append(item)
|
||||
|
||||
# Sort by timestamp (oldest first)
|
||||
all_items.sort(key=lambda x: x["mtime"])
|
||||
|
||||
return all_items
|
||||
|
||||
|
||||
def _list_new_zip_objects() -> list[dict]:
|
||||
"""List only new/unprocessed zip files"""
|
||||
all_items = _list_all_zip_objects(include_processed=False)
|
||||
|
||||
# Filter out processed items
|
||||
new_items = [item for item in all_items if not item.get("is_processed", False)]
|
||||
|
||||
logging.info("Found %d new STC-FullRatingsDissemination zip file(s) (sorted oldest to newest)", len(new_items))
|
||||
return new_items
|
||||
|
||||
|
||||
def _find_specific_zip(filename_pattern: str) -> dict:
|
||||
"""Find a specific zip file by name pattern"""
|
||||
all_items = _list_all_zip_objects(include_processed=True)
|
||||
|
||||
# Try exact match first
|
||||
for item in all_items:
|
||||
if item["base"] == filename_pattern or item["name"] == filename_pattern:
|
||||
logging.info("Found exact match: %s", item["base"])
|
||||
return item
|
||||
|
||||
# Try partial match
|
||||
for item in all_items:
|
||||
if filename_pattern.lower() in item["base"].lower():
|
||||
logging.info("Found partial match: %s", item["base"])
|
||||
return item
|
||||
|
||||
raise AirflowFailException(f"No zip file found matching pattern: {filename_pattern}")
|
||||
|
||||
|
||||
with DAG(
|
||||
dag_id=dag_id,
|
||||
default_args=default_args,
|
||||
description='CSDB Ratings Full Coordinator: Lists and triggers processing for zip files',
|
||||
schedule_interval="0 */6 * * *", # Every 6 hours, adjust as needed
|
||||
catchup=False,
|
||||
max_active_runs=1,
|
||||
render_template_as_native_obj=True,
|
||||
tags=["CSDB", "COORDINATOR", "ODS", "OCI", "RATINGS"],
|
||||
) as dag:
|
||||
|
||||
@af_task(task_id="determine_processing_mode")
|
||||
def determine_processing_mode(**context):
|
||||
"""
|
||||
Determine what to process based on dag_run configuration.
|
||||
|
||||
Configuration options:
|
||||
1. No config or mode='all': Process all new zip files
|
||||
2. mode='specific' + filename='xxx': Process specific zip file
|
||||
3. mode='reprocess_all': Reprocess all zip files (including already processed)
|
||||
4. mode='list_only': Just list available files without processing
|
||||
5. filenames=['file1.zip', 'file2.zip']: Process specific list of files
|
||||
"""
|
||||
conf = context.get('dag_run').conf or {}
|
||||
|
||||
mode = conf.get('mode', 'all')
|
||||
filename = conf.get('filename')
|
||||
filenames = conf.get('filenames', [])
|
||||
force_reprocess = conf.get('force_reprocess', False)
|
||||
limit = conf.get('limit') # Limit number of files to process
|
||||
|
||||
logging.info("Processing mode: %s", mode)
|
||||
logging.info("Configuration: %s", json.dumps(conf, indent=2))
|
||||
|
||||
result = {
|
||||
"mode": mode,
|
||||
"filename": filename,
|
||||
"filenames": filenames,
|
||||
"force_reprocess": force_reprocess,
|
||||
"limit": limit
|
||||
}
|
||||
|
||||
return result
|
||||
|
||||
@af_task(task_id="list_zip_files")
|
||||
def list_zip_files(mode_config: dict):
|
||||
"""List zip files based on the processing mode"""
|
||||
mode = mode_config.get("mode", "all")
|
||||
filename = mode_config.get("filename")
|
||||
filenames = mode_config.get("filenames", [])
|
||||
force_reprocess = mode_config.get("force_reprocess", False)
|
||||
limit = mode_config.get("limit")
|
||||
|
||||
zip_files = []
|
||||
|
||||
if mode == "list_only":
|
||||
# Just list all files for information
|
||||
all_files = _list_all_zip_objects(include_processed=True)
|
||||
logging.info("=== Available ZIP Files ===")
|
||||
for idx, f in enumerate(all_files, 1):
|
||||
status = "PROCESSED" if f.get("is_processed") else "NEW"
|
||||
logging.info("%d. [%s] %s (mtime: %s)",
|
||||
idx, status, f["base"],
|
||||
datetime.fromtimestamp(f["mtime"]).isoformat())
|
||||
raise AirflowSkipException("List only mode - no processing triggered")
|
||||
|
||||
elif mode == "specific":
|
||||
# Process a specific file
|
||||
if not filename:
|
||||
raise AirflowFailException("mode='specific' requires 'filename' parameter")
|
||||
|
||||
zip_file = _find_specific_zip(filename)
|
||||
zip_files = [zip_file]
|
||||
logging.info("Processing specific file: %s", zip_file["base"])
|
||||
|
||||
elif mode == "specific_list":
|
||||
# Process a list of specific files
|
||||
if not filenames:
|
||||
raise AirflowFailException("mode='specific_list' requires 'filenames' parameter")
|
||||
|
||||
for fn in filenames:
|
||||
try:
|
||||
zip_file = _find_specific_zip(fn)
|
||||
zip_files.append(zip_file)
|
||||
except Exception as e:
|
||||
logging.warning("Could not find file %s: %s", fn, e)
|
||||
|
||||
if not zip_files:
|
||||
raise AirflowFailException("None of the specified files were found")
|
||||
|
||||
logging.info("Processing %d specific files", len(zip_files))
|
||||
|
||||
elif mode == "reprocess_all":
|
||||
# Reprocess all files (including already processed)
|
||||
all_files = _list_all_zip_objects(include_processed=True)
|
||||
zip_files = all_files
|
||||
logging.info("Reprocessing all %d files", len(zip_files))
|
||||
|
||||
elif mode == "date_range":
|
||||
# Process files within a date range
|
||||
start_date = mode_config.get("start_date")
|
||||
end_date = mode_config.get("end_date")
|
||||
|
||||
if not start_date or not end_date:
|
||||
raise AirflowFailException("mode='date_range' requires 'start_date' and 'end_date'")
|
||||
|
||||
start_ts = datetime.fromisoformat(start_date).timestamp()
|
||||
end_ts = datetime.fromisoformat(end_date).timestamp()
|
||||
|
||||
all_files = _list_all_zip_objects(include_processed=True)
|
||||
zip_files = [f for f in all_files if start_ts <= f["mtime"] <= end_ts]
|
||||
|
||||
logging.info("Found %d files in date range %s to %s",
|
||||
len(zip_files), start_date, end_date)
|
||||
|
||||
else: # mode == "all" or default
|
||||
# Process all new files
|
||||
zip_files = _list_new_zip_objects()
|
||||
|
||||
if not zip_files:
|
||||
logging.info("No new zip files to process")
|
||||
raise AirflowSkipException("No new zip files found")
|
||||
|
||||
# Apply limit if specified
|
||||
if limit and isinstance(limit, int) and limit > 0:
|
||||
original_count = len(zip_files)
|
||||
zip_files = zip_files[:limit]
|
||||
logging.info("Limited processing from %d to %d files", original_count, len(zip_files))
|
||||
|
||||
# Sort by timestamp (oldest first)
|
||||
zip_files.sort(key=lambda x: x["mtime"])
|
||||
|
||||
logging.info("Selected %d zip file(s) for processing:", len(zip_files))
|
||||
for idx, f in enumerate(zip_files, 1):
|
||||
logging.info("%d. %s (mtime: %s)",
|
||||
idx, f["base"],
|
||||
datetime.fromtimestamp(f["mtime"]).isoformat())
|
||||
|
||||
return {
|
||||
"zip_files": zip_files,
|
||||
"mode": mode,
|
||||
"force_reprocess": force_reprocess
|
||||
}
|
||||
|
||||
@af_task(task_id="trigger_processing_dags")
|
||||
def trigger_processing_dags(list_result: dict):
|
||||
"""Trigger the processing DAG for each zip file sequentially"""
|
||||
from airflow.api.common.trigger_dag import trigger_dag
|
||||
from time import sleep
|
||||
|
||||
zip_files = list_result.get("zip_files", [])
|
||||
mode = list_result.get("mode", "all")
|
||||
force_reprocess = list_result.get("force_reprocess", False)
|
||||
|
||||
if not zip_files:
|
||||
logging.info("No zip files to process")
|
||||
return []
|
||||
|
||||
triggered_runs = []
|
||||
|
||||
for idx, zip_file in enumerate(zip_files):
|
||||
conf = {
|
||||
"zip_object_name": zip_file["name"],
|
||||
"zip_base_name": zip_file["base"],
|
||||
"zip_mtime": zip_file["mtime"],
|
||||
"sequence_number": idx + 1,
|
||||
"total_files": len(zip_files),
|
||||
"processing_mode": mode,
|
||||
"force_reprocess": force_reprocess,
|
||||
"is_processed": zip_file.get("is_processed", False)
|
||||
}
|
||||
|
||||
logging.info(f"Triggering processing DAG for file {idx + 1}/{len(zip_files)}: {zip_file['base']}")
|
||||
|
||||
try:
|
||||
run_id = trigger_dag(
|
||||
dag_id="w_ODS_CSDB_RATINGS_FULL_CORE",
|
||||
run_id=f"coordinator__{datetime.now().strftime('%Y%m%d_%H%M%S')}__{idx}",
|
||||
conf=conf,
|
||||
execution_date=None,
|
||||
replace_microseconds=False,
|
||||
)
|
||||
|
||||
triggered_runs.append({
|
||||
"run_id": str(run_id),
|
||||
"zip_file": zip_file["base"],
|
||||
"sequence": idx + 1,
|
||||
"status": "triggered"
|
||||
})
|
||||
|
||||
logging.info(f"Successfully triggered run: {run_id}")
|
||||
|
||||
except Exception as e:
|
||||
logging.error(f"Failed to trigger processing for {zip_file['base']}: {e}")
|
||||
triggered_runs.append({
|
||||
"zip_file": zip_file["base"],
|
||||
"sequence": idx + 1,
|
||||
"status": "failed",
|
||||
"error": str(e)
|
||||
})
|
||||
|
||||
# Small delay between triggers to avoid overwhelming the system
|
||||
sleep(2)
|
||||
|
||||
logging.info(f"Triggered {len([r for r in triggered_runs if r.get('status') == 'triggered'])} processing DAG runs")
|
||||
logging.info(f"Failed to trigger {len([r for r in triggered_runs if r.get('status') == 'failed'])} runs")
|
||||
|
||||
return triggered_runs
|
||||
|
||||
@af_task(task_id="summary_report")
|
||||
def summary_report(trigger_result: list):
|
||||
"""Generate a summary report of triggered runs"""
|
||||
if not trigger_result:
|
||||
logging.info("No runs were triggered")
|
||||
return
|
||||
|
||||
successful = [r for r in trigger_result if r.get("status") == "triggered"]
|
||||
failed = [r for r in trigger_result if r.get("status") == "failed"]
|
||||
|
||||
logging.info("=" * 80)
|
||||
logging.info("PROCESSING SUMMARY")
|
||||
logging.info("=" * 80)
|
||||
logging.info(f"Total files: {len(trigger_result)}")
|
||||
logging.info(f"Successfully triggered: {len(successful)}")
|
||||
logging.info(f"Failed to trigger: {len(failed)}")
|
||||
|
||||
if successful:
|
||||
logging.info("\nSuccessfully triggered:")
|
||||
for r in successful:
|
||||
logging.info(f" - {r['zip_file']} (run_id: {r['run_id']})")
|
||||
|
||||
if failed:
|
||||
logging.info("\nFailed to trigger:")
|
||||
for r in failed:
|
||||
logging.info(f" - {r['zip_file']} (error: {r.get('error', 'unknown')})")
|
||||
|
||||
logging.info("=" * 80)
|
||||
|
||||
return {
|
||||
"total": len(trigger_result),
|
||||
"successful": len(successful),
|
||||
"failed": len(failed)
|
||||
}
|
||||
|
||||
# Build DAG structure
|
||||
mode_task = determine_processing_mode()
|
||||
list_task = list_zip_files(mode_task)
|
||||
trigger_task = trigger_processing_dags(list_task)
|
||||
summary_task = summary_report(trigger_task)
|
||||
|
||||
mode_task >> list_task >> trigger_task >> summary_task
|
||||
|
||||
logging.info("CSDB Ratings Full Coordinator DAG ready")
|
||||
@@ -0,0 +1,388 @@
|
||||
import sys
|
||||
import os
|
||||
import json
|
||||
import logging
|
||||
import zipfile
|
||||
from pathlib import Path
|
||||
from datetime import timedelta, datetime, timezone
|
||||
|
||||
from airflow import DAG
|
||||
from airflow.models import Variable
|
||||
from airflow.decorators import task as af_task
|
||||
from airflow.operators.python import PythonOperator
|
||||
from airflow.utils.dates import days_ago
|
||||
from airflow.utils.trigger_rule import TriggerRule
|
||||
from airflow.operators.empty import EmptyOperator
|
||||
from airflow.operators.python import get_current_context
|
||||
|
||||
try:
|
||||
from airflow.exceptions import AirflowFailException, AirflowSkipException
|
||||
except Exception:
|
||||
from airflow.exceptions import AirflowException as AirflowFailException
|
||||
from airflow.exceptions import AirflowSkipException
|
||||
|
||||
sys.path.append('/opt/airflow/python/mrds_common')
|
||||
sys.path.append('/opt/airflow/src/airflow/dags/ods/csdb')
|
||||
from mrds.utils.manage_runs import init_workflow as mrds_init_workflow, finalise_workflow as mrds_finalise_workflow
|
||||
from mrds.core import main as mrds_main
|
||||
|
||||
dag_id = "w_ODS_CSDB_RATINGS_FULL_CORE"
|
||||
|
||||
default_args = {
|
||||
'owner': 'airflow',
|
||||
'depends_on_past': False,
|
||||
'start_date': days_ago(1),
|
||||
'email_on_failure': False,
|
||||
'email_on_retry': False,
|
||||
'retries': 1,
|
||||
'retry_delay': timedelta(minutes=5),
|
||||
}
|
||||
|
||||
WORKFLOW_CONFIG = {
|
||||
"database_name": "ODS",
|
||||
"workflow_name": dag_id,
|
||||
}
|
||||
|
||||
OCI_NAMESPACE = os.getenv("BUCKET_NAMESPACE")
|
||||
OCI_BUCKET = os.getenv("INBOX_BUCKET")
|
||||
OBJECT_PREFIX = os.getenv("OBJECT_PREFIX", "csdb/ratings/full/")
|
||||
TEMP_DIR = "/tmp/csdb_ratings"
|
||||
PROCESSED_TS_VAR = "w_ODS_CSDB_RATINGS_FULL_COORDINATOR__processed_objects_ts"
|
||||
|
||||
# CSV configurations
|
||||
CSV_CONFIGS = [
|
||||
{
|
||||
"source_filename": "FULL_INSTRUMENT_DESCRIPTION.csv",
|
||||
"config_yaml": "/opt/airflow/src/airflow/dags/ods/csdb/full_ratings/config/m_ODS_CSDB_INSTR_DESC_FULL_PARSE.yaml",
|
||||
"task_name": "m_ODS_CSDB_RATINGS_FULL_INSTRUMENT_DESCRIPTION"
|
||||
},
|
||||
{
|
||||
"source_filename": "FULL_INSTRUMENT_RATINGS.csv",
|
||||
"config_yaml": "/opt/airflow/src/airflow/dags/ods/csdb/full_ratings/config/m_ODS_CSDB_INSTR_RAT_FULL_PARSE.yaml",
|
||||
"task_name": "m_ODS_CSDB_RATINGS_FULL_INSTRUMENT_RATINGS"
|
||||
},
|
||||
{
|
||||
"source_filename": "FULL_ISSUER_DESCRIPTION.csv",
|
||||
"config_yaml": "/opt/airflow/src/airflow/dags/ods/csdb/full_ratings/config/m_ODS_CSDB_ISSUER_DESC_FULL_PARSE.yaml",
|
||||
"task_name": "m_ODS_CSDB_RATINGS_FULL_ISSUER_DESCRIPTION"
|
||||
},
|
||||
{
|
||||
"source_filename": "FULL_ISSUER_RATINGS.csv",
|
||||
"config_yaml": "/opt/airflow/src/airflow/dags/ods/csdb/full_ratings/config/m_ODS_CSDB_ISSUER_RAT_FULL_PARSE.yaml",
|
||||
"task_name": "m_ODS_CSDB_RATINGS_FULL_ISSUER_RATINGS"
|
||||
}
|
||||
]
|
||||
|
||||
|
||||
def _oci_client():
|
||||
import oci
|
||||
region = os.getenv("OCI_REGION") or os.getenv("OCI_RESOURCE_PRINCIPAL_REGION") or "eu-frankfurt-1"
|
||||
try:
|
||||
rp_signer = oci.auth.signers.get_resource_principals_signer()
|
||||
cfg = {"region": region} if region else {}
|
||||
logging.info("Using OCI Resource Principals signer (region=%s).", cfg.get("region"))
|
||||
return oci.object_storage.ObjectStorageClient(cfg, signer=rp_signer)
|
||||
except Exception as e:
|
||||
logging.info("RP not available: %s", e)
|
||||
try:
|
||||
ip_signer = oci.auth.signers.InstancePrincipalsSecurityTokenSigner()
|
||||
cfg = {"region": region} if region else {}
|
||||
logging.info("Using OCI Instance Principals signer (region=%s).", cfg.get("region"))
|
||||
return oci.object_storage.ObjectStorageClient(cfg, signer=ip_signer)
|
||||
except Exception as e:
|
||||
logging.info("IP not available: %s", e)
|
||||
logging.error("Neither Resource Principals nor Instance Principals authentication found.")
|
||||
raise RuntimeError("Failed to create OCI client")
|
||||
|
||||
|
||||
def _load_processed_map() -> dict[str, float]:
|
||||
try:
|
||||
raw = Variable.get(PROCESSED_TS_VAR, default_var="{}")
|
||||
m = json.loads(raw) or {}
|
||||
if isinstance(m, dict):
|
||||
return {k: float(v) for k, v in m.items()}
|
||||
except Exception:
|
||||
pass
|
||||
return {}
|
||||
|
||||
|
||||
def _save_processed_map(m: dict[str, float]) -> None:
|
||||
Variable.set(PROCESSED_TS_VAR, json.dumps(m))
|
||||
|
||||
|
||||
def _mark_processed(zip_key: str, zip_mtime: float):
    """Record `zip_key` as processed at `zip_mtime` and persist the updated map."""
    processed = _load_processed_map()
    processed[zip_key] = float(zip_mtime)
    _save_processed_map(processed)
    logging.info("Marked as processed: %s (mtime=%s)", zip_key, zip_mtime)
|
||||
|
||||
|
||||
# DAG definition: every task below is registered on this DAG object.
with DAG(
    dag_id=dag_id,
    default_args=default_args,
    description='CSDB Ratings Full Processor: Processes one zip file with 4 CSV files in parallel',
    schedule_interval=None,  # Triggered by coordinator
    catchup=False,
    max_active_runs=3,  # Allow some parallelism but controlled
    render_template_as_native_obj=True,  # templated XComs keep native Python types
    tags=["CSDB", "PROCESSOR", "MRDS", "ODS", "OCI", "RATINGS"],
) as dag:
|
||||
|
||||
@af_task(task_id="get_zip_config")
|
||||
def get_zip_config(**context):
|
||||
"""Get the zip file configuration from dag_run conf"""
|
||||
conf = context['dag_run'].conf or {}
|
||||
|
||||
zip_object_name = conf.get('zip_object_name')
|
||||
zip_base_name = conf.get('zip_base_name')
|
||||
zip_mtime = conf.get('zip_mtime')
|
||||
sequence_number = conf.get('sequence_number', 0)
|
||||
total_files = conf.get('total_files', 0)
|
||||
|
||||
if not all([zip_object_name, zip_base_name, zip_mtime]):
|
||||
raise AirflowFailException("Missing required configuration: zip_object_name, zip_base_name, or zip_mtime")
|
||||
|
||||
logging.info(f"Processing zip file {sequence_number}/{total_files}: {zip_base_name}")
|
||||
|
||||
return {
|
||||
"zip_object_name": zip_object_name,
|
||||
"zip_base_name": zip_base_name,
|
||||
"zip_mtime": zip_mtime,
|
||||
"sequence_number": sequence_number,
|
||||
"total_files": total_files
|
||||
}
|
||||
|
||||
@af_task(task_id="download_and_unzip")
|
||||
def download_and_unzip(config: dict):
|
||||
"""Download and unzip the specific zip file"""
|
||||
zip_key = config["zip_object_name"]
|
||||
zip_base = config["zip_base_name"]
|
||||
|
||||
client = _oci_client()
|
||||
os.makedirs(TEMP_DIR, exist_ok=True)
|
||||
|
||||
# Create unique temp directory for this run
|
||||
run_temp_dir = os.path.join(TEMP_DIR, f"run_{datetime.now().strftime('%Y%m%d_%H%M%S')}")
|
||||
os.makedirs(run_temp_dir, exist_ok=True)
|
||||
|
||||
local_zip = os.path.join(run_temp_dir, zip_base)
|
||||
|
||||
logging.info("Downloading %s to %s", zip_key, local_zip)
|
||||
get_obj = client.get_object(OCI_NAMESPACE, OCI_BUCKET, zip_key)
|
||||
with open(local_zip, 'wb') as f:
|
||||
for chunk in get_obj.data.raw.stream(1024 * 1024, decode_content=False):
|
||||
f.write(chunk)
|
||||
|
||||
logging.info("Unzipping %s", local_zip)
|
||||
with zipfile.ZipFile(local_zip, 'r') as zip_ref:
|
||||
zip_ref.extractall(run_temp_dir)
|
||||
|
||||
extracted_files = []
|
||||
for root, dirs, files in os.walk(run_temp_dir):
|
||||
for file in files:
|
||||
if file.endswith('.csv'):
|
||||
full_path = os.path.join(root, file)
|
||||
extracted_files.append({"filename": file, "path": full_path})
|
||||
logging.info("Extracted CSV: %s", file)
|
||||
|
||||
logging.info("Total CSV files extracted: %d", len(extracted_files))
|
||||
|
||||
return {
|
||||
"extracted_files": extracted_files,
|
||||
"zip_config": config,
|
||||
"temp_dir": run_temp_dir
|
||||
}
|
||||
|
||||
@af_task(task_id="init_workflow")
|
||||
def init_workflow(unzipped: dict):
|
||||
"""Initialize MRDS workflow"""
|
||||
database_name = WORKFLOW_CONFIG["database_name"]
|
||||
workflow_name = WORKFLOW_CONFIG["workflow_name"]
|
||||
|
||||
ctx = get_current_context()
|
||||
run_id = str(ctx['ti'].run_id)
|
||||
|
||||
a_workflow_history_key = mrds_init_workflow(database_name, workflow_name, run_id)
|
||||
|
||||
extracted_files = unzipped.get("extracted_files", [])
|
||||
zip_config = unzipped.get("zip_config", {})
|
||||
temp_dir = unzipped.get("temp_dir")
|
||||
|
||||
task_configs = []
|
||||
for csv_config in CSV_CONFIGS:
|
||||
matching_file = next(
|
||||
(ef for ef in extracted_files if ef["filename"] == csv_config["source_filename"]),
|
||||
None
|
||||
)
|
||||
if matching_file:
|
||||
task_configs.append({
|
||||
"task_name": csv_config["task_name"],
|
||||
"source_filename": csv_config["source_filename"],
|
||||
"source_path": matching_file["path"],
|
||||
"config_file": csv_config["config_yaml"],
|
||||
})
|
||||
logging.info("Prepared task config for %s", csv_config["source_filename"])
|
||||
else:
|
||||
logging.warning("CSV file %s not found in extracted files", csv_config["source_filename"])
|
||||
|
||||
return {
|
||||
"workflow_history_key": a_workflow_history_key,
|
||||
"task_configs": task_configs,
|
||||
"zip_config": zip_config,
|
||||
"temp_dir": temp_dir
|
||||
}
|
||||
|
||||
def run_mrds_task(task_config: dict, **context):
    """Run MRDS processing for a single CSV file"""
    ti = context['ti']

    task_name = task_config["task_name"]
    source_path = task_config["source_path"]
    config_file = task_config["config_file"]

    # Fail fast if either input artefact is missing on disk.
    if not os.path.exists(config_file):
        raise FileNotFoundError(f"Config file not found: {config_file}")
    if not os.path.exists(source_path):
        raise FileNotFoundError(f"Source CSV file not found: {source_path}")

    upstream = ti.xcom_pull(task_ids='init_workflow') or {}
    workflow_history_key = upstream.get('workflow_history_key')
    if not workflow_history_key:
        raise AirflowFailException("No workflow_history_key from init_workflow")

    logging.info(f"{task_name}: Starting MRDS processing for {source_path}")
    try:
        mrds_main(workflow_history_key, source_path, config_file, generate_workflow_context=False)
    except Exception:
        # Log full traceback before propagating so the failure is attributable.
        logging.exception(f"{task_name}: MRDS failed on {source_path}")
        raise
    logging.info(f"{task_name}: MRDS processing completed successfully")

    return "SUCCESS"
|
||||
|
||||
def finalise_workflow_task(**context):
    """Finalize the workflow and mark zip as processed.

    Inspects the terminal states of the CSV processing tasks for this
    dag_run. When all succeeded, records the zip in the processed map and
    finalises the MRDS workflow with "Y"; otherwise finalises with "N" and
    fails this task so the run is visibly red.

    Raises:
        AirflowFailException: if the workflow history key is missing or any
            CSV task failed.
    """
    ti = context['ti']
    init_bundle = ti.xcom_pull(task_ids='init_workflow') or {}

    a_workflow_history_key = init_bundle.get('workflow_history_key')
    zip_config = init_bundle.get('zip_config', {})

    if a_workflow_history_key is None:
        raise AirflowFailException("No workflow history key; cannot finalise workflow")

    # Check if any CSV task failed
    csv_task_ids = [cfg["task_name"] for cfg in CSV_CONFIGS]
    dag_run = context['dag_run']
    tis = [t for t in dag_run.get_task_instances() if t.task_id in csv_task_ids]

    from airflow.utils.state import State
    bad_states = {State.FAILED, State.UPSTREAM_FAILED}
    failed_tasks = [ti_i.task_id for ti_i in tis if ti_i.state in bad_states]

    if failed_tasks:
        mrds_finalise_workflow(a_workflow_history_key, "N")
        logging.error("Finalised workflow %s as FAILED (failed tasks=%s)",
                      a_workflow_history_key, failed_tasks)
        raise AirflowFailException(f"Workflow failed for tasks: {failed_tasks}")

    # Mark zip as processed.
    zip_key = zip_config.get("zip_object_name")
    zip_mtime = zip_config.get("zip_mtime")
    # FIX: explicit None check — the original truthiness test (`and zip_mtime`)
    # silently skipped marking the zip when the mtime was a falsy 0/0.0.
    if zip_key and zip_mtime is not None:
        _mark_processed(zip_key, zip_mtime)

    mrds_finalise_workflow(a_workflow_history_key, "Y")
    logging.info("Finalised workflow %s as SUCCESS", a_workflow_history_key)
|
||||
|
||||
@af_task(task_id="cleanup_temp_files")
|
||||
def cleanup_temp_files(**context):
|
||||
"""Clean up temporary files for this run"""
|
||||
import shutil
|
||||
ti = context['ti']
|
||||
init_bundle = ti.xcom_pull(task_ids='init_workflow') or {}
|
||||
temp_dir = init_bundle.get('temp_dir')
|
||||
|
||||
if temp_dir and os.path.exists(temp_dir):
|
||||
shutil.rmtree(temp_dir)
|
||||
logging.info("Cleaned up temp directory: %s", temp_dir)
|
||||
|
||||
@af_task(task_id="move_zip_to_archive")
|
||||
def move_zip_to_archive(**context):
|
||||
"""Move processed zip file to archive"""
|
||||
ti = context['ti']
|
||||
init_bundle = ti.xcom_pull(task_ids='init_workflow') or {}
|
||||
zip_config = init_bundle.get('zip_config', {})
|
||||
|
||||
zip_key = zip_config.get("zip_object_name")
|
||||
if not zip_key:
|
||||
logging.warning("No zip key found, skipping archive")
|
||||
return
|
||||
|
||||
client = _oci_client()
|
||||
archive_key = zip_key.replace(OBJECT_PREFIX, f"{OBJECT_PREFIX}archive/", 1)
|
||||
|
||||
try:
|
||||
client.copy_object(
|
||||
OCI_NAMESPACE,
|
||||
OCI_BUCKET,
|
||||
{
|
||||
"sourceObjectName": zip_key,
|
||||
"destinationRegion": os.getenv("OCI_REGION", "eu-frankfurt-1"),
|
||||
"destinationNamespace": OCI_NAMESPACE,
|
||||
"destinationBucket": OCI_BUCKET,
|
||||
"destinationObjectName": archive_key
|
||||
}
|
||||
)
|
||||
logging.info("Copied to archive: %s -> %s", zip_key, archive_key)
|
||||
|
||||
client.delete_object(OCI_NAMESPACE, OCI_BUCKET, zip_key)
|
||||
logging.info("Deleted from inbox: %s", zip_key)
|
||||
except Exception as e:
|
||||
logging.error("Failed to archive zip file %s: %s", zip_key, e)
|
||||
raise
|
||||
|
||||
# Build the DAG structure
config_task = get_zip_config()
unzip_task = download_and_unzip(config_task)
init_task = init_workflow(unzip_task)

# Create CSV processing tasks dynamically
csv_tasks = []
for csv_config in CSV_CONFIGS:
    task = PythonOperator(
        task_id=csv_config["task_name"],
        python_callable=run_mrds_task,
        op_kwargs={
            "task_config": {
                "task_name": csv_config["task_name"],
                "source_filename": csv_config["source_filename"],
                # Jinja template, rendered at runtime: picks the source_path that
                # init_workflow published for this task_name. Resolves to a native
                # string because the DAG sets render_template_as_native_obj=True.
                "source_path": "{{ ti.xcom_pull(task_ids='init_workflow')['task_configs'] | selectattr('task_name', 'equalto', '" + csv_config["task_name"] + "') | map(attribute='source_path') | first }}",
                "config_file": csv_config["config_yaml"],
            }
        },
        provide_context=True,  # NOTE(review): deprecated/no-op on Airflow 2.x — confirm target version
    )
    csv_tasks.append(task)

finalize_task = PythonOperator(
    task_id='finalize_workflow',
    python_callable=finalise_workflow_task,
    provide_context=True,
    # ALL_DONE: finalisation must run even when CSV tasks fail, so the MRDS
    # workflow record is always closed out ("Y" or "N").
    trigger_rule=TriggerRule.ALL_DONE,
    retries=0,
)

cleanup_task = cleanup_temp_files()
archive_task = move_zip_to_archive()

all_good = EmptyOperator(
    task_id="All_went_well",
    trigger_rule=TriggerRule.ALL_SUCCESS,
)

# Define task dependencies
config_task >> unzip_task >> init_task >> csv_tasks >> finalize_task >> [cleanup_task, archive_task] >> all_good

logging.info("CSDB Ratings Full Processor DAG ready")
|
||||
29
airflow/ods/fxcd/BRANCH/config/m_ODS_FXCD_F_BRANCH_PARSE.yml
Normal file
29
airflow/ods/fxcd/BRANCH/config/m_ODS_FXCD_F_BRANCH_PARSE.yml
Normal file
@@ -0,0 +1,29 @@
|
||||
# App configurations
|
||||
encoding_type: latin1
|
||||
|
||||
# Global configurations
|
||||
tmpdir: /tmp
|
||||
inbox_prefix: INBOX/FXCD/BRANCH
|
||||
archive_prefix: ARCHIVE/FXCD/BRANCH
|
||||
workflow_name: w_ODS_FXCD_F_BRANCH
|
||||
validation_schema_path: None
|
||||
file_type: csv
|
||||
|
||||
tasks:
|
||||
- task_name: m_ODS_FXCD_F_BRANCH_PARSE
|
||||
ods_prefix: INBOX/FXCD/BRANCH/FXCD_F_BRANCH
|
||||
output_table: FXCD_F_BRANCH
|
||||
output_columns:
|
||||
- type: 'a_key'
|
||||
column_header: 'A_KEY'
|
||||
- type: 'workflow_key'
|
||||
column_header: 'A_WORKFLOW_HISTORY_KEY'
|
||||
- type: 'csv_header'
|
||||
value: 'BRANCH_ID'
|
||||
column_header: 'BRANCH_ID'
|
||||
- type: 'csv_header'
|
||||
value: 'ENTITY_ID'
|
||||
column_header: 'ENTITY_ID'
|
||||
- type: 'csv_header'
|
||||
value: 'CTP_ID'
|
||||
column_header: 'CTP_ID'
|
||||
@@ -0,0 +1,53 @@
|
||||
# App configurations
|
||||
encoding_type: latin1
|
||||
|
||||
# Global configurations
|
||||
tmpdir: /tmp
|
||||
inbox_prefix: INBOX/FXCD/CLEARER
|
||||
archive_prefix: ARCHIVE/FXCD/CLEARER
|
||||
workflow_name: w_ODS_FXCD_F_CLEARER
|
||||
validation_schema_path: None
|
||||
file_type: csv
|
||||
|
||||
tasks:
|
||||
- task_name: m_ODS_FXCD_F_CLEARER_PARSE
|
||||
ods_prefix: INBOX/FXCD/CLEARER/FXCD_F_CLEARER
|
||||
output_table: FXCD_F_CLEARER
|
||||
output_columns:
|
||||
- type: 'a_key'
|
||||
column_header: 'A_KEY'
|
||||
- type: 'workflow_key'
|
||||
column_header: 'A_WORKFLOW_HISTORY_KEY'
|
||||
- type: 'csv_header'
|
||||
value: 'CLEARER_ID'
|
||||
column_header: 'CLEARER_ID'
|
||||
- type: 'csv_header'
|
||||
value: 'ENTITY_ID'
|
||||
column_header: 'ENTITY_ID'
|
||||
- type: 'csv_header'
|
||||
value: 'ELIGIBILITY_OF_FLAG'
|
||||
column_header: 'ELIGIBILITY_OF_FLAG'
|
||||
- type: 'csv_header'
|
||||
value: 'ELIGIBILITY_FR_FLAG'
|
||||
column_header: 'ELIGIBILITY_FR_FLAG'
|
||||
- type: 'csv_header'
|
||||
value: 'ACTIVE_FLAG'
|
||||
column_header: 'ACTIVE_FLAG'
|
||||
- type: 'csv_header'
|
||||
value: 'OVERALL_OF_LIMIT_AMT'
|
||||
column_header: 'OVERALL_OF_LIMIT_AMT'
|
||||
- type: 'csv_header'
|
||||
value: 'CASH_OF_LIMIT_AMT'
|
||||
column_header: 'CASH_OF_LIMIT_AMT'
|
||||
- type: 'csv_header'
|
||||
value: 'SECURITIES_OF_LIMIT_AMT'
|
||||
column_header: 'SECURITIES_OF_LIMIT_AMT'
|
||||
- type: 'csv_header'
|
||||
value: 'OVERALL_FR_LIMIT_AMT'
|
||||
column_header: 'OVERALL_FR_LIMIT_AMT'
|
||||
- type: 'csv_header'
|
||||
value: 'CASH_FR_LIMIT_AMT'
|
||||
column_header: 'CASH_FR_LIMIT_AMT'
|
||||
- type: 'csv_header'
|
||||
value: 'SECURITIES_FR_LIMIT_AMT'
|
||||
column_header: 'SECURITIES_FR_LIMIT_AMT'
|
||||
@@ -0,0 +1,35 @@
|
||||
# App configurations
|
||||
encoding_type: latin1
|
||||
|
||||
# Global configurations
|
||||
tmpdir: /tmp
|
||||
inbox_prefix: INBOX/FXCD/CLEARER_NCB_LIMIT
|
||||
archive_prefix: ARCHIVE/FXCD/CLEARER_NCB_LIMIT
|
||||
workflow_name: w_ODS_FXCD_F_CLEARER_NCB_LIMIT
|
||||
validation_schema_path: None
|
||||
file_type: csv
|
||||
|
||||
tasks:
|
||||
- task_name: m_ODS_FXCD_F_CLEARER_NCB_LIMIT_PARSE
|
||||
ods_prefix: INBOX/FXCD/CLEARER_NCB_LIMIT/FXCD_F_CLEARER_NCB_LIMIT
|
||||
output_table: FXCD_F_CLEARER_NCB_LIMIT
|
||||
output_columns:
|
||||
- type: 'a_key'
|
||||
column_header: 'A_KEY'
|
||||
- type: 'workflow_key'
|
||||
column_header: 'A_WORKFLOW_HISTORY_KEY'
|
||||
- type: 'csv_header'
|
||||
value: 'CLEARER_ID'
|
||||
column_header: 'CLEARER_ID'
|
||||
- type: 'csv_header'
|
||||
value: 'COUNTRY_ID'
|
||||
column_header: 'COUNTRY_ID'
|
||||
- type: 'csv_header'
|
||||
value: 'OVERALL_LIMIT_AMT'
|
||||
column_header: 'OVERALL_LIMIT_AMT'
|
||||
- type: 'csv_header'
|
||||
value: 'CASH_LIMIT_AMT'
|
||||
column_header: 'CASH_LIMIT_AMT'
|
||||
- type: 'csv_header'
|
||||
value: 'SECURITIES_LIMIT_AMT'
|
||||
column_header: 'SECURITIES_LIMIT_AMT'
|
||||
@@ -0,0 +1,29 @@
|
||||
# App configurations
|
||||
encoding_type: latin1
|
||||
|
||||
# Global configurations
|
||||
tmpdir: /tmp
|
||||
inbox_prefix: INBOX/FXCD/CONSTANT
|
||||
archive_prefix: ARCHIVE/FXCD/CONSTANT
|
||||
workflow_name: w_ODS_FXCD_F_CONSTANT
|
||||
validation_schema_path: None
|
||||
file_type: csv
|
||||
|
||||
tasks:
|
||||
- task_name: m_ODS_FXCD_F_CONSTANT_PARSE
|
||||
ods_prefix: INBOX/FXCD/CONSTANT/FXCD_F_CONSTANT
|
||||
output_table: FXCD_F_CONSTANT
|
||||
output_columns:
|
||||
- type: 'a_key'
|
||||
column_header: 'A_KEY'
|
||||
- type: 'workflow_key'
|
||||
column_header: 'A_WORKFLOW_HISTORY_KEY'
|
||||
- type: 'csv_header'
|
||||
value: 'CONSTANT_NAME'
|
||||
column_header: 'CONSTANT_NAME'
|
||||
- type: 'csv_header'
|
||||
value: 'CONSTANT_VALUE'
|
||||
column_header: 'CONSTANT_VALUE'
|
||||
- type: 'csv_header'
|
||||
value: 'CONSTANT_TYPE'
|
||||
column_header: 'CONSTANT_TYPE'
|
||||
@@ -0,0 +1,70 @@
|
||||
# App configurations
|
||||
encoding_type: latin1
|
||||
|
||||
# Global configurations
|
||||
tmpdir: /tmp
|
||||
inbox_prefix: INBOX/FXCD/COUNTERPARTY
|
||||
archive_prefix: ARCHIVE/FXCD/COUNTERPARTY
|
||||
workflow_name: w_ODS_FXCD_F_COUNTERPARTY
|
||||
validation_schema_path: None
|
||||
file_type: csv
|
||||
|
||||
tasks:
|
||||
- task_name: m_ODS_FXCD_F_COUNTERPARTY_PARSE
|
||||
ods_prefix: INBOX/FXCD/COUNTERPARTY/FXCD_F_COUNTERPARTY
|
||||
output_table: FXCD_F_COUNTERPARTY
|
||||
output_columns:
|
||||
- type: 'a_key'
|
||||
column_header: 'A_KEY'
|
||||
- type: 'workflow_key'
|
||||
column_header: 'A_WORKFLOW_HISTORY_KEY'
|
||||
- type: 'csv_header'
|
||||
value: 'CTP_ID'
|
||||
column_header: 'CTP_ID'
|
||||
- type: 'csv_header'
|
||||
value: 'ENTITY_ID'
|
||||
column_header: 'ENTITY_ID'
|
||||
- type: 'csv_header'
|
||||
value: 'CTP_GROUP_FKIT_CODE'
|
||||
column_header: 'CTP_GROUP_FKIT_CODE'
|
||||
- type: 'csv_header'
|
||||
value: 'ACTIVE_FLAG'
|
||||
column_header: 'ACTIVE_FLAG'
|
||||
- type: 'csv_header'
|
||||
value: 'CTP_COMMENT'
|
||||
column_header: 'CTP_COMMENT'
|
||||
- type: 'csv_header'
|
||||
value: 'GUARANTOR_ID'
|
||||
column_header: 'GUARANTOR_ID'
|
||||
- type: 'csv_header'
|
||||
value: 'OF_OVERALL_LMT_AMT'
|
||||
column_header: 'OF_OVERALL_LMT_AMT'
|
||||
- type: 'csv_header'
|
||||
value: 'OF_MANUAL_FLAG'
|
||||
column_header: 'OF_MANUAL_FLAG'
|
||||
- type: 'csv_header'
|
||||
value: 'FR_OVERALL_LMT_AMT'
|
||||
column_header: 'FR_OVERALL_LMT_AMT'
|
||||
- type: 'csv_header'
|
||||
value: 'FR_MANUAL_FLAG'
|
||||
column_header: 'FR_MANUAL_FLAG'
|
||||
- type: 'csv_header'
|
||||
value: 'MP_OVERALL_LMT_AMT'
|
||||
column_header: 'MP_OVERALL_LMT_AMT'
|
||||
- type: 'csv_header'
|
||||
value: 'MP_MANUAL_FLAG'
|
||||
column_header: 'MP_MANUAL_FLAG'
|
||||
- type: 'csv_header'
|
||||
value: 'EOI_CTP_GROUP'
|
||||
column_header: 'EOI_CTP_GROUP'
|
||||
- type: 'csv_header'
|
||||
value: 'ART_101_FLA'
|
||||
column_header: 'ART_101_FLA'
|
||||
- type: 'csv_header'
|
||||
value: 'MEDIAN_CAPITAL_FLAG'
|
||||
column_header: 'MEDIAN_CAPITAL_FLAG'
|
||||
- type: 'csv_header'
|
||||
value: 'CHANGE_DESCRIPTION'
|
||||
column_header: 'CHANGE_DESCRIPTION'
|
||||
|
||||
|
||||
@@ -0,0 +1,85 @@
|
||||
# App configurations
|
||||
encoding_type: latin1
|
||||
|
||||
# Global configurations
|
||||
tmpdir: /tmp
|
||||
inbox_prefix: INBOX/FXCD/COUNTRY
|
||||
archive_prefix: ARCHIVE/FXCD/COUNTRY
|
||||
workflow_name: w_ODS_FXCD_F_COUNTRY
|
||||
validation_schema_path: None
|
||||
file_type: csv
|
||||
|
||||
tasks:
|
||||
- task_name: m_ODS_FXCD_F_COUNTRY_PARSE
|
||||
ods_prefix: INBOX/FXCD/COUNTRY/FXCD_F_COUNTRY
|
||||
output_table: FXCD_F_COUNTRY
|
||||
output_columns:
|
||||
- type: 'a_key'
|
||||
column_header: 'A_KEY'
|
||||
- type: 'workflow_key'
|
||||
column_header: 'A_WORKFLOW_HISTORY_KEY'
|
||||
- type: 'csv_header'
|
||||
value: 'COUNTRY_ID'
|
||||
column_header: 'COUNTRY_ID'
|
||||
- type: 'csv_header'
|
||||
value: 'ENTITY_ID'
|
||||
column_header: 'ENTITY_ID'
|
||||
- type: 'csv_header'
|
||||
value: 'COUNTRY_GDP'
|
||||
column_header: 'COUNTRY_GDP'
|
||||
- type: 'csv_header'
|
||||
value: 'NCB_USD_LAMBDA'
|
||||
column_header: 'NCB_USD_LAMBDA'
|
||||
- type: 'csv_header'
|
||||
value: 'OF_FLAG'
|
||||
column_header: 'OF_FLAG'
|
||||
- type: 'csv_header'
|
||||
value: 'FR_FLAG'
|
||||
column_header: 'FR_FLAG'
|
||||
- type: 'csv_header'
|
||||
value: 'EU_FLAG'
|
||||
column_header: 'EU_FLAG'
|
||||
- type: 'csv_header'
|
||||
value: 'EUROSYSTEM_FLAG'
|
||||
column_header: 'EUROSYSTEM_FLAG'
|
||||
- type: 'csv_header'
|
||||
value: 'FR_PORTF_SHARE_OPTOUT'
|
||||
column_header: 'FR_PORTF_SHARE_OPTOUT'
|
||||
- type: 'csv_header'
|
||||
value: 'FR_LIMIT_CALC_OPTOUT'
|
||||
column_header: 'FR_LIMIT_CALC_OPTOUT'
|
||||
- type: 'csv_header'
|
||||
value: 'COUNTRY_COMMENT'
|
||||
column_header: 'COUNTRY_COMMENT'
|
||||
- type: 'csv_header'
|
||||
value: 'OF_LMT_AMNT'
|
||||
column_header: 'OF_LMT_AMNT'
|
||||
- type: 'csv_header'
|
||||
value: 'OF_MANUAL_FLAG'
|
||||
column_header: 'OF_MANUAL_FLAG'
|
||||
- type: 'csv_header'
|
||||
value: 'FR_LMT_AMNT'
|
||||
column_header: 'FR_LMT_AMNT'
|
||||
- type: 'csv_header'
|
||||
value: 'FR_MANUAL_FLAG'
|
||||
column_header: 'FR_MANUAL_FLAG'
|
||||
- type: 'csv_header'
|
||||
value: 'USD_PORTFOLIO_EUR_SIZE'
|
||||
column_header: 'USD_PORTFOLIO_EUR_SIZE'
|
||||
- type: 'csv_header'
|
||||
value: 'JPY_PORTFOLIO_EUR_SIZE'
|
||||
column_header: 'JPY_PORTFOLIO_EUR_SIZE'
|
||||
- type: 'csv_header'
|
||||
value: 'CAPITAL_KEY_AMNT'
|
||||
column_header: 'CAPITAL_KEY_AMNT'
|
||||
- type: 'csv_header'
|
||||
value: 'LAMBDA_MANUAL_FLAG'
|
||||
column_header: 'LAMBDA_MANUAL_FLAG'
|
||||
- type: 'csv_header'
|
||||
value: 'CNY_PORTFOLIO_EUR_SIZE'
|
||||
column_header: 'CNY_PORTFOLIO_EUR_SIZE'
|
||||
- type: 'csv_header'
|
||||
value: 'CHANGE_DESCRIPTION'
|
||||
column_header: 'CHANGE_DESCRIPTION'
|
||||
|
||||
|
||||
@@ -0,0 +1,35 @@
|
||||
# App configurations
|
||||
encoding_type: latin1
|
||||
|
||||
# Global configurations
|
||||
tmpdir: /tmp
|
||||
inbox_prefix: INBOX/FXCD/COUNTRY_NCB_LIMIT
|
||||
archive_prefix: ARCHIVE/FXCD/COUNTRY_NCB_LIMIT
|
||||
workflow_name: w_ODS_FXCD_F_COUNTRY_NCB_LIMIT
|
||||
validation_schema_path: None
|
||||
file_type: csv
|
||||
|
||||
tasks:
|
||||
- task_name: m_ODS_FXCD_F_COUNTRY_NCB_LIMIT_PARSE
|
||||
ods_prefix: INBOX/FXCD/COUNTRY_NCB_LIMIT/FXCD_F_COUNTRY_NCB_LIMIT
|
||||
output_table: FXCD_F_COUNTRY_NCB_LIMIT
|
||||
output_columns:
|
||||
- type: 'a_key'
|
||||
column_header: 'A_KEY'
|
||||
- type: 'workflow_key'
|
||||
column_header: 'A_WORKFLOW_HISTORY_KEY'
|
||||
- type: 'csv_header'
|
||||
value: 'COUNTRY_ID'
|
||||
column_header: 'COUNTRY_ID'
|
||||
- type: 'csv_header'
|
||||
value: 'NCB_COUNTRY_ID'
|
||||
column_header: 'NCB_COUNTRY_ID'
|
||||
- type: 'csv_header'
|
||||
value: 'USD_LIMIT_AMT'
|
||||
column_header: 'USD_LIMIT_AMT'
|
||||
- type: 'csv_header'
|
||||
value: 'JPY_LIMIT_AMT'
|
||||
column_header: 'JPY_LIMIT_AMT'
|
||||
- type: 'csv_header'
|
||||
value: 'TOTAL_LIMIT_AMT'
|
||||
column_header: 'TOTAL_LIMIT_AMT'
|
||||
@@ -0,0 +1,35 @@
|
||||
# App configurations
|
||||
encoding_type: latin1
|
||||
|
||||
# Global configurations
|
||||
tmpdir: /tmp
|
||||
inbox_prefix: INBOX/FXCD/CTP_GROUP
|
||||
archive_prefix: ARCHIVE/FXCD/CTP_GROUP
|
||||
workflow_name: w_ODS_FXCD_F_CTP_GROUP
|
||||
validation_schema_path: None
|
||||
file_type: csv
|
||||
|
||||
tasks:
|
||||
- task_name: m_ODS_FXCD_F_CTP_GROUP_PARSE
|
||||
ods_prefix: INBOX/FXCD/CTP_GROUP/FXCD_F_CTP_GROUP
|
||||
output_table: FXCD_F_CTP_GROUP
|
||||
output_columns:
|
||||
- type: 'a_key'
|
||||
column_header: 'A_KEY'
|
||||
- type: 'workflow_key'
|
||||
column_header: 'A_WORKFLOW_HISTORY_KEY'
|
||||
- type: 'csv_header'
|
||||
value: 'CTP_GROUP_FKIT_CODE'
|
||||
column_header: 'CTP_GROUP_FKIT_CODE'
|
||||
- type: 'csv_header'
|
||||
value: 'CTP_GROUP_NAME'
|
||||
column_header: 'CTP_GROUP_NAME'
|
||||
- type: 'csv_header'
|
||||
value: 'OF_MAX_LIMIT_AMT'
|
||||
column_header: 'OF_MAX_LIMIT_AMT'
|
||||
- type: 'csv_header'
|
||||
value: 'FR_MP_MAX_LIMIT_AMT'
|
||||
column_header: 'FR_MP_MAX_LIMIT_AMT'
|
||||
- type: 'csv_header'
|
||||
value: 'ACTIVE'
|
||||
column_header: 'ACTIVE'
|
||||
@@ -0,0 +1,32 @@
|
||||
# App configurations
|
||||
encoding_type: latin1
|
||||
|
||||
# Global configurations
|
||||
tmpdir: /tmp
|
||||
inbox_prefix: INBOX/FXCD/CTP_GROUP_NCB_LIMIT
|
||||
archive_prefix: ARCHIVE/FXCD/CTP_GROUP_NCB_LIMIT
|
||||
workflow_name: w_ODS_FXCD_F_CTP_GROUP_NCB_LIMIT
|
||||
validation_schema_path: None
|
||||
file_type: csv
|
||||
|
||||
tasks:
|
||||
- task_name: m_ODS_FXCD_F_CTP_GROUP_NCB_LIMIT_PARSE
|
||||
ods_prefix: INBOX/FXCD/CTP_GROUP_NCB_LIMIT/FXCD_F_CTP_GROUP_NCB_LIMIT
|
||||
output_table: FXCD_F_CTP_GROUP_NCB_LIMIT
|
||||
output_columns:
|
||||
- type: 'a_key'
|
||||
column_header: 'A_KEY'
|
||||
- type: 'workflow_key'
|
||||
column_header: 'A_WORKFLOW_HISTORY_KEY'
|
||||
- type: 'csv_header'
|
||||
value: 'COUNTRY_ID'
|
||||
column_header: 'COUNTRY_ID'
|
||||
- type: 'csv_header'
|
||||
value: 'CTP_GROUP_FKIT_CODE'
|
||||
column_header: 'CTP_GROUP_FKIT_CODE'
|
||||
- type: 'csv_header'
|
||||
value: 'ELIGIBILITY_ID'
|
||||
column_header: 'ELIGIBILITY_ID'
|
||||
- type: 'csv_header'
|
||||
value: 'LIMIT_AMT'
|
||||
column_header: 'LIMIT_AMT'
|
||||
@@ -0,0 +1,32 @@
|
||||
# App configurations
|
||||
encoding_type: latin1
|
||||
|
||||
# Global configurations
|
||||
tmpdir: /tmp
|
||||
inbox_prefix: INBOX/FXCD/CTP_NCB_LIMIT
|
||||
archive_prefix: ARCHIVE/FXCD/CTP_NCB_LIMIT
|
||||
workflow_name: w_ODS_FXCD_F_CTP_NCB_LIMIT
|
||||
validation_schema_path: None
|
||||
file_type: csv
|
||||
|
||||
tasks:
|
||||
- task_name: m_ODS_FXCD_F_CTP_NCB_LIMIT_PARSE
|
||||
ods_prefix: INBOX/FXCD/CTP_NCB_LIMIT/FXCD_F_CTP_NCB_LIMIT
|
||||
output_table: FXCD_F_CTP_NCB_LIMIT
|
||||
output_columns:
|
||||
- type: 'a_key'
|
||||
column_header: 'A_KEY'
|
||||
- type: 'workflow_key'
|
||||
column_header: 'A_WORKFLOW_HISTORY_KEY'
|
||||
- type: 'csv_header'
|
||||
value: 'COUNTRY_ID'
|
||||
column_header: 'COUNTRY_ID'
|
||||
- type: 'csv_header'
|
||||
value: 'ELIGIBILITY_ID'
|
||||
column_header: 'ELIGIBILITY_ID'
|
||||
- type: 'csv_header'
|
||||
value: 'CTP_ID'
|
||||
column_header: 'CTP_ID'
|
||||
- type: 'csv_header'
|
||||
value: 'LIMIT_AMT'
|
||||
column_header: 'LIMIT_AMT'
|
||||
@@ -0,0 +1,78 @@
|
||||
# App configurations
|
||||
encoding_type: latin1
|
||||
|
||||
# Global configurations
|
||||
tmpdir: /tmp
|
||||
inbox_prefix: INBOX/FXCD/ELIGIBILITY
|
||||
archive_prefix: ARCHIVE/FXCD/ELIGIBILITY
|
||||
workflow_name: w_ODS_FXCD_F_ELIGIBILITY
|
||||
validation_schema_path: None
|
||||
file_type: csv
|
||||
|
||||
tasks:
|
||||
- task_name: m_ODS_FXCD_F_ELIGIBILITY_PARSE
|
||||
ods_prefix: INBOX/FXCD/ELIGIBILITY/FXCD_F_ELIGIBILITY
|
||||
output_table: FXCD_F_ELIGIBILITY
|
||||
output_columns:
|
||||
- type: 'a_key'
|
||||
column_header: 'A_KEY'
|
||||
- type: 'workflow_key'
|
||||
column_header: 'A_WORKFLOW_HISTORY_KEY'
|
||||
- type: 'csv_header'
|
||||
value: 'ELIGIBILITY_ID'
|
||||
column_header: 'ELIGIBILITY_ID'
|
||||
- type: 'csv_header'
|
||||
value: 'ELIGIBILITY_TYPE'
|
||||
column_header: 'ELIGIBILITY_TYPE'
|
||||
- type: 'csv_header'
|
||||
value: 'ELIGIBILITY_NAME'
|
||||
column_header: 'ELIGIBILITY_NAME'
|
||||
- type: 'csv_header'
|
||||
value: 'LIMIT_PERCENTAGE'
|
||||
column_header: 'LIMIT_PERCENTAGE'
|
||||
- type: 'csv_header'
|
||||
value: 'LIMIT_AMT_MANUAL_FLAG'
|
||||
column_header: 'LIMIT_AMT_MANUAL_FLAG'
|
||||
- type: 'csv_header'
|
||||
value: 'LIMIT_AMT'
|
||||
column_header: 'LIMIT_AMT'
|
||||
- type: 'csv_header'
|
||||
value: 'NCB_LMT_AMT_FLOOR'
|
||||
column_header: 'NCB_LMT_AMT_FLOOR'
|
||||
- type: 'csv_header'
|
||||
value: 'RATING_THRESHOLD'
|
||||
column_header: 'RATING_THRESHOLD'
|
||||
- type: 'csv_header'
|
||||
value: 'FKIT_SUBLIMIT_NAME'
|
||||
column_header: 'FKIT_SUBLIMIT_NAME'
|
||||
- type: 'csv_header'
|
||||
value: 'ELIGIBILITY_ORDER'
|
||||
column_header: 'ELIGIBILITY_ORDER'
|
||||
- type: 'csv_header'
|
||||
value: 'LIMIT_DISTRIBUTION'
|
||||
column_header: 'LIMIT_DISTRIBUTION'
|
||||
- type: 'csv_header'
|
||||
value: 'CTP_MIN_REQ_RATINGS'
|
||||
column_header: 'CTP_MIN_REQ_RATINGS'
|
||||
- type: 'csv_header'
|
||||
value: 'LIMITS_CALCULATION'
|
||||
column_header: 'LIMITS_CALCULATION'
|
||||
- type: 'csv_header'
|
||||
value: 'ART_101_FLAG'
|
||||
column_header: 'ART_101_FLAG'
|
||||
- type: 'csv_header'
|
||||
value: 'DEFINITION'
|
||||
column_header: 'DEFINITION'
|
||||
- type: 'csv_header'
|
||||
value: 'FOR_CP_FLAG'
|
||||
column_header: 'FOR_CP_FLAG'
|
||||
- type: 'csv_header'
|
||||
value: 'FOR_CPG_FLAG'
|
||||
column_header: 'FOR_CPG_FLAG'
|
||||
- type: 'csv_header'
|
||||
value: 'OVERALL_LIMIT_FLAG'
|
||||
column_header: 'OVERALL_LIMIT_FLAG'
|
||||
- type: 'csv_header'
|
||||
value: 'IDENTIFIER'
|
||||
column_header: 'IDENTIFIER'
|
||||
|
||||
@@ -0,0 +1,35 @@
|
||||
# App configurations
|
||||
encoding_type: latin1
|
||||
|
||||
# Global configurations
|
||||
tmpdir: /tmp
|
||||
inbox_prefix: INBOX/FXCD/ELIGIBILITY_GROUP_MAP
|
||||
archive_prefix: ARCHIVE/FXCD/ELIGIBILITY_GROUP_MAP
|
||||
workflow_name: w_ODS_FXCD_F_ELIGIBILITY_GROUP_MAP
|
||||
validation_schema_path: None
|
||||
file_type: csv
|
||||
|
||||
tasks:
|
||||
- task_name: m_ODS_FXCD_F_ELIGIBILITY_GROUP_MAP_PARSE
|
||||
ods_prefix: INBOX/FXCD/ELIGIBILITY_GROUP_MAP/FXCD_F_ELIGIBILITY_GROUP_MAP
|
||||
output_table: FXCD_F_ELIGIBILITY_GROUP_MAP
|
||||
output_columns:
|
||||
- type: 'a_key'
|
||||
column_header: 'A_KEY'
|
||||
- type: 'workflow_key'
|
||||
column_header: 'A_WORKFLOW_HISTORY_KEY'
|
||||
- type: 'csv_header'
|
||||
value: 'CTP_GROUP_FKIT_CODE'
|
||||
column_header: 'CTP_GROUP_FKIT_CODE'
|
||||
- type: 'csv_header'
|
||||
value: 'ELIGIBILITY_ID'
|
||||
column_header: 'ELIGIBILITY_ID'
|
||||
- type: 'csv_header'
|
||||
value: 'OF_LIMIT_AMT'
|
||||
column_header: 'OF_LIMIT_AMT'
|
||||
- type: 'csv_header'
|
||||
value: 'FR_MP_LIMIT_AMT'
|
||||
column_header: 'FR_MP_LIMIT_AMT'
|
||||
- type: 'csv_header'
|
||||
value: 'LIMIT_AMT'
|
||||
column_header: 'LIMIT_AMT'
|
||||
@@ -0,0 +1,44 @@
|
||||
# App configurations
|
||||
encoding_type: latin1
|
||||
|
||||
# Global configurations
|
||||
tmpdir: /tmp
|
||||
inbox_prefix: INBOX/FXCD/ELIGIBILITY_ISSUER
|
||||
archive_prefix: ARCHIVE/FXCD/ELIGIBILITY_ISSUER
|
||||
workflow_name: w_ODS_FXCD_F_ELIGIBILITY_ISSUER
|
||||
validation_schema_path: None
|
||||
file_type: csv
|
||||
|
||||
tasks:
|
||||
- task_name: m_ODS_FXCD_F_ELIGIBILITY_ISSUER_PARSE
|
||||
ods_prefix: INBOX/FXCD/ELIGIBILITY_ISSUER/FXCD_F_ELIGIBILITY_ISSUER
|
||||
output_table: FXCD_F_ELIGIBILITY_ISSUER
|
||||
output_columns:
|
||||
- type: 'a_key'
|
||||
column_header: 'A_KEY'
|
||||
- type: 'workflow_key'
|
||||
column_header: 'A_WORKFLOW_HISTORY_KEY'
|
||||
- type: 'csv_header'
|
||||
value: 'ELIGIBILITY_ISSUER_ID'
|
||||
column_header: 'ELIGIBILITY_ISSUER_ID'
|
||||
- type: 'csv_header'
|
||||
value: 'ELIGIBILITY_ISSUER_NAME'
|
||||
column_header: 'ELIGIBILITY_ISSUER_NAME'
|
||||
- type: 'csv_header'
|
||||
value: 'ART_101_FLAG'
|
||||
column_header: 'ART_101_FLAG'
|
||||
- type: 'csv_header'
|
||||
value: 'RATING_THRESHOLD'
|
||||
column_header: 'RATING_THRESHOLD'
|
||||
- type: 'csv_header'
|
||||
value: 'DEFINITION'
|
||||
column_header: 'DEFINITION'
|
||||
- type: 'csv_header'
|
||||
value: 'RATING_TERM_TYPE'
|
||||
column_header: 'RATING_TERM_TYPE'
|
||||
- type: 'csv_header'
|
||||
value: 'RATING_UPLIFT_FACTOR'
|
||||
column_header: 'RATING_UPLIFT_FACTOR'
|
||||
- type: 'csv_header'
|
||||
value: 'ELIG_FLAG'
|
||||
column_header: 'ELIG_FLAG'
|
||||
@@ -0,0 +1,32 @@
|
||||
# App configurations
|
||||
encoding_type: latin1
|
||||
|
||||
# Global configurations
|
||||
tmpdir: /tmp
|
||||
inbox_prefix: INBOX/FXCD/ELIGIBILITY_ISSUER_MAP
|
||||
archive_prefix: ARCHIVE/FXCD/ELIGIBILITY_ISSUER_MAP
|
||||
workflow_name: w_ODS_FXCD_F_ELIGIBILITY_ISSUER_MAP
|
||||
validation_schema_path: None
|
||||
file_type: csv
|
||||
|
||||
tasks:
|
||||
- task_name: m_ODS_FXCD_F_ELIGIBILITY_ISSUER_MAP_PARSE
|
||||
ods_prefix: INBOX/FXCD/ELIGIBILITY_ISSUER_MAP/FXCD_F_ELIGIBILITY_ISSUER_MAP
|
||||
output_table: FXCD_F_ELIGIBILITY_ISSUER_MAP
|
||||
output_columns:
|
||||
- type: 'a_key'
|
||||
column_header: 'A_KEY'
|
||||
- type: 'workflow_key'
|
||||
column_header: 'A_WORKFLOW_HISTORY_KEY'
|
||||
- type: 'csv_header'
|
||||
value: 'ISSUER_ID'
|
||||
column_header: 'ISSUER_ID'
|
||||
- type: 'csv_header'
|
||||
value: 'ELIGIBILITY_ISSUER_ID'
|
||||
column_header: 'ELIGIBILITY_ISSUER_ID'
|
||||
- type: 'csv_header'
|
||||
value: 'ELEGIBLE_FLAG'
|
||||
column_header: 'ELEGIBLE_FLAG'
|
||||
- type: 'csv_header'
|
||||
value: 'MANUAL_ELIGIBLE_FLAG'
|
||||
column_header: 'MANUAL_ELIGIBLE_FLAG'
|
||||
@@ -0,0 +1,41 @@
|
||||
# App configurations
|
||||
encoding_type: latin1
|
||||
|
||||
# Global configurations
|
||||
tmpdir: /tmp
|
||||
inbox_prefix: INBOX/FXCD/ELIGIBILITY_MAP
|
||||
archive_prefix: ARCHIVE/FXCD/ELIGIBILITY_MAP
|
||||
workflow_name: w_ODS_FXCD_F_ELIGIBILITY_MAP
|
||||
validation_schema_path: None
|
||||
file_type: csv
|
||||
|
||||
tasks:
|
||||
- task_name: m_ODS_FXCD_F_ELIGIBILITY_MAP_PARSE
|
||||
ods_prefix: INBOX/FXCD/ELIGIBILITY_MAP/FXCD_F_ELIGIBILITY_MAP
|
||||
output_table: FXCD_F_ELIGIBILITY_MAP
|
||||
output_columns:
|
||||
- type: 'a_key'
|
||||
column_header: 'A_KEY'
|
||||
- type: 'workflow_key'
|
||||
column_header: 'A_WORKFLOW_HISTORY_KEY'
|
||||
- type: 'csv_header'
|
||||
value: 'CTP_ID'
|
||||
column_header: 'CTP_ID'
|
||||
- type: 'csv_header'
|
||||
value: 'ELIGIBILITY_ID'
|
||||
column_header: 'ELIGIBILITY_ID'
|
||||
- type: 'csv_header'
|
||||
value: 'OF_LIMIT_AMT'
|
||||
column_header: 'OF_LIMIT_AMT'
|
||||
- type: 'csv_header'
|
||||
value: 'OF_FLAG'
|
||||
column_header: 'OF_FLAG'
|
||||
- type: 'csv_header'
|
||||
value: 'FR_LIMIT_AMT'
|
||||
column_header: 'FR_LIMIT_AMT'
|
||||
- type: 'csv_header'
|
||||
value: 'FR_MP_FLAG'
|
||||
column_header: 'FR_MP_FLAG'
|
||||
- type: 'csv_header'
|
||||
value: 'LIMIT_AMT'
|
||||
column_header: 'LIMIT_AMT'
|
||||
56
airflow/ods/fxcd/ENTITY/config/m_ODS_FXCD_F_ENTITY_PARSE.yml
Normal file
56
airflow/ods/fxcd/ENTITY/config/m_ODS_FXCD_F_ENTITY_PARSE.yml
Normal file
@@ -0,0 +1,56 @@
|
||||
# App configurations
|
||||
encoding_type: latin1
|
||||
|
||||
# Global configurations
|
||||
tmpdir: /tmp
|
||||
inbox_prefix: INBOX/FXCD/ENTITY
|
||||
archive_prefix: ARCHIVE/FXCD/ENTITY
|
||||
workflow_name: w_ODS_FXCD_F_ENTITY
|
||||
validation_schema_path: None
|
||||
file_type: csv
|
||||
|
||||
tasks:
|
||||
- task_name: m_ODS_FXCD_F_ENTITY_PARSE
|
||||
ods_prefix: INBOX/FXCD/ENTITY/FXCD_F_ENTITY
|
||||
output_table: FXCD_F_ENTITY
|
||||
output_columns:
|
||||
- type: 'a_key'
|
||||
column_header: 'A_KEY'
|
||||
- type: 'workflow_key'
|
||||
column_header: 'A_WORKFLOW_HISTORY_KEY'
|
||||
- type: 'csv_header'
|
||||
value: 'ENTITY_ID'
|
||||
column_header: 'ENTITY_ID'
|
||||
- type: 'csv_header'
|
||||
value: 'FKIT_CODE'
|
||||
column_header: 'FKIT_CODE'
|
||||
- type: 'csv_header'
|
||||
value: 'ENTITY_NAME'
|
||||
column_header: 'ENTITY_NAME'
|
||||
- type: 'csv_header'
|
||||
value: 'BVD_BANK_INDEX_NUMBER'
|
||||
column_header: 'BVD_BANK_INDEX_NUMBER'
|
||||
- type: 'csv_header'
|
||||
value: 'RISK_COUNTRY_ID'
|
||||
column_header: 'RISK_COUNTRY_ID'
|
||||
- type: 'csv_header'
|
||||
value: 'ST_MANUAL_RATING_FLAG'
|
||||
column_header: 'ST_MANUAL_RATING_FLAG'
|
||||
- type: 'csv_header'
|
||||
value: 'LT_MANUAL_RATING_FLAG'
|
||||
column_header: 'LT_MANUAL_RATING_FLAG'
|
||||
- type: 'csv_header'
|
||||
value: 'ST_SECOND_BEST_RATING'
|
||||
column_header: 'ST_SECOND_BEST_RATING'
|
||||
- type: 'csv_header'
|
||||
value: 'LT_SECOND_BEST_RATING'
|
||||
column_header: 'LT_SECOND_BEST_RATING'
|
||||
- type: 'csv_header'
|
||||
value: 'CAP_TIER1_AMT'
|
||||
column_header: 'CAP_TIER1_AMT'
|
||||
- type: 'csv_header'
|
||||
value: 'CAP_EQUITY_AMT'
|
||||
column_header: 'CAP_EQUITY_AMT'
|
||||
- type: 'csv_header'
|
||||
value: 'BLOOMBERG_TICKER'
|
||||
column_header: 'BLOOMBERG_TICKER'
|
||||
@@ -0,0 +1,35 @@
|
||||
# App configurations
|
||||
encoding_type: latin1
|
||||
|
||||
# Global configurations
|
||||
tmpdir: /tmp
|
||||
inbox_prefix: INBOX/FXCD/EQUIVALENCE_RULE
|
||||
archive_prefix: ARCHIVE/FXCD/EQUIVALENCE_RULE
|
||||
workflow_name: w_ODS_FXCD_F_EQUIVALENCE_RULE
|
||||
validation_schema_path: None
|
||||
file_type: csv
|
||||
|
||||
tasks:
|
||||
- task_name: m_ODS_FXCD_F_EQUIVALENCE_RULE_PARSE
|
||||
ods_prefix: INBOX/FXCD/EQUIVALENCE_RULE/FXCD_F_EQUIVALENCE_RULE
|
||||
output_table: FXCD_F_EQUIVALENCE_RULE
|
||||
output_columns:
|
||||
- type: 'a_key'
|
||||
column_header: 'A_KEY'
|
||||
- type: 'workflow_key'
|
||||
column_header: 'A_WORKFLOW_HISTORY_KEY'
|
||||
- type: 'csv_header'
|
||||
value: 'RATING_TERM_TYPE'
|
||||
column_header: 'RATING_TERM_TYPE'
|
||||
- type: 'csv_header'
|
||||
value: 'AGENCY_ID'
|
||||
column_header: 'AGENCY_ID'
|
||||
- type: 'csv_header'
|
||||
value: 'AGENCY_RATING'
|
||||
column_header: 'AGENCY_RATING'
|
||||
- type: 'csv_header'
|
||||
value: 'NUMERICAL_EQUIVALENCE'
|
||||
column_header: 'NUMERICAL_EQUIVALENCE'
|
||||
- type: 'csv_header'
|
||||
value: 'RATING_FACTOR'
|
||||
column_header: 'RATING_FACTOR'
|
||||
77
airflow/ods/fxcd/ISSUER/config/m_ODS_FXCD_F_ISSUER_PARSE.yml
Normal file
77
airflow/ods/fxcd/ISSUER/config/m_ODS_FXCD_F_ISSUER_PARSE.yml
Normal file
@@ -0,0 +1,77 @@
|
||||
# App configurations
|
||||
encoding_type: latin1
|
||||
|
||||
# Global configurations
|
||||
tmpdir: /tmp
|
||||
inbox_prefix: INBOX/FXCD/ISSUER
|
||||
archive_prefix: ARCHIVE/FXCD/ISSUER
|
||||
workflow_name: w_ODS_FXCD_F_ISSUER
|
||||
validation_schema_path: None
|
||||
file_type: csv
|
||||
|
||||
tasks:
|
||||
- task_name: m_ODS_FXCD_F_ISSUER_PARSE
|
||||
ods_prefix: INBOX/FXCD/ISSUER/FXCD_F_ISSUER
|
||||
output_table: FXCD_F_ISSUER
|
||||
output_columns:
|
||||
- type: 'a_key'
|
||||
column_header: 'A_KEY'
|
||||
- type: 'workflow_key'
|
||||
column_header: 'A_WORKFLOW_HISTORY_KEY'
|
||||
- type: 'csv_header'
|
||||
value: 'ISSUER_ID'
|
||||
column_header: 'ISSUER_ID'
|
||||
- type: 'csv_header'
|
||||
value: 'ENTITY_ID'
|
||||
column_header: 'ENTITY_ID'
|
||||
- type: 'csv_header'
|
||||
value: 'ACTIVE_FLAG'
|
||||
column_header: 'ACTIVE_FLAG'
|
||||
- type: 'csv_header'
|
||||
value: 'ISSUER_COMMENT'
|
||||
column_header: 'ISSUER_COMMENT'
|
||||
- type: 'csv_header'
|
||||
value: 'OFM_ISSUER_LMT'
|
||||
column_header: 'OFM_ISSUER_LMT'
|
||||
- type: 'csv_header'
|
||||
value: 'OF_MANUAL_FLAG'
|
||||
column_header: 'OF_MANUAL_FLAG'
|
||||
- type: 'csv_header'
|
||||
value: 'GLOBAL_LMT_AMNT'
|
||||
column_header: 'GLOBAL_LMT_AMNT'
|
||||
- type: 'csv_header'
|
||||
value: 'MANUAL_GLOBAL_LMT_FLAG'
|
||||
column_header: 'MANUAL_GLOBAL_LMT_FLAG'
|
||||
- type: 'csv_header'
|
||||
value: 'OF_ISSUER_CATEGORY'
|
||||
column_header: 'OF_ISSUER_CATEGORY'
|
||||
- type: 'csv_header'
|
||||
value: 'FR_ISSUER_CATEGORY'
|
||||
column_header: 'FR_ISSUER_CATEGORY'
|
||||
- type: 'csv_header'
|
||||
value: 'ISSUER_LMT'
|
||||
column_header: 'ISSUER_LMT'
|
||||
- type: 'csv_header'
|
||||
value: 'FR_MANUAL_FLAG'
|
||||
column_header: 'FR_MANUAL_FLAG'
|
||||
- type: 'csv_header'
|
||||
value: 'NCB_USD_LAMBDA'
|
||||
column_header: 'NCB_USD_LAMBDA'
|
||||
- type: 'csv_header'
|
||||
value: 'ART_101_FLAG'
|
||||
column_header: 'ART_101_FLAG'
|
||||
- type: 'csv_header'
|
||||
value: 'UPLIFT_IMPLIED_RATING'
|
||||
column_header: 'UPLIFT_IMPLIED_RATING'
|
||||
- type: 'csv_header'
|
||||
value: 'LAMBDA_MANUAL_FLAG'
|
||||
column_header: 'LAMBDA_MANUAL_FLAG'
|
||||
- type: 'csv_header'
|
||||
value: 'CHANGE_DESCRIPTION'
|
||||
column_header: 'CHANGE_DESCRIPTION'
|
||||
- type: 'csv_header'
|
||||
value: 'LT_FRM_LIMIT_RATIO'
|
||||
column_header: 'LT_FRM_LIMIT_RATIO'
|
||||
- type: 'csv_header'
|
||||
value: 'LT_FRM_LIMIT_RATIO_MANUAL_FLAG'
|
||||
column_header: 'LT_FRM_LIMIT_RATIO_MANUAL_FLAG'
|
||||
@@ -0,0 +1,38 @@
|
||||
# App configurations
|
||||
encoding_type: latin1
|
||||
|
||||
# Global configurations
|
||||
tmpdir: /tmp
|
||||
inbox_prefix: INBOX/FXCD/ISSUER_NCB_LIMIT
|
||||
archive_prefix: ARCHIVE/FXCD/ISSUER_NCB_LIMIT
|
||||
workflow_name: w_ODS_FXCD_F_ISSUER_NCB_LIMIT
|
||||
validation_schema_path: None
|
||||
file_type: csv
|
||||
|
||||
tasks:
|
||||
- task_name: m_ODS_FXCD_F_ISSUER_NCB_LIMIT_PARSE
|
||||
ods_prefix: INBOX/FXCD/ISSUER_NCB_LIMIT/FXCD_F_ISSUER_NCB_LIMIT
|
||||
output_table: FXCD_F_ISSUER_NCB_LIMIT
|
||||
output_columns:
|
||||
- type: 'a_key'
|
||||
column_header: 'A_KEY'
|
||||
- type: 'workflow_key'
|
||||
column_header: 'A_WORKFLOW_HISTORY_KEY'
|
||||
- type: 'csv_header'
|
||||
value: 'COUNTRY_ID'
|
||||
column_header: 'COUNTRY_ID'
|
||||
- type: 'csv_header'
|
||||
value: 'ISSUER_ID'
|
||||
column_header: 'ISSUER_ID'
|
||||
- type: 'csv_header'
|
||||
value: 'USD_LIMIT_AMT'
|
||||
column_header: 'USD_LIMIT_AMT'
|
||||
- type: 'csv_header'
|
||||
value: 'JPY_LIMIT_AMT'
|
||||
column_header: 'JPY_LIMIT_AMT'
|
||||
- type: 'csv_header'
|
||||
value: 'TOTAL_LIMIT_AMT'
|
||||
column_header: 'TOTAL_LIMIT_AMT'
|
||||
- type: 'csv_header'
|
||||
value: 'LIMIT_TYPE'
|
||||
column_header: 'LIMIT_TYPE'
|
||||
@@ -0,0 +1,29 @@
|
||||
# App configurations
|
||||
encoding_type: latin1
|
||||
|
||||
# Global configurations
|
||||
tmpdir: /tmp
|
||||
inbox_prefix: INBOX/FXCD/LIQUIDITY_NCB_LIMIT
|
||||
archive_prefix: ARCHIVE/FXCD/LIQUIDITY_NCB_LIMIT
|
||||
workflow_name: w_ODS_FXCD_F_LIQUIDITY_NCB_LIMIT
|
||||
validation_schema_path: None
|
||||
file_type: csv
|
||||
|
||||
tasks:
|
||||
- task_name: m_ODS_FXCD_F_LIQUIDITY_NCB_LIMIT_PARSE
|
||||
ods_prefix: INBOX/FXCD/LIQUIDITY_NCB_LIMIT/FXCD_F_LIQUIDITY_NCB_LIMIT
|
||||
output_table: FXCD_F_LIQUIDITY_NCB_LIMIT
|
||||
output_columns:
|
||||
- type: 'a_key'
|
||||
column_header: 'A_KEY'
|
||||
- type: 'workflow_key'
|
||||
column_header: 'A_WORKFLOW_HISTORY_KEY'
|
||||
- type: 'csv_header'
|
||||
value: 'COUNTRY_ID'
|
||||
column_header: 'COUNTRY_ID'
|
||||
- type: 'csv_header'
|
||||
value: 'USD_NCB_MIN_LIMIT_AMT'
|
||||
column_header: 'USD_NCB_MIN_LIMIT_AMT'
|
||||
- type: 'csv_header'
|
||||
value: 'USD_NCB_MAX_LIMIT_AMT'
|
||||
column_header: 'USD_NCB_MAX_LIMIT_AMT'
|
||||
@@ -0,0 +1,47 @@
|
||||
# App configurations
|
||||
encoding_type: latin1
|
||||
|
||||
# Global configurations
|
||||
tmpdir: /tmp
|
||||
inbox_prefix: INBOX/FXCD/NCB_PORTFOLIO_SHARE
|
||||
archive_prefix: ARCHIVE/FXCD/NCB_PORTFOLIO_SHARE
|
||||
workflow_name: w_ODS_FXCD_F_NCB_PORTFOLIO_SHARE
|
||||
validation_schema_path: None
|
||||
file_type: csv
|
||||
|
||||
tasks:
|
||||
- task_name: m_ODS_FXCD_F_NCB_PORTFOLIO_SHARE_PARSE
|
||||
ods_prefix: INBOX/FXCD/NCB_PORTFOLIO_SHARE/FXCD_F_NCB_PORTFOLIO_SHARE
|
||||
output_table: FXCD_F_NCB_PORTFOLIO_SHARE
|
||||
output_columns:
|
||||
- type: 'a_key'
|
||||
column_header: 'A_KEY'
|
||||
- type: 'workflow_key'
|
||||
column_header: 'A_WORKFLOW_HISTORY_KEY'
|
||||
- type: 'csv_header'
|
||||
value: 'COUNTRY_ID'
|
||||
column_header: 'COUNTRY_ID'
|
||||
- type: 'csv_header'
|
||||
value: 'COUNTRY_SHARE'
|
||||
column_header: 'COUNTRY_SHARE'
|
||||
- type: 'csv_header'
|
||||
value: 'USD_COUNTRY_SHARE'
|
||||
column_header: 'USD_COUNTRY_SHARE'
|
||||
- type: 'csv_header'
|
||||
value: 'JPY_COUNTRY_SHARE'
|
||||
column_header: 'JPY_COUNTRY_SHARE'
|
||||
- type: 'csv_header'
|
||||
value: 'SCALED_COUNTRY_SHARE'
|
||||
column_header: 'SCALED_COUNTRY_SHARE'
|
||||
- type: 'csv_header'
|
||||
value: 'SCALED_USD_COUNTRY_SHARE'
|
||||
column_header: 'SCALED_USD_COUNTRY_SHARE'
|
||||
- type: 'csv_header'
|
||||
value: 'SCALED_JPY_COUNTRY_SHARE'
|
||||
column_header: 'SCALED_JPY_COUNTRY_SHARE'
|
||||
- type: 'csv_header'
|
||||
value: 'CNY_COUNTRY_SHARE'
|
||||
column_header: 'CNY_COUNTRY_SHARE'
|
||||
- type: 'csv_header'
|
||||
value: 'SCALED_CNY_COUNTRY_SHARE'
|
||||
column_header: 'SCALED_CNY_COUNTRY_SHARE'
|
||||
32
airflow/ods/fxcd/RATING/config/m_ODS_FXCD_F_RATING_PARSE.yml
Normal file
32
airflow/ods/fxcd/RATING/config/m_ODS_FXCD_F_RATING_PARSE.yml
Normal file
@@ -0,0 +1,32 @@
|
||||
# App configurations
|
||||
encoding_type: latin1
|
||||
|
||||
# Global configurations
|
||||
tmpdir: /tmp
|
||||
inbox_prefix: INBOX/FXCD/RATING
|
||||
archive_prefix: ARCHIVE/FXCD/RATING
|
||||
workflow_name: w_ODS_FXCD_F_RATING
|
||||
validation_schema_path: None
|
||||
file_type: csv
|
||||
|
||||
tasks:
|
||||
- task_name: m_ODS_FXCD_F_RATING_PARSE
|
||||
ods_prefix: INBOX/FXCD/RATING/FXCD_F_RATING
|
||||
output_table: FXCD_F_RATING
|
||||
output_columns:
|
||||
- type: 'a_key'
|
||||
column_header: 'A_KEY'
|
||||
- type: 'workflow_key'
|
||||
column_header: 'A_WORKFLOW_HISTORY_KEY'
|
||||
- type: 'csv_header'
|
||||
value: 'ENTITY_ID'
|
||||
column_header: 'ENTITY_ID'
|
||||
- type: 'csv_header'
|
||||
value: 'RATING_TERM_TYPE'
|
||||
column_header: 'RATING_TERM_TYPE'
|
||||
- type: 'csv_header'
|
||||
value: 'AGENCY_ID'
|
||||
column_header: 'AGENCY_ID'
|
||||
- type: 'csv_header'
|
||||
value: 'AGENCY_RATING'
|
||||
column_header: 'AGENCY_RATING'
|
||||
@@ -0,0 +1,29 @@
|
||||
# App configurations
|
||||
encoding_type: latin1
|
||||
|
||||
# Global configurations
|
||||
tmpdir: /tmp
|
||||
inbox_prefix: INBOX/FXCD/RATING_AGENCY
|
||||
archive_prefix: ARCHIVE/FXCD/RATING_AGENCY
|
||||
workflow_name: w_ODS_FXCD_F_RATING_AGENCY
|
||||
validation_schema_path: None
|
||||
file_type: csv
|
||||
|
||||
tasks:
|
||||
- task_name: m_ODS_FXCD_F_RATING_AGENCY_PARSE
|
||||
ods_prefix: INBOX/FXCD/RATING_AGENCY/FXCD_F_RATING_AGENCY
|
||||
output_table: FXCD_F_RATING_AGENCY
|
||||
output_columns:
|
||||
- type: 'a_key'
|
||||
column_header: 'A_KEY'
|
||||
- type: 'workflow_key'
|
||||
column_header: 'A_WORKFLOW_HISTORY_KEY'
|
||||
- type: 'csv_header'
|
||||
value: 'AGENCY_ID'
|
||||
column_header: 'AGENCY_ID'
|
||||
- type: 'csv_header'
|
||||
value: 'AGENCY_NAME'
|
||||
column_header: 'AGENCY_NAME'
|
||||
- type: 'csv_header'
|
||||
value: 'ENABLED_FLAG'
|
||||
column_header: 'ENABLED_FLAG'
|
||||
0
airflow/ods/lm/adhoc_adjustments/config/.gitkeep
Normal file
0
airflow/ods/lm/adhoc_adjustments/config/.gitkeep
Normal file
129
airflow/ods/lm/adhoc_adjustments/config/adhoc_adjustments.xsd
Normal file
129
airflow/ods/lm/adhoc_adjustments/config/adhoc_adjustments.xsd
Normal file
@@ -0,0 +1,129 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
||||
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
|
||||
xmlns="http://escb.ecb.int/csm-adjustment"
|
||||
xmlns:lm="http://exdi.ecb.int/lm"
|
||||
targetNamespace="http://escb.ecb.int/csm-adjustment"
|
||||
elementFormDefault="qualified"
|
||||
attributeFormDefault="unqualified">
|
||||
|
||||
<xs:import namespace="http://exdi.ecb.int/lm" schemaLocation="../../lm_common/lm.xsd" />
|
||||
|
||||
<xs:element name="adjustmentMessages">
|
||||
<xs:complexType>
|
||||
<xs:choice>
|
||||
<xs:element ref="csmAdjustmentMessage" />
|
||||
<xs:element ref="quarterlyRevaluationAdjustmentMessage" />
|
||||
<xs:element ref="adhocAdjustmentMessage" />
|
||||
</xs:choice>
|
||||
</xs:complexType>
|
||||
</xs:element>
|
||||
|
||||
<xs:element name="csmAdjustmentMessage">
|
||||
<xs:complexType>
|
||||
<xs:sequence>
|
||||
<xs:element name="header">
|
||||
<xs:complexType>
|
||||
<xs:sequence>
|
||||
<xs:element name="year" type="xs:gYear" />
|
||||
<xs:element name="month" type="month" />
|
||||
<xs:element name="version" type="lm:positiveInt" />
|
||||
</xs:sequence>
|
||||
</xs:complexType>
|
||||
</xs:element>
|
||||
<xs:element name="adjustment" type="adjustmentSingleForecast" minOccurs="1" maxOccurs="unbounded" />
|
||||
</xs:sequence>
|
||||
</xs:complexType>
|
||||
</xs:element>
|
||||
|
||||
<xs:element name="quarterlyRevaluationAdjustmentMessage">
|
||||
<xs:complexType>
|
||||
<xs:sequence>
|
||||
<xs:element name="header">
|
||||
<xs:complexType>
|
||||
<xs:sequence>
|
||||
<xs:element name="year" type="xs:gYear" />
|
||||
<xs:element name="quarter" type="quarter" />
|
||||
<xs:element name="version" type="lm:positiveInt" />
|
||||
</xs:sequence>
|
||||
</xs:complexType>
|
||||
</xs:element>
|
||||
<xs:element name="adjustment" type="adjustmentMultipleForecasts" minOccurs="1" maxOccurs="unbounded" />
|
||||
</xs:sequence>
|
||||
</xs:complexType>
|
||||
</xs:element>
|
||||
|
||||
<xs:element name="adhocAdjustmentMessage">
|
||||
<xs:complexType>
|
||||
<xs:sequence>
|
||||
<xs:element name="header">
|
||||
<xs:complexType>
|
||||
<xs:sequence>
|
||||
<xs:element name="date" type="xs:date" />
|
||||
<xs:element name="version" type="lm:positiveInt" />
|
||||
</xs:sequence>
|
||||
</xs:complexType>
|
||||
</xs:element>
|
||||
<xs:element name="adjustment" type="adjustmentMultipleForecasts" minOccurs="1" maxOccurs="unbounded" />
|
||||
</xs:sequence>
|
||||
</xs:complexType>
|
||||
</xs:element>
|
||||
|
||||
<xs:complexType name="baseAdjustment">
|
||||
<xs:sequence>
|
||||
<xs:element name="country" type="lm:isoCode" />
|
||||
<xs:element name="effectiveDate" type="xs:date" />
|
||||
<xs:element name="lastDateNotInForecast" type="xs:date" />
|
||||
</xs:sequence>
|
||||
</xs:complexType>
|
||||
|
||||
<xs:complexType name="adjustmentSingleForecast">
|
||||
<xs:complexContent>
|
||||
<xs:extension base="baseAdjustment">
|
||||
<xs:sequence>
|
||||
<xs:element name="forecastItem" type="forecastItem" />
|
||||
</xs:sequence>
|
||||
</xs:extension>
|
||||
</xs:complexContent>
|
||||
</xs:complexType>
|
||||
|
||||
<xs:complexType name="adjustmentMultipleForecasts">
|
||||
<xs:complexContent>
|
||||
<xs:extension base="baseAdjustment">
|
||||
<xs:sequence>
|
||||
<xs:element name="forecastItem" type="forecastItem" minOccurs="1" maxOccurs="unbounded" />
|
||||
</xs:sequence>
|
||||
</xs:extension>
|
||||
</xs:complexContent>
|
||||
</xs:complexType>
|
||||
|
||||
<xs:complexType name="forecastItem">
|
||||
<xs:sequence>
|
||||
<xs:element name="forecastName" type="lm:forecastName" />
|
||||
<xs:element name="adjustmentAmount" type="extendedDecimalEuroValue" />
|
||||
</xs:sequence>
|
||||
</xs:complexType>
|
||||
|
||||
<xs:simpleType name="extendedDecimalEuroValue">
|
||||
<xs:restriction base="xs:decimal">
|
||||
<xs:totalDigits value="17" />
|
||||
<xs:fractionDigits value="8" />
|
||||
<xs:minInclusive value="-999999999.99999999" />
|
||||
<xs:maxInclusive value="999999999.99999999" />
|
||||
</xs:restriction>
|
||||
</xs:simpleType>
|
||||
|
||||
<xs:simpleType name="month">
|
||||
<xs:restriction base="xs:int">
|
||||
<xs:minInclusive value="1" />
|
||||
<xs:maxInclusive value="12" />
|
||||
</xs:restriction>
|
||||
</xs:simpleType>
|
||||
|
||||
<xs:simpleType name="quarter">
|
||||
<xs:restriction base="xs:int">
|
||||
<xs:minInclusive value="1" />
|
||||
<xs:maxInclusive value="4" />
|
||||
</xs:restriction>
|
||||
</xs:simpleType>
|
||||
|
||||
</xs:schema>
|
||||
@@ -0,0 +1,78 @@
|
||||
tmpdir: /tmp
|
||||
inbox_prefix: INBOX/LM/AdhocAdjustment
|
||||
archive_prefix: ARCHIVE/LM/AdhocAdjustment
|
||||
workflow_name: w_ODS_LM_ADHOC_ADJUSTMENT_MSG
|
||||
validation_schema_path: '/opt/airflow/src/airflow/dags/ods/lm/adhoc_adjustments/config/adhoc_adjustments.xsd'
|
||||
file_type: xml
|
||||
|
||||
# List of tasks
|
||||
tasks:
|
||||
- task_name: m_ODS_LM_ADHOC_ADJUSTMENTS_HEADER_PARSE
|
||||
ods_prefix: INBOX/LM/AdhocAdjustment/LM_ADHOC_ADJUSTMENTS_HEADER
|
||||
output_table: LM_ADHOC_ADJUSTMENTS_HEADER
|
||||
namespaces:
|
||||
ns2: 'http://escb.ecb.int/csm-adjustment'
|
||||
output_columns:
|
||||
- type: 'xpath_element_id'
|
||||
value: '/ns2:adjustmentMessages/ns2:adhocAdjustmentMessage'
|
||||
column_header: 'A_KEY'
|
||||
- type: 'workflow_key'
|
||||
column_header: 'A_WORKFLOW_HISTORY_KEY'
|
||||
- type: 'xpath'
|
||||
value: '/ns2:adjustmentMessages/ns2:adhocAdjustmentMessage/ns2:header/ns2:date'
|
||||
column_header: 'ADJUSTMENT_DATE'
|
||||
is_key: 'N'
|
||||
- type: 'xpath'
|
||||
value: '/ns2:adjustmentMessages/ns2:adhocAdjustmentMessage/ns2:header/ns2:version'
|
||||
column_header: 'VERSION'
|
||||
is_key: 'N'
|
||||
- task_name: m_ODS_LM_ADHOC_ADJUSTMENTS_ITEM_HEADER_PARSE
|
||||
ods_prefix: INBOX/LM/AdhocAdjustment/LM_ADHOC_ADJUSTMENTS_ITEM_HEADER
|
||||
output_table: LM_ADHOC_ADJUSTMENTS_ITEM_HEADER
|
||||
namespaces:
|
||||
ns2: 'http://escb.ecb.int/csm-adjustment'
|
||||
output_columns:
|
||||
- type: 'xpath_element_id'
|
||||
value: '/ns2:adjustmentMessages/ns2:adhocAdjustmentMessage/ns2:adjustment'
|
||||
column_header: 'A_KEY'
|
||||
- type: 'workflow_key'
|
||||
column_header: 'A_WORKFLOW_HISTORY_KEY'
|
||||
- type: 'xpath_element_id'
|
||||
value: '/ns2:adjustmentMessages/ns2:adhocAdjustmentMessage'
|
||||
column_header: 'A_HEADER_FK'
|
||||
- type: 'xpath'
|
||||
value: '/ns2:adjustmentMessages/ns2:adhocAdjustmentMessage/ns2:adjustment/ns2:country'
|
||||
column_header: 'COUNTRY'
|
||||
is_key: 'N'
|
||||
- type: 'xpath'
|
||||
value: '/ns2:adjustmentMessages/ns2:adhocAdjustmentMessage/ns2:adjustment/ns2:effectiveDate'
|
||||
column_header: 'EFFECTIVE_DATE'
|
||||
is_key: 'N'
|
||||
- type: 'xpath'
|
||||
value: '/ns2:adjustmentMessages/ns2:adhocAdjustmentMessage/ns2:adjustment/ns2:lastDateNotInForecast'
|
||||
column_header: 'LAST_DATE_NOT_FORECAST'
|
||||
is_key: 'N'
|
||||
|
||||
- task_name: m_ODS_LM_ADHOC_ADJUSTMENTS_ITEM_PARSE
|
||||
ods_prefix: INBOX/LM/AdhocAdjustment/LM_ADHOC_ADJUSTMENTS_ITEM
|
||||
output_table: LM_ADHOC_ADJUSTMENTS_ITEM
|
||||
namespaces:
|
||||
ns2: 'http://escb.ecb.int/csm-adjustment'
|
||||
output_columns:
|
||||
- type: 'a_key'
|
||||
column_header: 'A_KEY'
|
||||
- type: 'workflow_key'
|
||||
column_header: 'A_WORKFLOW_HISTORY_KEY'
|
||||
- type: 'xpath_element_id'
|
||||
value: '/ns2:adjustmentMessages/ns2:adhocAdjustmentMessage/ns2:adjustment'
|
||||
column_header: 'A_HEADER_FK'
|
||||
- type: 'xpath'
|
||||
value: '/ns2:adjustmentMessages/ns2:adhocAdjustmentMessage/ns2:adjustment/ns2:forecastItem/ns2:forecastName'
|
||||
column_header: 'FORECAST_NAME'
|
||||
is_key: 'N'
|
||||
- type: 'xpath'
|
||||
value: '/ns2:adjustmentMessages/ns2:adhocAdjustmentMessage/ns2:adjustment/ns2:forecastItem/ns2:adjustmentAmount'
|
||||
column_header: 'ADJUSTMENT_AMOUNT'
|
||||
is_key: 'N'
|
||||
|
||||
|
||||
@@ -0,0 +1,520 @@
|
||||
# dags/w_ODS_LM_ADHOC_ADJUSTMENT_MSG.py
|
||||
# Idempotent, per-object mtime tracking
|
||||
|
||||
import sys
|
||||
import os
|
||||
import json
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from datetime import timedelta, datetime, timezone
|
||||
from email.utils import parsedate_to_datetime
|
||||
|
||||
from airflow import DAG
|
||||
from airflow.models import Variable
|
||||
from airflow.decorators import task as af_task
|
||||
from airflow.operators.python import PythonOperator
|
||||
from airflow.utils.dates import days_ago
|
||||
from airflow.utils.trigger_rule import TriggerRule
|
||||
from airflow.operators.trigger_dagrun import TriggerDagRunOperator
|
||||
from airflow.operators.empty import EmptyOperator
|
||||
|
||||
try:
|
||||
from airflow.exceptions import AirflowFailException, AirflowSkipException
|
||||
except Exception:
|
||||
from airflow.exceptions import AirflowException as AirflowFailException
|
||||
from airflow.exceptions import AirflowSkipException
|
||||
|
||||
# Import libs
|
||||
sys.path.append('/opt/airflow/python/mrds_common')
|
||||
sys.path.append('/opt/airflow/src/airflow/dags/ods/exdi')
|
||||
from mrds.utils.manage_runs import init_workflow as mrds_init_workflow, finalise_workflow as mrds_finalise_workflow
|
||||
from mrds.core import main as mrds_main
|
||||
|
||||
dag_id = Path(__file__).stem
|
||||
|
||||
default_args = {
|
||||
'owner': 'airflow',
|
||||
'depends_on_past': False,
|
||||
'start_date': days_ago(1),
|
||||
'email_on_failure': False,
|
||||
'email_on_retry': False,
|
||||
'retries': 1,
|
||||
'retry_delay': timedelta(minutes=5),
|
||||
}
|
||||
|
||||
WORKFLOW_CONFIG = {
|
||||
"database_name": "ODS",
|
||||
"workflow_name": dag_id,
|
||||
}
|
||||
|
||||
# OCI settings
|
||||
OCI_NAMESPACE = os.getenv("BUCKET_NAMESPACE")
|
||||
OCI_BUCKET = os.getenv("INBOX_BUCKET")
|
||||
|
||||
# Config YAML (single config for all files)
|
||||
CONFIG_YAML = os.getenv(
|
||||
"EXDI_SINGLE_CONFIG_YAML",
|
||||
"/opt/airflow/src/airflow/dags/ods/lm/adhoc_adjustments/config/m_ODS_LM_ADHOC_ADJUSTMENT_PARSE.yaml",
|
||||
|
||||
)
|
||||
logging.info("Using EXDI_SINGLE_CONFIG_YAML=%s", CONFIG_YAML)
|
||||
|
||||
# Idempotency controls
|
||||
REPROCESS = (os.getenv("EXDI_REPROCESS", "false").lower() in ("1", "true", "yes"))
|
||||
LAST_TS_VAR = f"{dag_id}__last_seen_ts" # legacy watermark (kept for observability)
|
||||
PROCESSED_SET_VAR = f"{dag_id}__processed_objects" # legacy: list of keys (back-compat only)
|
||||
PROCESSED_TS_VAR = f"{dag_id}__processed_objects_ts" # NEW: map key -> last processed mtime (epoch float)
|
||||
|
||||
|
||||
# Helpers
|
||||
|
||||
def _oci_client():
|
||||
"""
|
||||
Create an OCI Object Storage client.
|
||||
Order: Resource Principals -> Instance Principals.
|
||||
"""
|
||||
import oci
|
||||
region = os.getenv("OCI_REGION") or os.getenv("OCI_RESOURCE_PRINCIPAL_REGION") or "eu-frankfurt-1"
|
||||
# RP
|
||||
try:
|
||||
rp_signer = oci.auth.signers.get_resource_principals_signer()
|
||||
cfg = {"region": region} if region else {}
|
||||
logging.info("Using OCI Resource Principals signer (region=%s).", cfg.get("region"))
|
||||
return oci.object_storage.ObjectStorageClient(cfg, signer=rp_signer)
|
||||
except Exception as e:
|
||||
logging.info("RP not available: %s", e)
|
||||
# IP
|
||||
try:
|
||||
ip_signer = oci.auth.signers.InstancePrincipalsSecurityTokenSigner()
|
||||
cfg = {"region": region} if region else {}
|
||||
logging.info("Using OCI Instance Principals signer (region=%s).", cfg.get("region"))
|
||||
return oci.object_storage.ObjectStorageClient(cfg, signer=ip_signer)
|
||||
except Exception as e:
|
||||
logging.info("IP not available: %s", e)
|
||||
|
||||
logging.error("Neither Resource Principals nor Instance Principals authentication found.")
|
||||
raise RuntimeError("Failed to create OCI client")
|
||||
|
||||
def _load_yaml(cfg_path: str) -> dict:
|
||||
import yaml
|
||||
p = Path(cfg_path)
|
||||
if not p.exists():
|
||||
raise FileNotFoundError(f"Config YAML not found: {cfg_path}")
|
||||
return yaml.safe_load(p.read_text()) or {}
|
||||
|
||||
# Build config-derived constants directly from YAML
|
||||
try:
|
||||
CONFIG_DATA = _load_yaml(CONFIG_YAML)
|
||||
OBJECT_PREFIX = CONFIG_DATA.get("inbox_prefix")
|
||||
if not (isinstance(OBJECT_PREFIX, str) and OBJECT_PREFIX.strip()):
|
||||
raise AirflowFailException("YAML must define 'inbox_prefix' for OBJECT_PREFIX.")
|
||||
OBJECT_PREFIX = OBJECT_PREFIX.strip()
|
||||
logging.info("YAML inbox_prefix -> OBJECT_PREFIX: %s", OBJECT_PREFIX)
|
||||
except Exception as e:
|
||||
logging.error("Failed to resolve OBJECT_PREFIX from YAML %s: %s", CONFIG_YAML, e)
|
||||
OBJECT_PREFIX = None
|
||||
|
||||
# New idempotency map (key -> last_processed_ts)
|
||||
def _load_processed_map() -> dict[str, float]:
|
||||
"""
|
||||
Returns {object_key: last_processed_ts}.
|
||||
Back-compat: if old set variable exists (list), treat those keys as ts=0.
|
||||
"""
|
||||
try:
|
||||
raw = Variable.get(PROCESSED_TS_VAR, default_var="{}")
|
||||
m = json.loads(raw) or {}
|
||||
if isinstance(m, dict):
|
||||
return {k: float(v) for k, v in m.items()}
|
||||
except Exception:
|
||||
pass
|
||||
# Back-compat: migrate old set/list
|
||||
try:
|
||||
old = json.loads(Variable.get(PROCESSED_SET_VAR, default_var="[]"))
|
||||
if isinstance(old, list):
|
||||
return {k: 0.0 for k in old}
|
||||
except Exception:
|
||||
pass
|
||||
return {}
|
||||
|
||||
def _save_processed_map(m: dict[str, float]) -> None:
|
||||
Variable.set(PROCESSED_TS_VAR, json.dumps(m))
|
||||
|
||||
def _mark_processed_ts(objs: list[tuple[str, float]]):
|
||||
"""
|
||||
Update processed map with list of (object_key, mtime).
|
||||
"""
|
||||
if REPROCESS or not objs:
|
||||
return
|
||||
m = _load_processed_map()
|
||||
for key, ts in objs:
|
||||
try:
|
||||
ts = float(ts)
|
||||
except Exception:
|
||||
continue
|
||||
prev = float(m.get(key, 0.0))
|
||||
if ts > prev:
|
||||
m[key] = ts
|
||||
_save_processed_map(m)
|
||||
logging.info("Processed map updated; size=%d", len(m))
|
||||
|
||||
# Object listing (per-key mtime)
|
||||
def _list_new_xml_objects(prefix: str) -> list[dict]:
|
||||
"""
|
||||
List .xml objects and decide inclusion per-object:
|
||||
include if REPROCESS or object_mtime > processed_map.get(object_key, 0.0)
|
||||
Returns: [{"name": "<full-key>", "base": "<file.xml>", "mtime": <epoch float>}]
|
||||
"""
|
||||
if not OCI_NAMESPACE or not OCI_BUCKET:
|
||||
raise AirflowFailException("BUCKET_NAMESPACE and INBOX_BUCKET must be set")
|
||||
|
||||
client = _oci_client()
|
||||
processed_map = _load_processed_map()
|
||||
|
||||
try:
|
||||
last_seen = float(Variable.get(LAST_TS_VAR, default_var="0"))
|
||||
except Exception:
|
||||
last_seen = 0.0
|
||||
|
||||
logging.info("Watermark last_seen=%s; processed_map_count=%d; prefix=%s",
|
||||
last_seen, len(processed_map), prefix)
|
||||
|
||||
# NOTE: add pagination if needed
|
||||
resp = client.list_objects(OCI_NAMESPACE, OCI_BUCKET, prefix=prefix)
|
||||
|
||||
new_items: list[dict] = []
|
||||
newest_ts = last_seen
|
||||
|
||||
for o in (resp.data.objects or []):
|
||||
name = (o.name or "").strip()
|
||||
base = name.rsplit("/", 1)[-1] if name else ""
|
||||
logging.info("Processing object: %s", base)
|
||||
|
||||
# Skip folder markers / empty keys
|
||||
if not name or name.endswith('/') or not base:
|
||||
logging.debug("Skip: folder marker or empty key: %r", name)
|
||||
continue
|
||||
|
||||
if not base.lower().endswith(".xml"):
|
||||
logging.debug("Skip: not .xml: %r", name)
|
||||
continue
|
||||
|
||||
# Resolve mtime
|
||||
ts = None
|
||||
t = getattr(o, "time_created", None)
|
||||
if t:
|
||||
try:
|
||||
ts = t.timestamp() if hasattr(t, "timestamp") else float(t) / 1000.0
|
||||
except Exception:
|
||||
ts = None
|
||||
|
||||
if ts is None:
|
||||
try:
|
||||
head = client.head_object(OCI_NAMESPACE, OCI_BUCKET, name)
|
||||
lm = head.headers.get("last-modified") or head.headers.get("Last-Modified")
|
||||
if lm:
|
||||
dt = parsedate_to_datetime(lm)
|
||||
if dt.tzinfo is None:
|
||||
dt = dt.replace(tzinfo=timezone.utc)
|
||||
ts = dt.timestamp()
|
||||
logging.debug("Resolved ts via HEAD Last-Modified for %s: %s", name, ts)
|
||||
except Exception as e:
|
||||
logging.warning("head_object failed for %s: %s", name, e)
|
||||
|
||||
if ts is None:
|
||||
ts = datetime.now(timezone.utc).timestamp()
|
||||
logging.warning("Object %s missing timestamp; falling back to now=%s", name, ts)
|
||||
|
||||
last_proc_ts = float(processed_map.get(name, 0.0))
|
||||
include = REPROCESS or (ts > last_proc_ts)
|
||||
|
||||
logging.info(
|
||||
"Decision for %s: obj_ts=%s, last_proc_ts=%s, REPROCESS=%s -> include=%s",
|
||||
name, ts, last_proc_ts, REPROCESS, include
|
||||
)
|
||||
|
||||
if not include:
|
||||
continue
|
||||
|
||||
item = {"name": name, "base": base, "mtime": ts}
|
||||
new_items.append(item)
|
||||
if ts > newest_ts:
|
||||
newest_ts = ts
|
||||
|
||||
# Watermark advanced for visibility (optional)
|
||||
if not REPROCESS and new_items and newest_ts > last_seen:
|
||||
Variable.set(LAST_TS_VAR, str(newest_ts))
|
||||
logging.info("Advanced watermark from %s to %s", last_seen, newest_ts)
|
||||
|
||||
new_items.sort(key=lambda x: x["mtime"]) # ascending
|
||||
logging.info("Found %d candidate .xml object(s) under prefix %s", len(new_items), prefix)
|
||||
return new_items
|
||||
|
||||
|
||||
# DAG
|
||||
|
||||
with DAG(
|
||||
dag_id=dag_id,
|
||||
default_args=default_args,
|
||||
description='EXDI workflow (polling): single YAML config for all XML files in OCI',
|
||||
schedule_interval=None, # Run EVERY 10 MIN
|
||||
catchup=False,
|
||||
max_active_runs=1,
|
||||
render_template_as_native_obj=True,
|
||||
tags=["EXDI", "MRDS", "ODS", "OCI", "ADHOC_ADJUSTMENT"],
|
||||
) as dag:
|
||||
|
||||
@af_task(task_id="poll_oci_for_xml")
|
||||
def poll_oci_for_xml():
|
||||
"""
|
||||
Lists new .xml objects and prepares a workload list.
|
||||
Returns {"workload": [{"object": "<key>", "base": "<file.xml>", "mtime": <float>} ...]}
|
||||
"""
|
||||
if not OBJECT_PREFIX:
|
||||
raise AirflowFailException("No OCI object prefix configured. Check YAML 'inbox_prefix'.")
|
||||
|
||||
new_objs = _list_new_xml_objects(OBJECT_PREFIX)
|
||||
logging.info("New .xml objects found: %s", json.dumps(new_objs, indent=2))
|
||||
print("New .xml objects found:", json.dumps(new_objs, indent=2))
|
||||
|
||||
# already contains base + mtime
|
||||
workload = [{"object": it["name"], "base": it["base"], "mtime": it["mtime"]} for it in new_objs]
|
||||
logging.info("Prepared workload items: %d", len(workload))
|
||||
print("Prepared workload:", json.dumps(workload, indent=2))
|
||||
return {"workload": workload}
|
||||
|
||||
@af_task(task_id="init_workflow")
def init_workflow(polled: dict):
    """Initialize workflow; start MRDS workflow; build per-file task configs."""
    database_name = WORKFLOW_CONFIG["database_name"]
    workflow_name = WORKFLOW_CONFIG["workflow_name"]

    env = os.getenv("MRDS_ENV", "dev")
    # Required loader credentials — validated up-front so the run fails early
    # with an explicit list of what is absent.
    creds = {
        "MRDS_LOADER_DB_USER": os.getenv("MRDS_LOADER_DB_USER"),
        "MRDS_LOADER_DB_PASS": os.getenv("MRDS_LOADER_DB_PASS"),
        "MRDS_LOADER_DB_TNS": os.getenv("MRDS_LOADER_DB_TNS"),
    }
    missing = [name for name, value in creds.items() if not value]
    if missing:
        raise AirflowFailException(f"Missing required env vars: {', '.join(missing)}")

    workload = (polled or {}).get("workload") or []

    # Airflow context for run_id
    from airflow.operators.python import get_current_context
    run_id = str(get_current_context()['ti'].run_id)

    a_workflow_history_key = mrds_init_workflow(database_name, workflow_name, run_id)

    workflow_context = {
        "run_id": run_id,
        "a_workflow_history_key": a_workflow_history_key,
    }

    # One task config per polled file, numbered sequentially from 1.
    task_base_name = "m_ODS_LM_ADHOC_ADJUSTMENT"
    task_configs = [
        {
            "task_name": f"{task_base_name}_{idx}",
            "source_filename": w["base"],  # pass basename to MRDS (adjust if you need full key)
            "config_file": CONFIG_YAML,
        }
        for idx, w in enumerate(workload, start=1)
    ]

    bundle = {
        "workflow_history_key": a_workflow_history_key,
        "workflow_context": workflow_context,
        "workload": workload,          # includes object + mtime
        "task_configs": task_configs,  # list-of-dicts for mapping
        "env": env,
    }

    logging.info("Init complete; workload=%d, tasks=%d", len(workload), len(task_configs))
    return bundle
|
||||
|
||||
@af_task(task_id="get_task_configs")
def get_task_configs(init_bundle: dict):
    # Thin extraction task: republishes the per-file task configs built by
    # init_workflow as their own XCom so they can feed expand_kwargs() below.
    return init_bundle["task_configs"]
|
||||
|
||||
def run_mrds_task(task_name: str, source_filename: str, config_file: str, **context):
    """Run MRDS for a single file (sequential via mapped task with max_active_tis_per_dag=1).

    Args:
        task_name: Logical MRDS task name (used here for logging only).
        source_filename: Basename of the XML file to process.
        config_file: Path to the shared YAML config; must exist on disk.
        **context: Airflow task context; requires 'ti' for XCom access.

    Returns:
        "SUCCESS" once MRDS completes and the object is marked processed.

    Raises:
        FileNotFoundError: if config_file does not exist.
        AirflowFailException: if init_workflow produced no workflow_context.
        Exception: re-raises whatever mrds_main raises.
    """
    ti = context['ti']

    if not os.path.exists(config_file):
        raise FileNotFoundError(f"Config file not found: {config_file}")

    init_bundle = ti.xcom_pull(task_ids='init_workflow') or {}
    workflow_context = init_bundle.get('workflow_context')
    workload = init_bundle.get('workload') or []
    if not workflow_context:
        raise AirflowFailException("No workflow_context from init_workflow")

    # Resolve full object key + mtime by matching base name from workload.
    matched = next((w for w in workload if w.get('base') == source_filename), None)
    full_object_key = matched.get('object') if matched else None
    object_mtime = matched.get('mtime') if matched else None

    # Print/log the file being processed
    logging.info("%s: picking file %s (object=%s, mtime=%s)",
                 task_name, source_filename, full_object_key or source_filename, object_mtime)
    print(f"{task_name}: picking file {source_filename} (object={full_object_key or source_filename}, mtime={object_mtime})")

    try:
        # NOTE: if MRDS expects full URI, change 'source_filename' to 'full_object_key'
        mrds_main(
            workflow_context,
            source_filename,  # or full_object_key if required in your env
            config_file,
            generate_workflow_context=False
        )
    except Exception:
        logging.exception("%s: MRDS failed on %s", task_name, source_filename)
        raise

    # Mark processed with the mtime we saw during poll.
    # BUGFIX: compare mtime against None explicitly — an epoch mtime of 0.0 is
    # falsy, and the previous truthiness check silently skipped marking such an
    # object as processed, causing it to be re-polled forever.
    if full_object_key and object_mtime is not None:
        _mark_processed_ts([(full_object_key, object_mtime)])

    ti.xcom_push(key='task_status', value='SUCCESS')
    logging.info("%s: success", task_name)
    return "SUCCESS"
|
||||
|
||||
def finalise_workflow_task(**context):
    """Finalize workflow across all per-file tasks (mapped).

    Closes the MRDS workflow as SUCCESS ("Y") when no mapped processing task
    failed (including the no-files case), otherwise as FAILED ("N") and raises
    so this finalizer task is itself marked failed.
    """
    from airflow.utils.state import State

    ti = context['ti']
    dag_run = context['dag_run']

    init_bundle = ti.xcom_pull(task_ids='init_workflow') or {}
    a_workflow_history_key = init_bundle.get('workflow_history_key')
    if a_workflow_history_key is None:
        raise AirflowFailException("No workflow history key; cannot finalise workflow")

    # All task instances of the mapped per-file task (one per map index).
    mapped_task_id = "m_ODS_LM_ADHOC_ADJUSTMENT"
    tis = [t for t in dag_run.get_task_instances() if t.task_id == mapped_task_id]

    if not tis:
        # Nothing was polled this run: still close the workflow successfully.
        mrds_finalise_workflow(a_workflow_history_key, "Y")
        logging.info("Finalised workflow %s as SUCCESS (no files)", a_workflow_history_key)
        return

    any_failed = any(ti_i.state in {State.FAILED, State.UPSTREAM_FAILED} for ti_i in tis)
    if not any_failed:
        mrds_finalise_workflow(a_workflow_history_key, "Y")
        logging.info("Finalised workflow %s as SUCCESS", a_workflow_history_key)
        return

    # At least one mapped instance failed: report the failing map indexes.
    failed_idxs = [getattr(ti_i, "map_index", None) for ti_i in tis if ti_i.state in {State.FAILED, State.UPSTREAM_FAILED}]
    mrds_finalise_workflow(a_workflow_history_key, "N")
    logging.error("Finalised workflow %s as FAILED (failed map indexes=%s)", a_workflow_history_key, failed_idxs)
    raise AirflowFailException(f"Workflow failed for mapped indexes: {failed_idxs}")
|
||||
|
||||
def check_success_for_mopdb(**context):
    """Check if all processing tasks succeeded before triggering MOPDB.

    Raises AirflowSkipException (so the downstream trigger is skipped, not
    failed) when finalize_workflow failed, when any mapped processing task
    failed, when every mapped task was skipped / no files existed, or when
    this check itself errors. Returns "SUCCESS" when MOPDB should fire.
    """
    from airflow.utils.state import State

    try:
        ti = context['ti']
        dag_run = context['dag_run']

        has_failures = False
        failure_reasons = []

        # Check finalize_workflow task
        finalize_task = dag_run.get_task_instance('finalize_workflow')
        # BUGFIX: get_task_instance can return None; the old code dereferenced
        # .state unconditionally, so a missing instance raised AttributeError
        # and was converted into a misleading "error checking success" skip.
        if finalize_task is not None and finalize_task.state == State.FAILED:
            has_failures = True
            failure_reasons.append("finalize_workflow failed")

        # Check all mapped tasks (per-file processing)
        mapped_task_id = "m_ODS_LM_ADHOC_ADJUSTMENT"
        mapped_tasks = [t for t in dag_run.get_task_instances() if t.task_id == mapped_task_id]

        for task_instance in mapped_tasks:
            if task_instance.state in {State.FAILED, State.UPSTREAM_FAILED}:
                has_failures = True
                map_idx = getattr(task_instance, 'map_index', 'unknown')
                failure_reasons.append(f"Processing task failed at index {map_idx}")

        if has_failures:
            error_msg = f"Tasks failed - skipping MOPDB trigger: {', '.join(failure_reasons)}"
            logging.info(error_msg)
            raise AirflowSkipException(error_msg)

        # Check if all mapped tasks were skipped (no files to process)
        all_skipped = all(t.state == State.SKIPPED for t in mapped_tasks) if mapped_tasks else True

        if all_skipped or not mapped_tasks:
            error_msg = "All processing tasks were skipped (no files to process) - skipping MOPDB trigger"
            logging.info(error_msg)
            raise AirflowSkipException(error_msg)

        logging.info("All tasks completed successfully - proceeding to trigger MOPDB")
        return "SUCCESS"

    except AirflowSkipException:
        raise
    except Exception as e:
        # Deliberate best-effort: any unexpected error downgrades to a skip so
        # the MOPDB trigger is suppressed rather than the DAG run failing here.
        logging.error(f"Error checking success for MOPDB: {e}", exc_info=True)
        raise AirflowSkipException(f"Error checking success - skipping MOPDB trigger: {e}")
|
||||
|
||||
# Operators & Dependencies
|
||||
poll_task = poll_oci_for_xml()
|
||||
init_out = init_workflow(poll_task)
|
||||
task_cfgs = get_task_configs(init_out)
|
||||
|
||||
@af_task(task_id="m_ODS_LM_ADHOC_ADJUSTMENT", max_active_tis_per_dag=1)
|
||||
def mapped_run(task_name: str, source_filename: str, config_file: str, **context):
|
||||
return run_mrds_task(task_name=task_name, source_filename=source_filename, config_file=config_file, **context)
|
||||
|
||||
per_file = mapped_run.expand_kwargs(task_cfgs)
|
||||
|
||||
finalize_workflow = PythonOperator(
|
||||
task_id='finalize_workflow',
|
||||
python_callable=finalise_workflow_task,
|
||||
provide_context=True,
|
||||
trigger_rule=TriggerRule.ALL_DONE,
|
||||
retries=0,
|
||||
)
|
||||
|
||||
check_mopdb = PythonOperator(
|
||||
task_id='check_success_for_mopdb',
|
||||
python_callable=check_success_for_mopdb,
|
||||
provide_context=True,
|
||||
trigger_rule=TriggerRule.ALL_DONE,
|
||||
retries=0,
|
||||
)
|
||||
|
||||
trigger_mopdb = TriggerDagRunOperator(
|
||||
task_id="Trigger_w_MOPDB_LM_ADHOC_ADJUSTMENT",
|
||||
trigger_dag_id="w_MOPDB_LM_ADHOC_ADJUSTMENT",
|
||||
conf={
|
||||
"source_dag": dag_id,
|
||||
"upstream_run_id": "{{ run_id }}",
|
||||
"objects": "{{ (ti.xcom_pull(task_ids='poll_oci_for_xml')['workload'] | map(attribute='object') | list) if ti.xcom_pull(task_ids='poll_oci_for_xml') else [] }}",
|
||||
"workflow_history_key": "{{ (ti.xcom_pull(task_ids='init_workflow')['workflow_history_key']) if ti.xcom_pull(task_ids='init_workflow') else None }}"
|
||||
},
|
||||
wait_for_completion=False, # CHANGED: Don't wait for completion
|
||||
trigger_rule=TriggerRule.NONE_FAILED_MIN_ONE_SUCCESS, # CHANGED: Only trigger if check succeeds
|
||||
retries=0,
|
||||
)
|
||||
|
||||
all_good = EmptyOperator(
|
||||
task_id="All_went_well",
|
||||
trigger_rule=TriggerRule.ALL_DONE, # CHANGED: Always run to mark end
|
||||
)
|
||||
|
||||
# CHANGED: Chain with check task before trigger
|
||||
poll_task >> init_out >> task_cfgs >> per_file >> finalize_workflow >> check_mopdb >> trigger_mopdb >> all_good
|
||||
|
||||
logging.info(
|
||||
"EXDI DAG ready: inbox_prefix=%s; using per-object processed ts map %s.",
|
||||
OBJECT_PREFIX, PROCESSED_TS_VAR
|
||||
)
|
||||
0
airflow/ods/lm/balancesheet/config/.gitkeep
Normal file
0
airflow/ods/lm/balancesheet/config/.gitkeep
Normal file
102
airflow/ods/lm/balancesheet/config/balancesheet.xsd
Normal file
102
airflow/ods/lm/balancesheet/config/balancesheet.xsd
Normal file
@@ -0,0 +1,102 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
||||
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
|
||||
xmlns="http://escb.ecb.int/balancesheet"
|
||||
xmlns:lm="http://exdi.ecb.int/lm"
|
||||
targetNamespace="http://escb.ecb.int/balancesheet"
|
||||
elementFormDefault="qualified"
|
||||
attributeFormDefault="unqualified">
|
||||
|
||||
<xs:import namespace="http://exdi.ecb.int/lm" schemaLocation="../../lm_common/lm.xsd" />
|
||||
|
||||
<xs:element name="balanceSheetMessage">
|
||||
<xs:complexType>
|
||||
<xs:sequence>
|
||||
<xs:element name="cbBalanceSheet" type="cbBalanceSheet" minOccurs="1" maxOccurs="unbounded" />
|
||||
<xs:element name="eurosystemBalanceSheet" type="eurosystemBalanceSheet" minOccurs="0" />
|
||||
</xs:sequence>
|
||||
</xs:complexType>
|
||||
</xs:element>
|
||||
|
||||
<xs:complexType name="cbHeader">
|
||||
<xs:complexContent>
|
||||
<xs:extension base="eurosystemHeader">
|
||||
<xs:sequence>
|
||||
<xs:element name="status" type="status" />
|
||||
<xs:element name="freeText" type="lm:freeText" minOccurs="0" />
|
||||
</xs:sequence>
|
||||
</xs:extension>
|
||||
</xs:complexContent>
|
||||
</xs:complexType>
|
||||
|
||||
<xs:complexType name="eurosystemHeader">
|
||||
<xs:sequence>
|
||||
<xs:element name="country" type="lm:isoCode" />
|
||||
<xs:element name="referenceDate" type="xs:date" />
|
||||
<xs:element name="version" type="lm:positiveInt" />
|
||||
<xs:element name="dateOfTransmission" type="xs:date" minOccurs="0" />
|
||||
</xs:sequence>
|
||||
</xs:complexType>
|
||||
|
||||
<xs:complexType name="cbBalanceSheet">
|
||||
<xs:annotation>
|
||||
<xs:documentation>Represents a balanceSheet for a CB</xs:documentation>
|
||||
</xs:annotation>
|
||||
<xs:sequence>
|
||||
<xs:element name="header" type="cbHeader" />
|
||||
<xs:element name="assets" type="balanceSheetItem" minOccurs="1" />
|
||||
<xs:element name="liabilities" type="balanceSheetItem" minOccurs="1" />
|
||||
</xs:sequence>
|
||||
</xs:complexType>
|
||||
|
||||
<xs:complexType name="eurosystemBalanceSheet">
|
||||
<xs:annotation>
|
||||
<xs:documentation>Represents an aggregated balanceSheet of all CBs</xs:documentation>
|
||||
</xs:annotation>
|
||||
<xs:sequence>
|
||||
<xs:element name="header" type="eurosystemHeader" />
|
||||
<xs:element name="assets" type="balanceSheetItem" minOccurs="1" />
|
||||
<xs:element name="liabilities" type="balanceSheetItem" minOccurs="1" />
|
||||
</xs:sequence>
|
||||
</xs:complexType>
|
||||
|
||||
<xs:complexType name="balanceSheetItem">
|
||||
<xs:sequence>
|
||||
<xs:element name="level" type="lm:positiveInt" />
|
||||
<xs:element name="position" type="lm:positiveInt" />
|
||||
<xs:element name="itemType" type="itemType" minOccurs="0" />
|
||||
<xs:element name="fullyQualifiedPosition" type="fullyQualifiedPosition" minOccurs="0" />
|
||||
<xs:element name="name" type="itemName" minOccurs="0" />
|
||||
<xs:element name="amount" type="lm:amountInEuro" />
|
||||
<xs:element name="item" type="balanceSheetItem" minOccurs="0" maxOccurs="unbounded" />
|
||||
</xs:sequence>
|
||||
</xs:complexType>
|
||||
|
||||
<xs:simpleType name="itemName">
|
||||
<xs:restriction base="xs:string">
|
||||
<xs:maxLength value="200" />
|
||||
</xs:restriction>
|
||||
</xs:simpleType>
|
||||
|
||||
<xs:simpleType name="itemType">
|
||||
<xs:restriction base="xs:string">
|
||||
<xs:enumeration value="Asset" />
|
||||
<xs:enumeration value="Liability" />
|
||||
</xs:restriction>
|
||||
</xs:simpleType>
|
||||
|
||||
<xs:simpleType name="fullyQualifiedPosition">
|
||||
<xs:restriction base="xs:string">
|
||||
<xs:pattern value="(A|L)(\.([1-9][0-9]*))+" />
|
||||
<xs:maxLength value="200" />
|
||||
</xs:restriction>
|
||||
</xs:simpleType>
|
||||
|
||||
<xs:simpleType name="status">
|
||||
<xs:restriction base="xs:string">
|
||||
<xs:enumeration value="U" />
|
||||
<xs:enumeration value="B" />
|
||||
<xs:enumeration value="R" />
|
||||
</xs:restriction>
|
||||
</xs:simpleType>
|
||||
|
||||
</xs:schema>
|
||||
@@ -0,0 +1,82 @@
|
||||
# Global configurations
|
||||
tmpdir: /tmp
|
||||
inbox_prefix: INBOX/LM/AggregatedDailyFinancialStatementOfTheEurosystem
|
||||
archive_prefix: ARCHIVE/LM/AggregatedDailyFinancialStatementOfTheEurosystem
|
||||
workflow_name: w_ODS_LM_BALANCESHEET
|
||||
validation_schema_path: '/opt/airflow/src/airflow/dags/ods/lm/balancesheet/config/balancesheet.xsd'
|
||||
file_type: xml
|
||||
|
||||
# List of tasks
|
||||
tasks:
|
||||
- task_name: m_ODS_LM_BALANCESHEET_HEADER_PARSE
|
||||
ods_prefix: INBOX/LM/AggregatedDailyFinancialStatementOfTheEurosystem/LM_BALANCESHEET_HEADER
|
||||
output_table: LM_BALANCESHEET_HEADER
|
||||
namespaces:
|
||||
ns2: 'http://escb.ecb.int/balancesheet'
|
||||
output_columns:
|
||||
- type: 'xpath_element_id'
|
||||
value: '/ns2:balanceSheetMessage/ns2:*'
|
||||
column_header: 'A_KEY'
|
||||
- type: 'workflow_key'
|
||||
column_header: 'A_WORKFLOW_HISTORY_KEY'
|
||||
- type: 'xpath'
|
||||
value: '/ns2:balanceSheetMessage/ns2:*/ns2:header/ns2:country'
|
||||
column_header: 'COUNTRY'
|
||||
is_key: 'N'
|
||||
- type: 'xpath'
|
||||
value: '/ns2:balanceSheetMessage/ns2:*/ns2:header/ns2:referenceDate'
|
||||
column_header: 'REFERENCE_DATE'
|
||||
is_key: 'N'
|
||||
- type: 'xpath'
|
||||
value: '/ns2:balanceSheetMessage/ns2:*/ns2:header/ns2:version'
|
||||
column_header: 'VERSION'
|
||||
is_key: 'N'
|
||||
- type: 'xpath'
|
||||
value: '/ns2:balanceSheetMessage/ns2:*/ns2:header/ns2:status'
|
||||
column_header: 'STATUS'
|
||||
is_key: 'N'
|
||||
- type: 'xpath'
|
||||
value: '/ns2:balanceSheetMessage/ns2:*/ns2:header/ns2:freeText'
|
||||
column_header: 'FREE_TEXT'
|
||||
is_key: 'N'
|
||||
|
||||
|
||||
- task_name: m_ODS_LM_BALANCESHEET_ITEM_PARSE
|
||||
ods_prefix: INBOX/LM/AggregatedDailyFinancialStatementOfTheEurosystem/LM_BALANCESHEET_ITEM
|
||||
output_table: LM_BALANCESHEET_ITEM
|
||||
namespaces:
|
||||
ns2: 'http://escb.ecb.int/balancesheet'
|
||||
output_columns:
|
||||
- type: 'a_key'
|
||||
column_header: 'A_KEY'
|
||||
- type: 'workflow_key'
|
||||
column_header: 'A_WORKFLOW_HISTORY_KEY'
|
||||
- type: 'xpath_element_id'
|
||||
value: '/ns2:balanceSheetMessage/ns2:*'
|
||||
column_header: 'A_HEADER_FK'
|
||||
- type: 'xpath'
|
||||
value: '/ns2:balanceSheetMessage//ns2:level'
|
||||
is_key: 'N'
|
||||
column_header: 'ITEM_LEVEL'
|
||||
- type: 'xpath'
|
||||
value: '/ns2:balanceSheetMessage//ns2:position'
|
||||
is_key: 'N'
|
||||
column_header: 'POSITION'
|
||||
- type: 'xpath'
|
||||
value: '/ns2:balanceSheetMessage//ns2:itemType'
|
||||
is_key: 'N'
|
||||
column_header: 'ITEM_TYPE'
|
||||
- type: 'xpath'
|
||||
value: '/ns2:balanceSheetMessage//ns2:fullyQualifiedPosition'
|
||||
column_header: 'FULLY_QUALIFIED_POSITION'
|
||||
is_key: 'N'
|
||||
- type: 'xpath'
|
||||
value: '/ns2:balanceSheetMessage//ns2:name'
|
||||
column_header: 'NAME'
|
||||
is_key: 'N'
|
||||
- type: 'xpath'
|
||||
value: '/ns2:balanceSheetMessage//ns2:amount'
|
||||
column_header: 'AMOUNT'
|
||||
is_key: 'N'
|
||||
|
||||
|
||||
520
airflow/ods/lm/balancesheet/dags/w_ODS_LM_BALANCESHEET.py
Normal file
520
airflow/ods/lm/balancesheet/dags/w_ODS_LM_BALANCESHEET.py
Normal file
@@ -0,0 +1,520 @@
|
||||
# dags/m_ODS_LM_BALANCESHEET.py
|
||||
# Idempotent, per-object mtime tracking
|
||||
|
||||
import sys
|
||||
import os
|
||||
import json
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from datetime import timedelta, datetime, timezone
|
||||
from email.utils import parsedate_to_datetime
|
||||
|
||||
from airflow import DAG
|
||||
from airflow.models import Variable
|
||||
from airflow.decorators import task as af_task
|
||||
from airflow.operators.python import PythonOperator
|
||||
from airflow.utils.dates import days_ago
|
||||
from airflow.utils.trigger_rule import TriggerRule
|
||||
from airflow.operators.trigger_dagrun import TriggerDagRunOperator
|
||||
from airflow.operators.empty import EmptyOperator
|
||||
|
||||
try:
|
||||
from airflow.exceptions import AirflowFailException, AirflowSkipException
|
||||
except Exception:
|
||||
from airflow.exceptions import AirflowException as AirflowFailException
|
||||
from airflow.exceptions import AirflowSkipException
|
||||
|
||||
# Import libs
|
||||
sys.path.append('/opt/airflow/python/mrds_common')
|
||||
sys.path.append('/opt/airflow/src/airflow/dags/ods/exdi')
|
||||
from mrds.utils.manage_runs import init_workflow as mrds_init_workflow, finalise_workflow as mrds_finalise_workflow
|
||||
from mrds.core import main as mrds_main
|
||||
|
||||
dag_id = Path(__file__).stem
|
||||
|
||||
default_args = {
|
||||
'owner': 'airflow',
|
||||
'depends_on_past': False,
|
||||
'start_date': days_ago(1),
|
||||
'email_on_failure': False,
|
||||
'email_on_retry': False,
|
||||
'retries': 1,
|
||||
'retry_delay': timedelta(minutes=5),
|
||||
}
|
||||
|
||||
WORKFLOW_CONFIG = {
|
||||
"database_name": "ODS",
|
||||
"workflow_name": dag_id,
|
||||
}
|
||||
|
||||
# OCI settings
|
||||
OCI_NAMESPACE = os.getenv("BUCKET_NAMESPACE")
|
||||
OCI_BUCKET = os.getenv("INBOX_BUCKET")
|
||||
|
||||
# Config YAML (single config for all files)
|
||||
CONFIG_YAML = os.getenv(
|
||||
"EXDI_SINGLE_CONFIG_YAML",
|
||||
"/opt/airflow/src/airflow/dags/ods/lm/balancesheet/config/m_ODS_LM_BALANCESHEET_PARSE.yaml",
|
||||
|
||||
)
|
||||
logging.info("Using EXDI_SINGLE_CONFIG_YAML=%s", CONFIG_YAML)
|
||||
|
||||
# Idempotency controls
|
||||
REPROCESS = (os.getenv("EXDI_REPROCESS", "false").lower() in ("1", "true", "yes"))
|
||||
LAST_TS_VAR = f"{dag_id}__last_seen_ts" # legacy watermark (kept for observability)
|
||||
PROCESSED_SET_VAR = f"{dag_id}__processed_objects" # legacy: list of keys (back-compat only)
|
||||
PROCESSED_TS_VAR = f"{dag_id}__processed_objects_ts" # NEW: map key -> last processed mtime (epoch float)
|
||||
|
||||
|
||||
# Helpers
|
||||
|
||||
def _oci_client():
    """
    Create an OCI Object Storage client.
    Order: Resource Principals -> Instance Principals.
    Raises RuntimeError when neither signer can be created.
    """
    import oci
    # Region falls back to eu-frankfurt-1 when neither env var is set.
    region = os.getenv("OCI_REGION") or os.getenv("OCI_RESOURCE_PRINCIPAL_REGION") or "eu-frankfurt-1"
    # RP — presumably the runtime is an OCI-managed workload when this works; verify in deployment.
    try:
        rp_signer = oci.auth.signers.get_resource_principals_signer()
        cfg = {"region": region} if region else {}
        logging.info("Using OCI Resource Principals signer (region=%s).", cfg.get("region"))
        return oci.object_storage.ObjectStorageClient(cfg, signer=rp_signer)
    except Exception as e:
        # Fall through to the next auth mechanism; log at info since this is expected off-OCI.
        logging.info("RP not available: %s", e)
    # IP — fallback for plain compute instances.
    try:
        ip_signer = oci.auth.signers.InstancePrincipalsSecurityTokenSigner()
        cfg = {"region": region} if region else {}
        logging.info("Using OCI Instance Principals signer (region=%s).", cfg.get("region"))
        return oci.object_storage.ObjectStorageClient(cfg, signer=ip_signer)
    except Exception as e:
        logging.info("IP not available: %s", e)

    logging.error("Neither Resource Principals nor Instance Principals authentication found.")
    raise RuntimeError("Failed to create OCI client")
|
||||
|
||||
def _load_yaml(cfg_path: str) -> dict:
|
||||
import yaml
|
||||
p = Path(cfg_path)
|
||||
if not p.exists():
|
||||
raise FileNotFoundError(f"Config YAML not found: {cfg_path}")
|
||||
return yaml.safe_load(p.read_text()) or {}
|
||||
|
||||
# Build config-derived constants directly from YAML
|
||||
try:
|
||||
CONFIG_DATA = _load_yaml(CONFIG_YAML)
|
||||
OBJECT_PREFIX = CONFIG_DATA.get("inbox_prefix")
|
||||
if not (isinstance(OBJECT_PREFIX, str) and OBJECT_PREFIX.strip()):
|
||||
raise AirflowFailException("YAML must define 'inbox_prefix' for OBJECT_PREFIX.")
|
||||
OBJECT_PREFIX = OBJECT_PREFIX.strip()
|
||||
logging.info("YAML inbox_prefix -> OBJECT_PREFIX: %s", OBJECT_PREFIX)
|
||||
except Exception as e:
|
||||
logging.error("Failed to resolve OBJECT_PREFIX from YAML %s: %s", CONFIG_YAML, e)
|
||||
OBJECT_PREFIX = None
|
||||
|
||||
# New idempotency map (key -> last_processed_ts)
|
||||
def _load_processed_map() -> dict[str, float]:
    """Return {object_key: last_processed_ts}.

    Prefers the new per-object timestamp map; falls back to the legacy
    set-style variable (keys only), treating those keys as processed at
    ts=0.0. Any parse error degrades gracefully to the next fallback.
    """
    # Preferred: the new map variable (JSON object of key -> epoch float).
    try:
        parsed = json.loads(Variable.get(PROCESSED_TS_VAR, default_var="{}")) or {}
        if isinstance(parsed, dict):
            result = {}
            for key, value in parsed.items():
                result[key] = float(value)
            return result
    except Exception:
        pass
    # Legacy fallback: plain JSON list of keys, migrated with ts=0.0.
    try:
        legacy = json.loads(Variable.get(PROCESSED_SET_VAR, default_var="[]"))
        if isinstance(legacy, list):
            return dict.fromkeys(legacy, 0.0)
    except Exception:
        pass
    return {}
|
||||
|
||||
def _save_processed_map(m: dict[str, float]) -> None:
    # Persist the key -> last-processed-mtime map as JSON in an Airflow Variable.
    Variable.set(PROCESSED_TS_VAR, json.dumps(m))
|
||||
|
||||
def _mark_processed_ts(objs: list[tuple[str, float]]):
    """Record (object_key, mtime) pairs in the persisted processed map.

    No-op in reprocess mode or for an empty list. An object's stored
    timestamp only ever advances; stale or unparsable entries are skipped.
    """
    if REPROCESS:
        return
    if not objs:
        return
    processed = _load_processed_map()
    for key, raw_ts in objs:
        try:
            candidate = float(raw_ts)
        except Exception:
            continue  # unusable timestamp — leave the stored value untouched
        if candidate > float(processed.get(key, 0.0)):
            processed[key] = candidate
    _save_processed_map(processed)
    logging.info("Processed map updated; size=%d", len(processed))
|
||||
|
||||
# Object listing (per-key mtime)
|
||||
def _list_new_xml_objects(prefix: str) -> list[dict]:
    """
    List .xml objects and decide inclusion per-object:
      include if REPROCESS or object_mtime > processed_map.get(object_key, 0.0)
    Returns: [{"name": "<full-key>", "base": "<file.xml>", "mtime": <epoch float>}]
    sorted ascending by mtime. Also advances the legacy watermark Variable
    (LAST_TS_VAR) for observability only — inclusion is decided per object.
    """
    if not OCI_NAMESPACE or not OCI_BUCKET:
        raise AirflowFailException("BUCKET_NAMESPACE and INBOX_BUCKET must be set")

    client = _oci_client()
    processed_map = _load_processed_map()

    # Legacy watermark kept only for log visibility; a corrupt value degrades to 0.
    try:
        last_seen = float(Variable.get(LAST_TS_VAR, default_var="0"))
    except Exception:
        last_seen = 0.0

    logging.info("Watermark last_seen=%s; processed_map_count=%d; prefix=%s",
                 last_seen, len(processed_map), prefix)

    # NOTE: add pagination if needed — a single list_objects call may truncate large prefixes.
    resp = client.list_objects(OCI_NAMESPACE, OCI_BUCKET, prefix=prefix)

    new_items: list[dict] = []
    newest_ts = last_seen

    for o in (resp.data.objects or []):
        name = (o.name or "").strip()
        base = name.rsplit("/", 1)[-1] if name else ""
        logging.info("Processing object: %s", base)

        # Skip folder markers / empty keys
        if not name or name.endswith('/') or not base:
            logging.debug("Skip: folder marker or empty key: %r", name)
            continue

        # Only .xml payloads are candidates (case-insensitive extension match).
        if not base.lower().endswith(".xml"):
            logging.debug("Skip: not .xml: %r", name)
            continue

        # Resolve mtime: prefer the listing's time_created field.
        # NOTE(review): the float(t)/1000.0 branch assumes a milliseconds epoch
        # when time_created is numeric — confirm against the OCI SDK response.
        ts = None
        t = getattr(o, "time_created", None)
        if t:
            try:
                ts = t.timestamp() if hasattr(t, "timestamp") else float(t) / 1000.0
            except Exception:
                ts = None

        # Fallback 1: HEAD the object and parse its Last-Modified header (assumed UTC when naive).
        if ts is None:
            try:
                head = client.head_object(OCI_NAMESPACE, OCI_BUCKET, name)
                lm = head.headers.get("last-modified") or head.headers.get("Last-Modified")
                if lm:
                    dt = parsedate_to_datetime(lm)
                    if dt.tzinfo is None:
                        dt = dt.replace(tzinfo=timezone.utc)
                    ts = dt.timestamp()
                    logging.debug("Resolved ts via HEAD Last-Modified for %s: %s", name, ts)
            except Exception as e:
                logging.warning("head_object failed for %s: %s", name, e)

        # Fallback 2: use "now" — the object will look new on this poll.
        if ts is None:
            ts = datetime.now(timezone.utc).timestamp()
            logging.warning("Object %s missing timestamp; falling back to now=%s", name, ts)

        # Per-object idempotency decision against the processed map.
        last_proc_ts = float(processed_map.get(name, 0.0))
        include = REPROCESS or (ts > last_proc_ts)

        logging.info(
            "Decision for %s: obj_ts=%s, last_proc_ts=%s, REPROCESS=%s -> include=%s",
            name, ts, last_proc_ts, REPROCESS, include
        )

        if not include:
            continue

        item = {"name": name, "base": base, "mtime": ts}
        new_items.append(item)
        if ts > newest_ts:
            newest_ts = ts

    # Watermark advanced for visibility (optional)
    if not REPROCESS and new_items and newest_ts > last_seen:
        Variable.set(LAST_TS_VAR, str(newest_ts))
        logging.info("Advanced watermark from %s to %s", last_seen, newest_ts)

    new_items.sort(key=lambda x: x["mtime"])  # ascending
    logging.info("Found %d candidate .xml object(s) under prefix %s", len(new_items), prefix)
    return new_items
|
||||
|
||||
|
||||
# DAG
|
||||
|
||||
with DAG(
    dag_id=dag_id,
    default_args=default_args,
    description='EXDI workflow (polling): single YAML config for all XML files in OCI',
    # NOTE(review): the old comment said "Run EVERY 10 MIN" but schedule_interval
    # is None, i.e. the DAG only runs when triggered manually or externally —
    # confirm which behaviour is intended (a 10-min poll would be '*/10 * * * *').
    schedule_interval=None,
    catchup=False,
    max_active_runs=1,  # single run at a time: avoids double-processing polled objects
    render_template_as_native_obj=True,  # templated conf values resolve to native Python objects
    tags=["EXDI", "MRDS", "ODS", "OCI", "BALANCESHEET"],
) as dag:
|
||||
|
||||
@af_task(task_id="poll_oci_for_xml")
def poll_oci_for_xml():
    """Poll OCI object storage and build this run's workload.

    Returns a dict of the form
    {"workload": [{"object": "<key>", "base": "<file.xml>", "mtime": <float>} ...]}.
    """
    # Fail fast when the YAML-derived prefix could not be resolved at import time.
    if not OBJECT_PREFIX:
        raise AirflowFailException("No OCI object prefix configured. Check YAML 'inbox_prefix'.")

    discovered = _list_new_xml_objects(OBJECT_PREFIX)
    discovered_json = json.dumps(discovered, indent=2)
    logging.info("New .xml objects found: %s", discovered_json)
    print("New .xml objects found:", discovered_json)

    # Re-shape listing entries into the workload schema consumed downstream.
    workload = []
    for entry in discovered:
        workload.append({"object": entry["name"], "base": entry["base"], "mtime": entry["mtime"]})
    logging.info("Prepared workload items: %d", len(workload))
    print("Prepared workload:", json.dumps(workload, indent=2))
    return {"workload": workload}
|
||||
|
||||
@af_task(task_id="init_workflow")
def init_workflow(polled: dict):
    """Initialize workflow; start MRDS workflow; build per-file task configs."""
    database_name = WORKFLOW_CONFIG["database_name"]
    workflow_name = WORKFLOW_CONFIG["workflow_name"]

    env = os.getenv("MRDS_ENV", "dev")
    # Required loader credentials — validated up-front so the run fails early
    # with an explicit list of what is absent.
    creds = {
        "MRDS_LOADER_DB_USER": os.getenv("MRDS_LOADER_DB_USER"),
        "MRDS_LOADER_DB_PASS": os.getenv("MRDS_LOADER_DB_PASS"),
        "MRDS_LOADER_DB_TNS": os.getenv("MRDS_LOADER_DB_TNS"),
    }
    missing = [name for name, value in creds.items() if not value]
    if missing:
        raise AirflowFailException(f"Missing required env vars: {', '.join(missing)}")

    workload = (polled or {}).get("workload") or []

    # Airflow context for run_id
    from airflow.operators.python import get_current_context
    run_id = str(get_current_context()['ti'].run_id)

    a_workflow_history_key = mrds_init_workflow(database_name, workflow_name, run_id)

    workflow_context = {
        "run_id": run_id,
        "a_workflow_history_key": a_workflow_history_key,
    }

    # One task config per polled file, numbered sequentially from 1.
    task_base_name = "m_ODS_LM_BALANCESHEET"
    task_configs = [
        {
            "task_name": f"{task_base_name}_{idx}",
            "source_filename": w["base"],  # pass basename to MRDS (adjust if you need full key)
            "config_file": CONFIG_YAML,
        }
        for idx, w in enumerate(workload, start=1)
    ]

    bundle = {
        "workflow_history_key": a_workflow_history_key,
        "workflow_context": workflow_context,
        "workload": workload,          # includes object + mtime
        "task_configs": task_configs,  # list-of-dicts for mapping
        "env": env,
    }

    logging.info("Init complete; workload=%d, tasks=%d", len(workload), len(task_configs))
    return bundle
|
||||
|
||||
@af_task(task_id="get_task_configs")
def get_task_configs(init_bundle: dict):
    # Thin extraction task: republishes the per-file task configs built by
    # init_workflow as their own XCom so they can feed expand_kwargs() below.
    return init_bundle["task_configs"]
|
||||
|
||||
def run_mrds_task(task_name: str, source_filename: str, config_file: str, **context):
    """Run MRDS for a single file (sequential via mapped task with max_active_tis_per_dag=1).

    Args:
        task_name: Logical MRDS task name (used here for logging only).
        source_filename: Basename of the XML file to process.
        config_file: Path to the shared YAML config; must exist on disk.
        **context: Airflow task context; requires 'ti' for XCom access.

    Returns:
        "SUCCESS" once MRDS completes and the object is marked processed.

    Raises:
        FileNotFoundError: if config_file does not exist.
        AirflowFailException: if init_workflow produced no workflow_context.
        Exception: re-raises whatever mrds_main raises.
    """
    ti = context['ti']

    if not os.path.exists(config_file):
        raise FileNotFoundError(f"Config file not found: {config_file}")

    init_bundle = ti.xcom_pull(task_ids='init_workflow') or {}
    workflow_context = init_bundle.get('workflow_context')
    workload = init_bundle.get('workload') or []
    if not workflow_context:
        raise AirflowFailException("No workflow_context from init_workflow")

    # Resolve full object key + mtime by matching base name from workload.
    matched = next((w for w in workload if w.get('base') == source_filename), None)
    full_object_key = matched.get('object') if matched else None
    object_mtime = matched.get('mtime') if matched else None

    # Print/log the file being processed
    logging.info("%s: picking file %s (object=%s, mtime=%s)",
                 task_name, source_filename, full_object_key or source_filename, object_mtime)
    print(f"{task_name}: picking file {source_filename} (object={full_object_key or source_filename}, mtime={object_mtime})")

    try:
        # NOTE: if MRDS expects full URI, change 'source_filename' to 'full_object_key'
        mrds_main(
            workflow_context,
            source_filename,  # or full_object_key if required in your env
            config_file,
            generate_workflow_context=False
        )
    except Exception:
        logging.exception("%s: MRDS failed on %s", task_name, source_filename)
        raise

    # Mark processed with the mtime we saw during poll.
    # BUGFIX: compare mtime against None explicitly — an epoch mtime of 0.0 is
    # falsy, and the previous truthiness check silently skipped marking such an
    # object as processed, causing it to be re-polled forever.
    if full_object_key and object_mtime is not None:
        _mark_processed_ts([(full_object_key, object_mtime)])

    ti.xcom_push(key='task_status', value='SUCCESS')
    logging.info("%s: success", task_name)
    return "SUCCESS"
|
||||
|
||||
def finalise_workflow_task(**context):
    """Close out the workflow based on the outcome of every mapped per-file task.

    Finalises the workflow history record as SUCCESS ("Y") when there were no
    mapped instances or none failed, otherwise as FAILED ("N") and raises so
    the finalize task itself is marked failed.
    """
    from airflow.utils.state import State

    bundle = context['ti'].xcom_pull(task_ids='init_workflow') or {}
    history_key = bundle.get('workflow_history_key')
    if history_key is None:
        raise AirflowFailException("No workflow history key; cannot finalise workflow")

    mapped_task_id = "m_ODS_LM_BALANCESHEET"
    mapped_instances = [
        instance
        for instance in context['dag_run'].get_task_instances()
        if instance.task_id == mapped_task_id
    ]

    # Nothing mapped at all: treat as a clean (empty) run.
    if not mapped_instances:
        mrds_finalise_workflow(history_key, "Y")
        logging.info("Finalised workflow %s as SUCCESS (no files)", history_key)
        return

    bad_states = {State.FAILED, State.UPSTREAM_FAILED}
    failed_indexes = [
        getattr(instance, "map_index", None)
        for instance in mapped_instances
        if instance.state in bad_states
    ]

    if not failed_indexes:
        mrds_finalise_workflow(history_key, "Y")
        logging.info("Finalised workflow %s as SUCCESS", history_key)
        return

    mrds_finalise_workflow(history_key, "N")
    logging.error("Finalised workflow %s as FAILED (failed map indexes=%s)", history_key, failed_indexes)
    raise AirflowFailException(f"Workflow failed for mapped indexes: {failed_indexes}")
|
||||
|
||||
def check_success_for_mopdb(**context):
    """Gate the MOPDB trigger: skip unless every processing task succeeded.

    Raises AirflowSkipException (which in turn skips the downstream trigger)
    when any task failed, when there were no files to process, or when the
    status inspection itself errors out — the gate fails safe by skipping.
    """
    from airflow.utils.state import State

    try:
        ti = context['ti']  # kept for parity with sibling callbacks; not used below
        dag_run = context['dag_run']

        failure_reasons = []

        # Outcome of the finalize_workflow task.
        finalize_task = dag_run.get_task_instance('finalize_workflow')
        if finalize_task.state == State.FAILED:
            failure_reasons.append("finalize_workflow failed")

        # Outcomes of the per-file mapped processing tasks.
        mapped_task_id = "m_ODS_LM_BALANCESHEET"
        mapped_tasks = [
            instance
            for instance in dag_run.get_task_instances()
            if instance.task_id == mapped_task_id
        ]
        for instance in mapped_tasks:
            if instance.state in {State.FAILED, State.UPSTREAM_FAILED}:
                map_idx = getattr(instance, 'map_index', 'unknown')
                failure_reasons.append(f"Processing task failed at index {map_idx}")

        if failure_reasons:
            error_msg = f"Tasks failed - skipping MOPDB trigger: {', '.join(failure_reasons)}"
            logging.info(error_msg)
            raise AirflowSkipException(error_msg)

        # An empty mapped list, or one where every instance was skipped,
        # means no files arrived — nothing for MOPDB to pick up.
        if not mapped_tasks or all(instance.state == State.SKIPPED for instance in mapped_tasks):
            error_msg = "All processing tasks were skipped (no files to process) - skipping MOPDB trigger"
            logging.info(error_msg)
            raise AirflowSkipException(error_msg)

        logging.info("All tasks completed successfully - proceeding to trigger MOPDB")
        return "SUCCESS"

    except AirflowSkipException:
        raise
    except Exception as e:
        # Fail safe: any inspection error becomes a skip rather than a failure.
        logging.error(f"Error checking success for MOPDB: {e}", exc_info=True)
        raise AirflowSkipException(f"Error checking success - skipping MOPDB trigger: {e}")
|
||||
|
||||
# Operators & Dependencies
# DAG wiring: poll object storage for XML, initialise the workflow, fan out
# one mapped task per file, finalise, then (conditionally) trigger MOPDB.

poll_task = poll_oci_for_xml()
init_out = init_workflow(poll_task)
task_cfgs = get_task_configs(init_out)


# max_active_tis_per_dag=1 forces the mapped instances to run one at a time
# (sequential per-file processing).
@af_task(task_id="m_ODS_LM_BALANCESHEET", max_active_tis_per_dag=1)
def mapped_run(task_name: str, source_filename: str, config_file: str, **context):
    # Thin mapped wrapper; the real work happens in run_mrds_task.
    return run_mrds_task(task_name=task_name, source_filename=source_filename, config_file=config_file, **context)


# One mapped task instance per config dict produced by get_task_configs.
per_file = mapped_run.expand_kwargs(task_cfgs)


# ALL_DONE: runs regardless of upstream outcome so the workflow history
# record is always closed out (as success or failure).
finalize_workflow = PythonOperator(
    task_id='finalize_workflow',
    python_callable=finalise_workflow_task,
    provide_context=True,
    trigger_rule=TriggerRule.ALL_DONE,
    retries=0,
)

# Gate task: raises AirflowSkipException unless every processing task
# succeeded, which skips the MOPDB trigger below.
check_mopdb = PythonOperator(
    task_id='check_success_for_mopdb',
    python_callable=check_success_for_mopdb,
    provide_context=True,
    trigger_rule=TriggerRule.ALL_DONE,
    retries=0,
)

# Fire-and-forget trigger of the downstream MOPDB DAG. The conf passes
# provenance (source DAG / run id) plus the polled object list and the
# workflow history key via Jinja-templated XCom pulls.
trigger_mopdb = TriggerDagRunOperator(
    task_id="Trigger_w_MOPDB_LM_BALANCESHEET",
    trigger_dag_id="w_MOPDB_LM_BALANCESHEET",
    conf={
        "source_dag": dag_id,
        "upstream_run_id": "{{ run_id }}",
        "objects": "{{ (ti.xcom_pull(task_ids='poll_oci_for_xml')['workload'] | map(attribute='object') | list) if ti.xcom_pull(task_ids='poll_oci_for_xml') else [] }}",
        "workflow_history_key": "{{ (ti.xcom_pull(task_ids='init_workflow')['workflow_history_key']) if ti.xcom_pull(task_ids='init_workflow') else None }}"
    },
    wait_for_completion=False,  # CHANGED: Don't wait for completion
    trigger_rule=TriggerRule.NONE_FAILED_MIN_ONE_SUCCESS,  # CHANGED: Only trigger if check succeeds
    retries=0,
)

# Terminal marker; ALL_DONE so the DAG always reaches a visible end node.
all_good = EmptyOperator(
    task_id="All_went_well",
    trigger_rule=TriggerRule.ALL_DONE,  # CHANGED: Always run to mark end
)

# CHANGED: Chain with check task before trigger
poll_task >> init_out >> task_cfgs >> per_file >> finalize_workflow >> check_mopdb >> trigger_mopdb >> all_good

logging.info(
    "EXDI DAG ready: inbox_prefix=%s; using per-object processed ts map %s.",
    OBJECT_PREFIX, PROCESSED_TS_VAR
)
|
||||
0
airflow/ods/lm/csm_adjustment/config/.gitkeep
Normal file
0
airflow/ods/lm/csm_adjustment/config/.gitkeep
Normal file
129
airflow/ods/lm/csm_adjustment/config/csm_adjustment.xsd
Normal file
129
airflow/ods/lm/csm_adjustment/config/csm_adjustment.xsd
Normal file
@@ -0,0 +1,129 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<!--
  Schema for CSM adjustment messages.

  The document root <adjustmentMessages> carries exactly one (xs:choice) of
  three message flavours: a monthly CSM adjustment, a quarterly revaluation
  adjustment, or an ad-hoc adjustment. Shared value types (positiveInt,
  isoCode, forecastName) are imported from the common LM schema.
-->
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
           xmlns="http://escb.ecb.int/csm-adjustment"
           xmlns:lm="http://exdi.ecb.int/lm"
           targetNamespace="http://escb.ecb.int/csm-adjustment"
           elementFormDefault="qualified"
           attributeFormDefault="unqualified">

  <!-- Common LM types; resolved relative to this file's location. -->
  <xs:import namespace="http://exdi.ecb.int/lm" schemaLocation="../../lm_common/lm.xsd" />

  <!-- Document root: exactly one message of one of the three kinds below. -->
  <xs:element name="adjustmentMessages">
    <xs:complexType>
      <xs:choice>
        <xs:element ref="csmAdjustmentMessage" />
        <xs:element ref="quarterlyRevaluationAdjustmentMessage" />
        <xs:element ref="adhocAdjustmentMessage" />
      </xs:choice>
    </xs:complexType>
  </xs:element>

  <!-- Monthly CSM adjustment: header keyed by year/month/version, followed by
       one or more adjustments each carrying a single forecast item. -->
  <xs:element name="csmAdjustmentMessage">
    <xs:complexType>
      <xs:sequence>
        <xs:element name="header">
          <xs:complexType>
            <xs:sequence>
              <xs:element name="year" type="xs:gYear" />
              <xs:element name="month" type="month" />
              <xs:element name="version" type="lm:positiveInt" />
            </xs:sequence>
          </xs:complexType>
        </xs:element>
        <xs:element name="adjustment" type="adjustmentSingleForecast" minOccurs="1" maxOccurs="unbounded" />
      </xs:sequence>
    </xs:complexType>
  </xs:element>

  <!-- Quarterly revaluation adjustment: header keyed by year/quarter/version;
       each adjustment may span multiple forecast items. -->
  <xs:element name="quarterlyRevaluationAdjustmentMessage">
    <xs:complexType>
      <xs:sequence>
        <xs:element name="header">
          <xs:complexType>
            <xs:sequence>
              <xs:element name="year" type="xs:gYear" />
              <xs:element name="quarter" type="quarter" />
              <xs:element name="version" type="lm:positiveInt" />
            </xs:sequence>
          </xs:complexType>
        </xs:element>
        <xs:element name="adjustment" type="adjustmentMultipleForecasts" minOccurs="1" maxOccurs="unbounded" />
      </xs:sequence>
    </xs:complexType>
  </xs:element>

  <!-- Ad-hoc adjustment: header keyed by a single date and version;
       each adjustment may span multiple forecast items. -->
  <xs:element name="adhocAdjustmentMessage">
    <xs:complexType>
      <xs:sequence>
        <xs:element name="header">
          <xs:complexType>
            <xs:sequence>
              <xs:element name="date" type="xs:date" />
              <xs:element name="version" type="lm:positiveInt" />
            </xs:sequence>
          </xs:complexType>
        </xs:element>
        <xs:element name="adjustment" type="adjustmentMultipleForecasts" minOccurs="1" maxOccurs="unbounded" />
      </xs:sequence>
    </xs:complexType>
  </xs:element>

  <!-- Fields shared by every adjustment variant. -->
  <xs:complexType name="baseAdjustment">
    <xs:sequence>
      <xs:element name="country" type="lm:isoCode" />
      <xs:element name="effectiveDate" type="xs:date" />
      <xs:element name="lastDateNotInForecast" type="xs:date" />
    </xs:sequence>
  </xs:complexType>

  <!-- baseAdjustment plus exactly one forecast item (monthly CSM messages). -->
  <xs:complexType name="adjustmentSingleForecast">
    <xs:complexContent>
      <xs:extension base="baseAdjustment">
        <xs:sequence>
          <xs:element name="forecastItem" type="forecastItem" />
        </xs:sequence>
      </xs:extension>
    </xs:complexContent>
  </xs:complexType>

  <!-- baseAdjustment plus one or more forecast items (quarterly / ad-hoc). -->
  <xs:complexType name="adjustmentMultipleForecasts">
    <xs:complexContent>
      <xs:extension base="baseAdjustment">
        <xs:sequence>
          <xs:element name="forecastItem" type="forecastItem" minOccurs="1" maxOccurs="unbounded" />
        </xs:sequence>
      </xs:extension>
    </xs:complexContent>
  </xs:complexType>

  <!-- A named forecast and the euro amount of its adjustment. -->
  <xs:complexType name="forecastItem">
    <xs:sequence>
      <xs:element name="forecastName" type="lm:forecastName" />
      <xs:element name="adjustmentAmount" type="extendedDecimalEuroValue" />
    </xs:sequence>
  </xs:complexType>

  <!-- Signed euro amount: up to 17 total digits, 8 of them fractional,
       bounded to +/- 999999999.99999999. -->
  <xs:simpleType name="extendedDecimalEuroValue">
    <xs:restriction base="xs:decimal">
      <xs:totalDigits value="17" />
      <xs:fractionDigits value="8" />
      <xs:minInclusive value="-999999999.99999999" />
      <xs:maxInclusive value="999999999.99999999" />
    </xs:restriction>
  </xs:simpleType>

  <!-- Calendar month number, 1-12. -->
  <xs:simpleType name="month">
    <xs:restriction base="xs:int">
      <xs:minInclusive value="1" />
      <xs:maxInclusive value="12" />
    </xs:restriction>
  </xs:simpleType>

  <!-- Calendar quarter number, 1-4. -->
  <xs:simpleType name="quarter">
    <xs:restriction base="xs:int">
      <xs:minInclusive value="1" />
      <xs:maxInclusive value="4" />
    </xs:restriction>
  </xs:simpleType>

</xs:schema>
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user