init
This commit is contained in:
73
python/code_generation/generate_code.py
Normal file
73
python/code_generation/generate_code.py
Normal file
@@ -0,0 +1,73 @@
|
||||
from jinja2 import Environment, FileSystemLoader
|
||||
import csv
|
||||
|
||||
# Jinja environment rooted at the local templates/ directory.
environment = Environment(loader=FileSystemLoader("templates/"))

# One template per generated artifact kind.
template_sq = environment.get_template("m_Template_sq.sql")       # dbt source-qualifier model
template_tgt = environment.get_template("m_Template_target.sql")  # dbt target (incremental merge) model
template_db = environment.get_template("db_Template.sql")         # one-off DB migration statements
template_dag = environment.get_template("dag_Template.py")        # Airflow DAG module

# Read the schema/table list eagerly so the file handle is closed promptly
# (the original open() was never closed). newline="" is the csv-module
# recommended mode for reading; the data file is plain ASCII.
with open("ods_mopdb_plain.txt", "r", newline="", encoding="utf-8") as _src:
    csvFile = iter(list(csv.reader(_src)))

header = next(csvFile)  # skip header row (SCHEMA,TABLE)
|
||||
|
||||
def _write_file(path, text, mode="w"):
    # Persist rendered template text and log the destination path.
    # (The original printed a literal "... wrote (unknown)" — an f-string with
    # no placeholder — which defeated the log's purpose.)
    with open(path, mode=mode, encoding="utf-8") as out:
        out.write(text)
    print(f"... wrote {path}")


for row in csvFile:
    (schema, table) = row

    # Example row -> generated names:
    # ou_tms,ACTIVITYLOGDUE,TMS,T_ACTIVITYLOGDUE,m_MOPDB_TMS_T_ACTIVITYLOGDUE_OU_TMS_ACTIVITYLOGDUE,w_ODS_TMS_ACTIVITYLOGDUE
    target_table = f"T_{table}"
    source_schema = f"OU_{schema}"
    source_schema_lower = source_schema.lower()
    source_table = table
    mapping_name = f"m_MOPDB_{schema}_{target_table}_{source_schema}_{source_table}"
    workflow_name = f"w_MOPDB_{schema}_{target_table}"

    # The SQ and target templates share the same rendering context.
    context = dict(
        source_schema=source_schema_lower,
        source_table=table,
        target_schema=schema,
        target_table=target_table,
        mapping_name=mapping_name,
        workflow_name=workflow_name,
    )

    # dbt source-qualifier model.
    _write_file(f"source_qualifiers/{mapping_name}_SQ.sql", template_sq.render(**context))

    # dbt target (incremental merge) model.
    _write_file(f"targets/{mapping_name}.sql", template_tgt.render(**context))

    # Airflow DAG module for this table's workflow.
    _write_file(f"dags/{workflow_name}.py", template_dag.render(table=table))

    # Per-table DB migration file, plus one roll-up file accumulating every
    # table's statements (append mode, one rendered block per line group).
    db_sql = template_db.render(table=table, schema=schema)
    _write_file(f"db/{source_schema}_{source_table}.sql", db_sql)
    _write_file("db/table_changes.sql", f"{db_sql}\n", mode="a")
|
||||
|
||||
30
python/code_generation/ods_mopdb_plain.txt
Normal file
30
python/code_generation/ods_mopdb_plain.txt
Normal file
@@ -0,0 +1,30 @@
|
||||
SCHEMA,TABLE
|
||||
TMS,ACMENTRYSTATELEDGERGROUP
|
||||
TMS,ACTIVITYLOGDUE
|
||||
TMS,ACTIVITY_LOG
|
||||
TMS,BALANCE
|
||||
TMS,BLACKOUT_LOG
|
||||
TMS,BRANCH
|
||||
TMS,CALENDAR
|
||||
TMS,CASHFLOW
|
||||
TMS,CLIENT
|
||||
TMS,CUSTODYBALANCE
|
||||
TMS,ECBINSTRUMENTBONDCASHFLOW
|
||||
TMS,EFFECTIVEROLEPROFILE
|
||||
TMS,FINMESSAGELOG
|
||||
TMS,HISTORY_LOG
|
||||
TMS,INSTRUMENTBONDCASHFLOW
|
||||
TMS,INSTRUMENT_REPORT
|
||||
TMS,MARKETINFO
|
||||
TMS,PARAMETER
|
||||
TMS,PORTFOLIOTREE
|
||||
TMS,PRICES
|
||||
TMS,PROPERTY
|
||||
TMS,RECONCILIATION
|
||||
TMS,ROLEPORTFOLIOPROFILE
|
||||
TMS,RULES
|
||||
TMS,SDM_ENTITY_STATE
|
||||
TMS,SECURITYPOSITION
|
||||
TMS,SETTLEMENTCASHFLOW
|
||||
TMS,SETTLEMENTLOG
|
||||
TMS,USERINFORMATION
|
||||
95
python/code_generation/templates/dag_Template.py
Normal file
95
python/code_generation/templates/dag_Template.py
Normal file
@@ -0,0 +1,95 @@
|
||||
from airflow.decorators import dag
|
||||
from airflow.operators.bash import BashOperator
|
||||
from airflow.operators.python import PythonOperator
|
||||
from airflow.utils.dates import days_ago
|
||||
from airflow.utils.trigger_rule import TriggerRule
|
||||
from cosmos import DbtTaskGroup, ProfileConfig, ProjectConfig, RenderConfig
|
||||
|
||||
# Define paths to your dbt files
dbt_root_path = "/home/dbt/DBT/mrds"
dbt_profiles_dir = "/home/dbt/.dbt/profiles.yml"

# "{{table}}" is a generator-side Jinja placeholder: generate_code.py replaces
# it with the ODS table name when rendering this template into a DAG module.
ODS_TABLE = "{{table}}"
DATABASE_NAME = "MOPDB"
# NOTE(review): both names hard-code the TMS schema, while the generator writes
# this file as dags/w_MOPDB_<schema>_T_<table>.py — confirm file name and
# dag_id are meant to differ (the dag_id carries an extra OU_TMS_<table> suffix).
DAG_NAME = f"w_{DATABASE_NAME}_TMS_T_{ODS_TABLE}_OU_TMS_{ODS_TABLE}"
MAPPING_NAME = f"m_{DATABASE_NAME}_TMS_T_{ODS_TABLE}_OU_TMS_{ODS_TABLE}"
|
||||
|
||||
|
||||
# Define function for the retrieval of the current run_id
def retrieve_run_id(**kwargs):
    """Read this DAG run's run_id from the context, publish it via XCom, and return it."""
    current_run_id = kwargs['run_id']
    # Make the run_id available to downstream tasks (the dbt control macros
    # pull it back out of XCom in their bash commands).
    kwargs['ti'].xcom_push(key='run_id', value=current_run_id)
    return current_run_id
|
||||
|
||||
def check_dag_status(**kwargs):
    """Raise if any sibling task in this DAG run ended in the 'failed' state.

    The task running this callable (identified via kwargs['task_instance'])
    is excluded from the check so it cannot fail itself.
    """
    own_task_id = kwargs['task_instance'].task_id
    failed_ids = [
        instance.task_id
        for instance in kwargs['dag_run'].get_task_instances()
        if instance.state == 'failed' and instance.task_id != own_task_id
    ]
    if failed_ids:
        raise Exception("Task {} failed. Failing this DAG run".format(failed_ids[0]))
|
||||
|
||||
# Define function for the check of the status of the previous tasks
def determine_workflow_status(**kwargs):
    """Return 'Y' when every monitored upstream task succeeded, else 'N'.

    BUG FIX: the original implementation called ti.xcom_pull(...) and compared
    the results to 'success'. xcom_pull returns the tasks' *return values*
    (e.g. the run_id string from retrieve_run_id), not their states, so the
    comparison was effectively always unequal and the function always
    returned 'N'. Task states must be read from the current dag_run's
    task instances instead.
    """
    monitored = ('retrieve_run_id', 'control_external_run_start', 'mapping_mopdb')
    states = {
        instance.task_id: instance.state
        for instance in kwargs['dag_run'].get_task_instances()
        if instance.task_id in monitored
    }
    # A missing or non-success monitored task marks the workflow as failed.
    return 'Y' if all(states.get(task_id) == 'success' for task_id in monitored) else 'N'
|
||||
|
||||
@dag(
    dag_id=DAG_NAME,
    schedule_interval=None,  # triggered externally, never on a schedule
    start_date=days_ago(2),
    catchup=False
)
def run_dag():
    """Load one ODS table via dbt, bracketed by start/end control-run macros.

    Flow: capture run_id -> dbt control_external_run_start -> cosmos dbt task
    group (models tagged with MAPPING_NAME) -> dbt control_external_run_end
    -> final status check that fails the run if any task failed.
    """
    # Retrieve run_id
    retrieve_run_id_task = PythonOperator(
        task_id='retrieve_run_id',
        python_callable=retrieve_run_id,
        provide_context=True,
        # pool='my_custom_pool', # Create pool in Airflow Web UI with one slot to ensure that only one dag can run it at a time.
    )
    # Run dbt macro control_external_run_start
    # NOTE: the {% raw %}...{% endraw %} markers protect the inner "{{ ... }}"
    # from the generator's Jinja pass so that Airflow's own templating still
    # sees the xcom_pull expression at runtime.
    control_external_run_start = BashOperator(
        task_id='control_external_run_start',
        bash_command=(
            'cd /home/dbt/DBT/mrds && '
            'dbt run-operation control_external_run_start --vars \'{"orchestration_run_id": "{% raw %}{{{% endraw %} task_instance.xcom_pull(task_ids="retrieve_run_id", key="run_id") {% raw %}}}{% endraw %}", "input_service_name": "' + DATABASE_NAME + '", "workflow_name": "' + DAG_NAME + '"}\' '
            '--profiles-dir /home/dbt/.dbt/ --target dev'
        )
    )
    # run dbt taskGroup with tag of the mapping name
    dbtTaskGroup = DbtTaskGroup(
        group_id=MAPPING_NAME,
        project_config=ProjectConfig(
            dbt_project_path = dbt_root_path),
        profile_config=ProfileConfig(
            profiles_yml_filepath = dbt_profiles_dir,
            profile_name="mrds",
            target_name="dev"),
        # Only run the models tagged with this mapping's name.
        render_config=RenderConfig(select=[f"tag:{MAPPING_NAME}"],),
        operator_args={'vars': {'orchestration_run_id': '{% raw %}{{{% endraw %} task_instance.xcom_pull(task_ids="retrieve_run_id", key="run_id") {% raw %}}}{% endraw %}', "input_service_name": DATABASE_NAME, "workflow_name": DAG_NAME }}
    )

    # Close the control run even when upstream tasks fail (ALL_DONE below).
    control_external_run_end = BashOperator(
        task_id='control_external_run_end',
        bash_command=(
            'cd /home/dbt/DBT/mrds && '
            'dbt run-operation control_external_run_end --vars \'{"orchestration_run_id": "{% raw %}{{{% endraw %} task_instance.xcom_pull(task_ids="retrieve_run_id", key="run_id") {% raw %}}}{% endraw %}", "input_service_name": "' + DATABASE_NAME + '", "workflow_name": "' + DAG_NAME + '"}\' '
            '--profiles-dir /home/dbt/.dbt/ --target dev'
        ),
        trigger_rule=TriggerRule.ALL_DONE # Run regardless of previous task outcomes
    )

    # Final task: re-raise if any earlier task failed, so the DAG run's overall
    # state reflects the failure even though ALL_DONE tasks still ran.
    dag_status = PythonOperator(
        task_id='dag_status',
        provide_context=True,
        python_callable=check_dag_status,
        trigger_rule=TriggerRule.ALL_DONE, # Ensures this task runs even if upstream fails
    )

    # Set task dependencies
    retrieve_run_id_task >> control_external_run_start >> [dbtTaskGroup] >> control_external_run_end >> dag_status

# Register the DAG under its dynamic name so Airflow's module scan picks it up.
globals()[DAG_NAME] = run_dag()
|
||||
2
python/code_generation/templates/db_Template.sql
Normal file
2
python/code_generation/templates/db_Template.sql
Normal file
@@ -0,0 +1,2 @@
|
||||
-- Per-table migration rendered by generate_code.py: rename the audit FK
-- column on the source table, then drop the legacy target table.
-- NOTE(review): DROP TABLE is destructive — confirm {{schema}}.T_{{table}}
-- is safe to remove before running the generated script.
ALTER TABLE OU_{{schema}}.{{table}} RENAME COLUMN A_ETL_LOAD_SET_FK TO A_WORKFLOW_HISTORY_KEY;
DROP TABLE {{schema}}.T_{{table}};
|
||||
11
python/code_generation/templates/m_Template_sq.sql
Normal file
11
python/code_generation/templates/m_Template_sq.sql
Normal file
@@ -0,0 +1,11 @@
|
||||
-- dbt source-qualifier model template. generate_code.py resolves the plain
-- {{...}} placeholders; the {% raw %}...{% endraw %} markers survive that
-- pass so the rendered model still contains dbt's own "{{ ... }}" Jinja.
{% raw %}{{{% endraw %}
    config(
        materialized="table",
        tags=["{{mapping_name}}", "{{source_schema}}", "{{target_schema}}"],
        alias="{{target_table}}_SQ",
        schema="{{target_schema}}"
    )
{% raw %}}}{% endraw %}
-- NOTE(review): create_table_from_source and get_main_task_name are project
-- dbt macros; argument meaning assumed from usage — confirm against the
-- macro definitions.
{% raw %}{{{% endraw %}
    create_table_from_source("{{source_schema}}","{{source_table}}","{{workflow_name}}",get_main_task_name(model.name),['A_KEY','A_WORKFLOW_HISTORY_KEY'])
{% raw %}}}{% endraw %}
|
||||
13
python/code_generation/templates/m_Template_target.sql
Normal file
13
python/code_generation/templates/m_Template_target.sql
Normal file
@@ -0,0 +1,13 @@
|
||||
-- dbt target model template (incremental merge). generate_code.py resolves
-- the plain {{...}} placeholders; {% raw %} blocks keep dbt's own Jinja
-- intact, including the explicit depends_on hint on the SQ model below.
{% raw %}-- depends_on: {{{% endraw %} ref('{{mapping_name}}_SQ') {% raw %}}} {% endraw %}
{% raw %}{{{% endraw %}
    config(
        tags=["{{mapping_name}}","MOPDB","{{target_schema}}"],
        alias="{{target_table}}",
        schema="{{target_schema}}",
        materialized="incremental" ,
        incremental_strategy="merge"
    )
{% raw %}}}{% endraw %}
-- NOTE(review): create_table_target is a project dbt macro — confirm it
-- merges from the '<mapping_name>_SQ' staging model.
{% raw %}{{{% endraw %}
    create_table_target('{{mapping_name}}_SQ')
{% raw %}}}{% endraw %}
|
||||
Reference in New Issue
Block a user