147 lines
4.7 KiB
Python
147 lines
4.7 KiB
Python
import os
|
|
from airflow.decorators import dag
|
|
from airflow.operators.bash import BashOperator
|
|
from airflow.operators.python import PythonOperator
|
|
from airflow.utils.dates import days_ago
|
|
from airflow.utils.trigger_rule import TriggerRule
|
|
from cosmos import DbtTaskGroup, ProfileConfig, ProjectConfig, RenderConfig
|
|
|
|
# The DAG is named after this file (extension stripped), so the file name and
# the dag_id always agree.
DAG_NAME = os.path.splitext(os.path.basename(__file__))[0]

# Deployment environment, lower-cased. An unset MRDS_ENV yields "".
# NOTE(review): with MRDS_ENV unset the dbt target below is the empty string
# while DATABASE_NAME still falls back to "MOPDB" -- confirm this is intended.
ENV_NAME = os.getenv("MRDS_ENV", "").lower()

# Database name per environment.
DATABASE_NAME_MAP = {
    "dev": "MOPDB",
    "test": "MOPDB_TEST",
}
# Environments missing from the map (including "") fall back to "MOPDB".
DATABASE_NAME = DATABASE_NAME_MAP.get(ENV_NAME, "MOPDB")

# check cron 2
# TODO(review): the meaning of the note above is not evident from this file;
# clarify or remove it.

dbt_root_path = "/opt/dbt"
# NOTE: despite its name this is the path of the profiles.yml *file*; the
# directory that contains it is dbt_profiles_dir_parent.
dbt_profiles_dir = "/opt/dbt/profiles.yml"
dbt_profiles_dir_parent = "/opt/dbt"

# Environment handed to the dbt processes.
# Entries whose source variable is unset (os.getenv returned None) are dropped:
# a subprocess environment may only contain string values, so keeping a None
# would make process start-up fail instead of letting dbt report the missing
# variable itself.
dbt_env = {
    name: value
    for name, value in {
        "DBT_PROFILES_DIR": dbt_profiles_dir_parent,
        "DBT_TARGET": ENV_NAME,
        "MRDS_LOADER_DB_USER": os.getenv("MRDS_LOADER_DB_USER"),
        "MRDS_LOADER_DB_PASS": os.getenv("MRDS_LOADER_DB_PASS"),
        "MRDS_LOADER_DB_TNS": os.getenv("MRDS_LOADER_DB_TNS", "XE"),
        "MRDS_SCHEMA": os.getenv("MRDS_SCHEMA", "CT_MRDS"),
        "MRDS_PROTOCOL": os.getenv("MRDS_PROTOCOL", "tcps"),
        "MRDS_THREADS": os.getenv("MRDS_THREADS", "4"),
        "DBT_LOG_PATH": "/opt/dbt/logs",
        "DBT_TARGET_PATH": "/opt/dbt/target",
        "PYTHONUNBUFFERED": "1",
    }.items()
    if value is not None
}
|
|
|
|
def retrieve_run_id(**kwargs):
    """Publish the current run id to XCom and return it.

    Reads ``run_id`` and ``ti`` from the keyword arguments, pushes the run id
    through ``ti.xcom_push`` under the key ``"run_id"``, and returns the same
    value.
    """
    current_run_id = kwargs["run_id"]
    task_instance = kwargs["ti"]
    task_instance.xcom_push(key="run_id", value=current_run_id)
    return current_run_id
|
|
|
|
def check_dag_status(**kwargs):
    """Fail when any other task instance of this DAG run has failed.

    Walks the task instances returned by ``kwargs["dag_run"]`` and raises on
    the first one whose state is ``"failed"``, ignoring the task instance that
    is executing this check (``kwargs["task_instance"]``).

    Raises:
        Exception: naming the first failed task that was found.
    """
    for candidate in kwargs["dag_run"].get_task_instances():
        if candidate.state != "failed":
            continue
        # The checking task itself never counts as a failure of the run.
        if candidate.task_id == kwargs["task_instance"].task_id:
            continue
        raise Exception(f"Task {candidate.task_id} failed. Failing this DAG run")
|
|
|
|
@dag(
    dag_id=DAG_NAME,
    schedule_interval=None,
    # NOTE(review): days_ago() is deprecated in recent Airflow 2 releases;
    # consider a fixed start date when this file is next migrated.
    start_date=days_ago(2),
    catchup=False,
)
def run_dag():
    """Assemble the DAG's tasks and wire them into one linear chain.

    Task order:
        read_vars -> retrieve_run_id -> control_external_run_start
        -> dbt task group (models tagged m_MOPDB_CSDB_DEBT_OU_CSDB_DEBT)
        -> control_external_run_end -> dag_status

    The last two tasks use ``TriggerRule.ALL_DONE`` so they run once their
    upstream tasks have finished regardless of outcome; ``dag_status`` then
    raises if any other task of the run failed.
    """

    def read_vars(**context):
        """Print the bucket-related environment variables; always returns 1."""
        bucket = os.getenv("INBOX_BUCKET")
        bucket_namespace = os.getenv("BUCKET_NAMESPACE")
        print("========= DBT ENV =========")
        print(f"BUCKET_NAMESPACE: {bucket_namespace}, BUCKET : {bucket}")
        return 1

    # `provide_context=True` is intentionally not passed to the
    # PythonOperators below: it is deprecated and unnecessary in Airflow 2,
    # where the context is supplied to the callable automatically.
    read_vars_task = PythonOperator(
        task_id="read_vars",
        python_callable=read_vars,
    )

    retrieve_run_id_task = PythonOperator(
        task_id="retrieve_run_id",
        python_callable=retrieve_run_id,
    )

    # Invokes the dbt macro `control_external_run_start`. The --vars payload is
    # rendered by Jinja at run time: the run id comes from the XCom pushed by
    # retrieve_run_id, the other two values from `params`.
    # The trailing backslashes sit inside a normal (non-raw) Python string, so
    # Python itself joins those lines before the command reaches bash.
    control_external_run_start = BashOperator(
        task_id="control_external_run_start",
        params={"db": DATABASE_NAME, "wf": DAG_NAME},
        env=dbt_env,
        bash_command="""
            set -euxo pipefail
            cd /opt/dbt
            dbt --log-format json --log-level debug --debug --log-path /opt/dbt/logs \
                run-operation control_external_run_start \
                --vars '{{ {
                    "orchestration_run_id": ti.xcom_pull(task_ids="retrieve_run_id", key="run_id"),
                    "input_service_name": params.db,
                    "workflow_name": params.wf
                } | tojson }}'
        """,
    )

    # Configuration shared by the Cosmos-rendered dbt tasks.
    common_profile = ProfileConfig(
        profiles_yml_filepath=dbt_profiles_dir,
        profile_name="mrds",
        target_name=ENV_NAME,
    )
    common_project = ProjectConfig(dbt_project_path=dbt_root_path)
    # Same three variables the control macros receive; the run id is left as a
    # Jinja expression so it is resolved when each task runs.
    common_vars = {
        "orchestration_run_id": "{{ ti.xcom_pull(task_ids='retrieve_run_id', key='run_id') }}",
        "input_service_name": DATABASE_NAME,
        "workflow_name": DAG_NAME,
    }
    common_operator_args = {
        "vars": common_vars,
        "env": dbt_env,
    }

    # Task group containing the dbt nodes selected by tag.
    m_MOPDB_CSDB_DEBT_OU_CSDB_DEBT = DbtTaskGroup(
        group_id="m_MOPDB_CSDB_DEBT_OU_CSDB_DEBT",
        project_config=common_project,
        profile_config=common_profile,
        render_config=RenderConfig(
            select=[
                "tag:m_MOPDB_CSDB_DEBT_OU_CSDB_DEBT",
            ]
        ),
        operator_args=common_operator_args,
    )

    # Counterpart of control_external_run_start. ALL_DONE makes it run after
    # the dbt group has finished whether or not the models succeeded.
    control_external_run_end = BashOperator(
        task_id="control_external_run_end",
        params={"db": DATABASE_NAME, "wf": DAG_NAME},
        env=dbt_env,
        bash_command="""
            set -euxo pipefail
            cd /opt/dbt
            dbt --log-format json --log-level debug --debug --log-path /opt/dbt/logs \
                run-operation control_external_run_end \
                --vars '{{ {
                    "orchestration_run_id": ti.xcom_pull(task_ids="retrieve_run_id", key="run_id"),
                    "input_service_name": params.db,
                    "workflow_name": params.wf
                } | tojson }}'
        """,
        trigger_rule=TriggerRule.ALL_DONE,
    )

    # Final gate: always runs, and raises if any other task of the run failed,
    # so a failure is not hidden by the ALL_DONE task that precedes it.
    dag_status = PythonOperator(
        task_id="dag_status",
        python_callable=check_dag_status,
        trigger_rule=TriggerRule.ALL_DONE,
    )

    (
        read_vars_task
        >> retrieve_run_id_task
        >> control_external_run_start
        >> m_MOPDB_CSDB_DEBT_OU_CSDB_DEBT
        >> control_external_run_end
        >> dag_status
    )
|
|
|
|
# Build the DAG and bind it to a module-level name equal to its dag_id
# (DAG_NAME is derived from this file's name), so the DAG object is reachable
# as a global of this module.
globals()[DAG_NAME] = run_dag()
|