This commit is contained in:
Grzegorz Michalski
2026-03-02 09:47:35 +01:00
commit 2c225d68ac
715 changed files with 130067 additions and 0 deletions

View File

@@ -0,0 +1,6 @@
# Flow Configuration
CONNECTOR_TYPE: "casper_connector"
ODS_PREFIX: "INBOX/RQSD/RQSD_PROCESS/RQSD_ANNEX_"
TASK_NAME: "m_ODS_RQSD_CASPER"
OUTPUT_TABLE: "RQSD_ANNEX_"
COLLECTION_ID: "1537"

View File

@@ -0,0 +1,19 @@
# Flow Configuration
CONNECTOR_TYPE: "devo_connector"
ODS_PREFIX: "INBOX/RQSD/RQSD_PROCESS"
TASK_NAME: "m_ODS_RQSD_FX"
OUTPUT_TABLE: "RQSD_FX"
DEVO_QUERY: "select * from
(select tec_ingestion_date,
obs_value,
pow(10, cast(unit_mult as int)) as divisor,
series_key,
time_period,
ROW_NUMBER() OVER (PARTITION BY series_key, time_period ORDER BY tec_ingestion_date DESC) AS rn
from crp_other_pub.fx
where freq = 'Q'
and data_type_fm = 'HSTE'
and provider_fm = 'BL'
and instrument_fm = 'FX'
and currency != 'EUR') t
where t.rn = 1"

View File

@@ -0,0 +1,34 @@
# static configs
tmpdir: /tmp
inbox_prefix: INBOX/RQSD/RQSD_PROCESS
archive_prefix: ARCHIVE/RQSD/RQSD_PROCESS
workflow_name: w_ODS_RQSD_PROCESS_DEVO
validation_schema_path: None
file_type: csv
# task configs
tasks:
- task_name: m_ODS_RQSD_FX_PARSE
ods_prefix: INBOX/RQSD/RQSD_PROCESS/RQSD_FX
output_table: RQSD_FX
output_columns:
- type: 'workflow_key'
column_header: 'A_WORKFLOW_HISTORY_KEY'
- type: 'csv_header'
value: 'tec_ingestion_date'
column_header: 'tec_ingestion_date'
- type: 'csv_header'
value: 'obs_value'
column_header: 'obs_value'
- type: 'csv_header'
value: 'divisor'
column_header: 'divisor'
- type: 'csv_header'
value: 'series_key'
column_header: 'series_key'
- type: 'csv_header'
value: 'time_period'
column_header: 'time_period'
- type: 'csv_header'
value: 'rn'
column_header: 'rn'

View File

@@ -0,0 +1,31 @@
# Flow Configuration
CONNECTOR_TYPE: "devo_connector"
ODS_PREFIX: "INBOX/RQSD/RQSD_PROCESS"
TASK_NAME: "m_ODS_RQSD_OBSERVATIONS"
OUTPUT_TABLE: "RQSD_OBSERVATIONS"
DEVO_QUERY: "with latest_dates as (
SELECT max(receivedfilereceiveddate) as max_date FROM crp_rqsd.rqsd_annex_1_1_all_mrds
UNION
SELECT max(receivedfilereceiveddate) as max_date FROM crp_rqsd.rqsd_annex_1_2_all_mrds
UNION
SELECT max(receivedfilereceiveddate) as max_date FROM crp_rqsd.rqsd_annex_1_1_fin_all_mrds
UNION
SELECT max(receivedfilereceiveddate) as max_date FROM crp_rqsd.rqsd_annex_1_2_fin_all_mrds
UNION
SELECT max(receivedfilereceiveddate) as max_date FROM crp_rqsd.rqsd_annex_2_all_mrds),
latest_update as (
SELECT max(max_date) as max_date FROM latest_dates),
latest as (
select case when count(max_date) = 0 then '1999-01-01 10:28:22' else max(max_date) end max_date
from latest_update)
SELECT a.*,
CAST(NULL AS STRING) AS tec_source_system,
CAST(NULL AS STRING) AS tec_dataset,
CAST(NULL AS STRING) AS tec_surrogate_key,
CAST(NULL AS STRING) AS tec_crc,
CAST(NULL AS STRING) AS tec_ingestion_date,
CAST(NULL AS STRING) AS tec_version_id,
CAST(NULL AS STRING) AS tec_execution_date,
CAST(NULL AS STRING) AS tec_run_id,
CAST(NULL AS STRING) AS tec_business_date
FROM crp_rqsd.tcrqsd_observations a where receivedfilereceiveddate > (select max(max_date) from latest)"

View File

@@ -0,0 +1,181 @@
# static configs
tmpdir: /tmp
inbox_prefix: INBOX/RQSD/RQSD_PROCESS
archive_prefix: ARCHIVE/RQSD/RQSD_PROCESS
workflow_name: w_ODS_RQSD_PROCESS_DEVO
validation_schema_path: None
file_type: csv
# task configs
tasks:
- task_name: m_ODS_RQSD_OBSERVATIONS_PARSE
ods_prefix: INBOX/RQSD/RQSD_PROCESS/RQSD_OBSERVATIONS
output_table: RQSD_OBSERVATIONS
output_columns:
- type: 'workflow_key'
column_header: 'A_WORKFLOW_HISTORY_KEY'
- type: 'csv_header'
value: 'datacollectioncode'
column_header: 'datacollectioncode'
- type: 'csv_header'
value: 'datacollectionname'
column_header: 'datacollectionname'
- type: 'csv_header'
value: 'datacollectionowner'
column_header: 'datacollectionowner'
- type: 'csv_header'
value: 'reportingcyclename'
column_header: 'reportingcyclename'
- type: 'csv_header'
value: 'reportingcyclestatus'
column_header: 'reportingcyclestatus'
- type: 'csv_header'
value: 'modulecode'
column_header: 'modulecode'
- type: 'csv_header'
value: 'modulename'
column_header: 'modulename'
- type: 'csv_header'
value: 'moduleversionnumber'
column_header: 'moduleversionnumber'
- type: 'csv_header'
value: 'reportingentitycollectionuniqueid'
column_header: 'reportingentitycollectionuniqueid'
- type: 'csv_header'
value: 'entityattributereportingcode'
column_header: 'entityattributereportingcode'
- type: 'csv_header'
value: 'reportingentityname'
column_header: 'reportingentityname'
- type: 'csv_header'
value: 'reportingentityentitytype'
column_header: 'reportingentityentitytype'
- type: 'csv_header'
value: 'entityattributecountry'
column_header: 'entityattributecountry'
- type: 'csv_header'
value: 'entitygroupentityname'
column_header: 'entitygroupentityname'
- type: 'csv_header'
value: 'obligationmodulereferencedate'
column_header: 'obligationmodulereferencedate'
- type: 'csv_header'
value: 'obligationmoduleremittancedate'
column_header: 'obligationmoduleremittancedate'
- type: 'csv_header'
value: 'receivedfilereceiveddate'
column_header: 'receivedfilereceiveddate'
- type: 'csv_header'
value: 'obligationmoduleexpected'
column_header: 'obligationmoduleexpected'
- type: 'csv_header'
value: 'receivedfileversionnumber'
column_header: 'receivedfileversionnumber'
- type: 'csv_header'
value: 'revalidationversionnumber'
column_header: 'revalidationversionnumber'
- type: 'csv_header'
value: 'revalidationdate'
column_header: 'revalidationdate'
- type: 'csv_header'
value: 'receivedfilesystemfilename'
column_header: 'receivedfilesystemfilename'
- type: 'csv_header'
value: 'obligationstatusstatus'
column_header: 'obligationstatusstatus'
- type: 'csv_header'
value: 'filestatussetsubmissionstatus'
column_header: 'filestatussetsubmissionstatus'
- type: 'csv_header'
value: 'filestatussetvalidationstatus'
column_header: 'filestatussetvalidationstatus'
- type: 'csv_header'
value: 'filestatussetexternalvalidationstatus'
column_header: 'filestatussetexternalvalidationstatus'
- type: 'csv_header'
value: 'numberoferrors'
column_header: 'numberoferrors'
- type: 'csv_header'
value: 'numberofwarnings'
column_header: 'numberofwarnings'
- type: 'csv_header'
value: 'delayindays'
column_header: 'delayindays'
- type: 'csv_header'
value: 'failedattempts'
column_header: 'failedattempts'
- type: 'csv_header'
value: 'observationvalue'
column_header: 'observationvalue'
- type: 'csv_header'
value: 'observationtextvalue'
column_header: 'observationtextvalue'
- type: 'csv_header'
value: 'observationdatevalue'
column_header: 'observationdatevalue'
- type: 'csv_header'
value: 'datapointsetdatapointidentifier'
column_header: 'datapointsetdatapointidentifier'
- type: 'csv_header'
value: 'datapointsetlabel'
column_header: 'datapointsetlabel'
- type: 'csv_header'
value: 'obsrvdescdatatype'
column_header: 'obsrvdescdatatype'
- type: 'csv_header'
value: 'ordinatecode'
column_header: 'ordinatecode'
- type: 'csv_header'
value: 'ordinateposition'
column_header: 'ordinateposition'
- type: 'csv_header'
value: 'tablename'
column_header: 'tablename'
- type: 'csv_header'
value: 'isstock'
column_header: 'isstock'
- type: 'csv_header'
value: 'scale'
column_header: 'scale'
- type: 'csv_header'
value: 'currency'
column_header: 'currency'
- type: 'csv_header'
value: 'numbertype'
column_header: 'numbertype'
- type: 'csv_header'
value: 'ismandatory'
column_header: 'ismandatory'
- type: 'csv_header'
value: 'decimalplaces'
column_header: 'decimalplaces'
- type: 'csv_header'
value: 'serieskey'
column_header: 'serieskey'
- type: 'csv_header'
value: 'tec_source_system'
column_header: 'tec_source_system'
- type: 'csv_header'
value: 'tec_dataset'
column_header: 'tec_dataset'
- type: 'csv_header'
value: 'tec_surrogate_key'
column_header: 'tec_surrogate_key'
- type: 'csv_header'
value: 'tec_crc'
column_header: 'tec_crc'
- type: 'csv_header'
value: 'tec_ingestion_date'
column_header: 'tec_ingestion_date'
- type: 'csv_header'
value: 'tec_version_id'
column_header: 'tec_version_id'
- type: 'csv_header'
value: 'tec_execution_date'
column_header: 'tec_execution_date'
- type: 'csv_header'
value: 'tec_run_id'
column_header: 'tec_run_id'
- type: 'csv_header'
value: 'tec_business_date'
column_header: 'tec_business_date'

View File

@@ -0,0 +1,736 @@
# Flow Configuration
CONNECTOR_TYPE: "devo_connector"
ODS_PREFIX: "INBOX/RQSD/RQSD_PROCESS"
TASK_NAME: "m_ODS_RQSD_SUBA_DEVO"
OUTPUT_TABLE: "RQSD_SUBA_DEVO"
DEVO_QUERY: "with suba_reporting_requirements_temp as (
SELECT sender,
riad_code,
name,
tid,
entity_id,
module_name,
CASE
WHEN cons_level = 'IND' THEN 'SOLO'
WHEN cons_level = 'CON' THEN 'CONS'
ELSE cons_level
END AS cons_level,
accounting_standard,
template_id,
reference_date,
expected,
module_id,
dsd_id,
changed_date,
tec_source_system,
tec_dataset,
tec_surrogate_key,
tec_crc,
tec_ingestion_date,
tec_version_id,
tec_execution_date,
tec_run_id,
tec_business_date
FROM crp_suba06.suba_reporting_requirements),
observations_1_2 as
(
select OBLIGATIONMODULEREFERENCEDATE,
RECEIVEDFILERECEIVEDDATE,
SUBSTR(SERIESKEY, INSTR(SERIESKEY, ':') + 1) AS ETWDR_COMP_KEY,
REPORTINGENTITYCOLLECTIONUNIQUEID,
DATAPOINTSETDATAPOINTIDENTIFIER,
NVL(NVL(CAST(OBSERVATIONVALUE AS STRING), OBSERVATIONTEXTVALUE), OBSERVATIONDATEVALUE) AS OBS_VALUE
FROM crp_rqsd.rqsd_rqsd01_observations
WHERE MODULECODE = 'SCOPF'
AND TABLENAME = 'etwdr'
AND obligationmodulereferencedate not like '%2019-12-06%'
AND obligationmodulereferencedate not like '%2017%'),
max_version_1_2_fin_all as
(
SELECT max(RECEIVEDFILERECEIVEDDATE) as max_version, reportingentitycollectionuniqueid, obligationmodulereferencedate
FROM observations_1_2
group by reportingentitycollectionuniqueid, obligationmodulereferencedate),
annex_1_2_fin_all_pivoted as
(SELECT
OBLIGATIONMODULEREFERENCEDATE,
ETWDR_COMP_KEY,
RECEIVEDFILERECEIVEDDATE,
MAX(CASE WHEN DATAPOINTSETDATAPOINTIDENTIFIER = 'ann_1_2_ref_date' THEN OBS_VALUE END) AS ANN_1_2_REF_DATE,
MAX(CASE WHEN DATAPOINTSETDATAPOINTIDENTIFIER = 'etwdr_mfi_id' THEN OBS_VALUE END) AS MFI_ID,
MAX(CASE WHEN DATAPOINTSETDATAPOINTIDENTIFIER = 'etwdr_lei' THEN OBS_VALUE END) AS LEGAL_ENTITY_ID,
MAX(CASE WHEN DATAPOINTSETDATAPOINTIDENTIFIER = 'etwdr_name' THEN OBS_VALUE END) AS INST_NAME,
MAX(CASE WHEN DATAPOINTSETDATAPOINTIDENTIFIER = 'etwdr_submitter' THEN OBS_VALUE END) AS ETWDR_SUBMITTER
FROM
observations_1_2
WHERE RECEIVEDFILERECEIVEDDATE in (select max_version from max_version_1_2_fin_all)
GROUP BY
OBLIGATIONMODULEREFERENCEDATE, ETWDR_COMP_KEY, RECEIVEDFILERECEIVEDDATE),
scopf as
(
select mfi_id, inst_name, legal_entity_id, obligationmodulereferencedate, ann_1_2_ref_date from annex_1_2_fin_all_pivoted where etwdr_submitter = 'SUP'
),
EXP_COREP_CONS as (
select riad_code,
reference_date,
module_name,
cons_level,
template_id,
case
when EXPECTED = 'E' then 'Y'
when EXPECTED = 'O' then 'O'
else 'N'
end as EXP_COREP_CONS
from suba_reporting_requirements_temp
where template_id = 'tgC_03.00'
and MODULE_NAME = 'COREP_OF'
and CONS_LEVEL = 'CONS'
),
EXP_COREP_SOLO as (
select riad_code,
reference_date,
module_name,
cons_level,
template_id,
case
when EXPECTED = 'E' then 'Y'
when EXPECTED = 'O' then 'O'
else 'N'
end as EXP_COREP_SOLO
from suba_reporting_requirements_temp
where template_id = 'tgC_03.00'
and MODULE_NAME = 'COREP_OF'
and CONS_LEVEL = 'SOLO'
),
EXP_FINREP_CONS as (
select riad_code,
reference_date,
module_name,
cons_level,
template_id,
case
when EXPECTED = 'E' then 'Y'
when EXPECTED = 'O' then 'O'
else 'N'
end as EXP_FINREP_CONS
from suba_reporting_requirements_temp
where template_id = 'tgF_00.01'
and MODULE_NAME in ('FINREP9', 'FINREP9_DP')
and CONS_LEVEL = 'CONS'
),
EXP_FINREP_SOLO as (
select riad_code,
reference_date,
module_name,
cons_level,
template_id,
case
when EXPECTED = 'E' then 'Y'
when EXPECTED = 'O' then 'O'
else 'N'
end as EXP_FINREP_SOLO
from suba_reporting_requirements_temp
where template_id = 'tgF_00.01'
and MODULE_NAME in ('FINREP9', 'FINREP9_DP')
and CONS_LEVEL = 'SOLO'
),
EXP_LEV_CONS as (
select riad_code,
reference_date,
module_name,
cons_level,
template_id,
case
when EXPECTED = 'E' then 'full&trans'
when EXPECTED = 'O' then 'none'
else 'none'
end as EXP_LEV_CONS
from suba_reporting_requirements_temp
where template_id = 'tgC_47.00'
and MODULE_NAME in ('COREP_LR')
and CONS_LEVEL = 'CONS'
),
EXP_LEV_SOLO as (
select riad_code,
reference_date,
module_name,
cons_level,
template_id,
case
when EXPECTED = 'E' then 'full&trans'
when EXPECTED = 'O' then 'none'
else 'none'
end as EXP_LEV_SOLO
from suba_reporting_requirements_temp
where template_id = 'tgC_47.00'
and MODULE_NAME in ('COREP_LR')
and CONS_LEVEL = 'SOLO'
),
EXP_LCR_CONS as (
select riad_code,
reference_date,
module_name,
cons_level,
template_id,
case
when EXPECTED = 'E' then 'Y'
when EXPECTED = 'O' then 'O'
else 'O'
end as EXP_LCR_CONS
from suba_reporting_requirements_temp
where template_id = 'tgC_76.00'
and MODULE_NAME = 'COREP_LCR_DA'
and CONS_LEVEL = 'CONS'
),
EXP_LCR_SOLO as (
select riad_code,
reference_date,
module_name,
cons_level,
template_id,
case
when EXPECTED = 'E' then 'Y'
when EXPECTED = 'O' then 'O'
else 'O'
end as EXP_LCR_SOLO
from suba_reporting_requirements_temp
where template_id = 'tgC_76.00'
and MODULE_NAME = 'COREP_LCR_DA'
and CONS_LEVEL = 'SOLO'
),
EXP_NSFR_CONS as (
select riad_code,
reference_date,
module_name,
cons_level,
template_id,
case
when EXPECTED = 'E' then 'Y'
when EXPECTED = 'O' then 'O'
else 'O'
end as EXP_NSFR_CONS
from suba_reporting_requirements_temp
where template_id = 'tgC_84.00'
and MODULE_NAME = 'COREP_NSFR'
and CONS_LEVEL = 'CONS'
),
EXP_NSFR_SOLO as (
select riad_code,
reference_date,
module_name,
cons_level,
template_id,
case
when EXPECTED = 'E' then 'Y'
when EXPECTED = 'O' then 'O'
else 'O'
end as EXP_NSFR_SOLO
from suba_reporting_requirements_temp
where template_id = 'tgC_84.00'
and MODULE_NAME = 'COREP_NSFR'
and CONS_LEVEL = 'SOLO'
),
corep_cons_c100 as (
select riad_code,
reported_period,
cons_level,
name,
lei,
c_0100_r0010_c0010,
c_0100_r0020_c0010,
c_0100_r0015_c0010
from crp_suba06.suba_c_0100
where cons_level = 'CONS'
),
corep_solo_c100 as (
select riad_code,
reported_period,
cons_level,
lei,
c_0100_r0010_c0010,
c_0100_r0020_c0010,
c_0100_r0015_c0010
from crp_suba06.suba_c_0100
where cons_level = 'SOLO'
),
corep_cons_c300 as (
select riad_code,
reported_period,
cons_level,
lei,
c_0300_r0010_c0010,
c_0300_r0030_c0010,
c_0300_r0050_c0010
from crp_suba06.suba_c_0300
where cons_level = 'CONS'
),
corep_solo_c300 as (
select riad_code,
reported_period,
cons_level,
lei,
c_0300_r0010_c0010,
c_0300_r0030_c0010,
c_0300_r0050_c0010
from crp_suba06.suba_c_0300
where cons_level = 'SOLO'
),
corep_cons_c200 as (
select riad_code,
reported_period,
cons_level,
lei,
c_0200_r0010_c0010
from crp_suba06.suba_c_0200
where cons_level = 'CONS' and reported_period<'2025-03-31 00:00:00'
union all
select riad_code,
reported_period,
cons_level,
lei,
c_0200a_r0010_c0010 as c_0200_r0010_c0010
from crp_suba06.suba_c_0200a
where cons_level = 'CONS' and reported_period>='2025-03-31 00:00:00'
),
corep_solo_c200 as (
select riad_code,
reported_period,
cons_level,
lei,
c_0200_r0010_c0010
from crp_suba06.suba_c_0200
where cons_level = 'SOLO' and reported_period<'2025-03-31 00:00:00'
union all
select riad_code,
reported_period,
cons_level,
lei,
c_0200a_r0010_c0010 as c_0200_r0010_c0010
from crp_suba06.suba_c_0200a
where cons_level = 'SOLO' and reported_period>='2025-03-31 00:00:00'
),
finrep_cons as (
select riad_code,
reported_period,
cons_level,
lei,
f_0101_r0380_c0010
from crp_suba06.suba_f_0101
where cons_level = 'CONS'
),
finrep_solo as (
select riad_code,
reported_period,
cons_level,
lei,
f_0101_r0380_c0010
from crp_suba06.suba_f_0101
where cons_level = 'SOLO'
),
lev_cons as (
select riad_code,
reported_period,
cons_level,
lei,
c_4700_r0330_c0010,
c_4700_r0340_c0010,
c_4700_r0410_c0010
from crp_suba06.suba_c_4700
where cons_level = 'CONS'
),
lev_solo as (
select riad_code,
reported_period,
cons_level,
lei,
c_4700_r0330_c0010,
c_4700_r0340_c0010,
c_4700_r0410_c0010
from crp_suba06.suba_c_4700
where cons_level = 'SOLO'
),
lcr_cons as (
select riad_code,
reported_period,
cons_level,
lei,
c_7600a_r0030_c0010
from crp_suba06.suba_c_7600a
where cons_level = 'CONS'
),
lcr_solo as (
select riad_code,
reported_period,
cons_level,
lei,
c_7600a_r0030_c0010
from crp_suba06.suba_c_7600a
where cons_level = 'SOLO'
),
nsfr_cons as (
select riad_code,
reported_period,
cons_level,
lei,
c_8400a_r0220_c0040
from crp_suba06.suba_c_8400a
where cons_level = 'CONS'
),
nsfr_solo as (
select riad_code,
reported_period,
cons_level,
lei,
c_8400a_r0220_c0040
from crp_suba06.suba_c_8400a
where cons_level = 'SOLO'
),
liq_subgroups as (
SELECT entity_id,
substr(entity_id, 0, 20) as short_id,
name,
start_date,
end_date
from crp_suba06.suba_entity_master_data_stage2
where substr(entity_id, -12) = 'CRDLIQSUBGRP'
),
liq_subgroup_lcr as (
select liq_subgroups.entity_id,
short_id,
liq_subgroups.name,
start_date,
end_date,
suba_c_7600a.reported_period,
c_7600a_r0030_c0010
from liq_subgroups
left join crp_suba06.suba_c_7600a on liq_subgroups.entity_id = suba_c_7600a.entity_id
where start_date <= reported_period
and end_date >= reported_period
),
liq_subgroup_nsfr as (
select liq_subgroups.entity_id,
short_id,
liq_subgroups.name,
start_date,
end_date,
suba_c_8400a.reported_period,
c_8400a_r0220_c0040
from liq_subgroups
left join crp_suba06.suba_c_8400a on liq_subgroups.entity_id = suba_c_8400a.entity_id
where start_date <= reported_period
and end_date >= reported_period
),
liq_subgroup_data_riad as (
select liq_subgroup_lcr.entity_id,
liq_subgroup_lcr.short_id,
liq_subgroup_lcr.name,
liq_subgroup_lcr.start_date,
liq_subgroup_lcr.end_date,
liq_subgroup_lcr.reported_period,
c_7600a_r0030_c0010 as c_7600a_r0030_c0010_subgr,
c_8400a_r0220_c0040 as c_8400a_r0220_c0040_subgr,
riad_code
from liq_subgroup_lcr
left join crp_suba06.suba_entity_master_data_stage2 on liq_subgroup_lcr.short_id = suba_entity_master_data_stage2.entity_id
and suba_entity_master_data_stage2.start_date <= liq_subgroup_lcr.reported_period
and suba_entity_master_data_stage2.end_date >= liq_subgroup_lcr.reported_period
left join liq_subgroup_nsfr on liq_subgroup_lcr.entity_id = liq_subgroup_nsfr.entity_id
and liq_subgroup_lcr.reported_period = liq_subgroup_nsfr.reported_period
),
all_dates as (
select riad_code,
reported_period,
reception_date
from crp_suba06.suba_c_0100
union
select riad_code,
reported_period,
reception_date
from crp_suba06.suba_c_0300
union
select riad_code,
reported_period,
reception_date
from crp_suba06.suba_c_0200
union
select riad_code,
reported_period,
reception_date
from crp_suba06.suba_c_0200a
union
select riad_code,
reported_period,
reception_date
from crp_suba06.suba_f_0101
union
select riad_code,
reported_period,
reception_date
from crp_suba06.suba_c_4700
union
select riad_code,
reported_period,
reception_date
from crp_suba06.suba_c_7600a
union
select riad_code,
reported_period,
reception_date
from crp_suba06.suba_c_8400a
union
select riad_code,
reference_date,
cast(changed_date as STRING) as reception_date
from suba_reporting_requirements_temp
where template_id in (
'tgC_03.00',
'tgF_00.01',
'tgC_47.00',
'tgC_76.00',
'tgC_84.00'
)
and reference_date in (
select distinct obligationmodulereferencedate
from SCOPF
)
),
max_dates as (
select reported_period,
riad_code,
max(reception_date) as receivedfilereceiveddate
from all_dates
group by reported_period,
riad_code
)
select SCOPF.obligationmodulereferencedate,
cast(null as String) as reportingentitycollectionuniqueid,
cast(NULL as DECIMAL(38, 10)) as receivedfileversionnumber,
max_dates.receivedfilereceiveddate,
cast(NULL as STRING) as revalidationdate,
SCOPF.ann_1_2_ref_date as ref_date,
concat(SCOPF.mfi_id, SCOPF.legal_entity_id) as inst_comp_key,
SCOPF.mfi_id,
SCOPF.legal_entity_id,
SCOPF.inst_name,
'EUR' as currency,
cast(NULL as STRING) as reported_by_supervisor,
cast(NULL as STRING) as confirmed_by_supervisor,
coalesce(EXP_COREP_CONS.EXP_COREP_CONS, 'N') as EXP_COREP_CONS,
coalesce (EXP_COREP_SOLO.EXP_COREP_SOLO, 'N') as EXP_COREP_SOLO,
coalesce (EXP_FINREP_CONS.EXP_FINREP_CONS, 'N') as EXP_FINREP_CONS,
coalesce (EXP_FINREP_SOLO.EXP_FINREP_SOLO, 'N') as EXP_FINREP_SOLO,
coalesce(EXP_LEV_CONS.EXP_LEV_CONS, 'none') as EXP_LEV_CONS,
coalesce (EXP_LEV_SOLO.EXP_LEV_SOLO, 'none') as EXP_LEV_SOLO,
coalesce (EXP_LCR_CONS.EXP_LCR_CONS, 'N') as EXP_LCR_CONS,
coalesce (EXP_LCR_SOLO.EXP_LCR_SOLO, 'N') as EXP_LCR_SOLO,
coalesce (EXP_NSFR_CONS.EXP_NSFR_CONS, 'N') as EXP_NSFR_CONS,
coalesce (EXP_NSFR_SOLO.EXP_NSFR_SOLO, 'N') as EXP_NSFR_SOLO,
cast(
corep_cons_c100.c_0100_r0020_c0010 as DECIMAL(38, 10)
) as cons_cet1_amt,
cast(
corep_cons_c100.c_0100_r0015_c0010 as DECIMAL(38, 10)
) as cons_tier1_amt,
cast(
corep_cons_c100.c_0100_r0010_c0010 as DECIMAL(38, 10)
) as cons_tot_cap_amt,
cast(
corep_cons_c300.c_0300_r0010_c0010 as DECIMAL(38, 10)
) as cons_cet1_ratio,
cast(
corep_cons_c300.c_0300_r0030_c0010 as DECIMAL(38, 10)
) as cons_tier1_ratio,
cast(
corep_cons_c300.c_0300_r0050_c0010 as DECIMAL(38, 10)
) as cons_tot_cap_ratio,
cast(
corep_cons_c200.c_0200_r0010_c0010 as DECIMAL(38, 10)
) as cons_risk_wght_assets,
cast(
corep_solo_c100.c_0100_r0020_c0010 as DECIMAL(38, 10)
) as solo_cet1_amt,
cast(
corep_solo_c100.c_0100_r0015_c0010 as DECIMAL(38, 10)
) as solo_tier1_amt,
cast(
corep_solo_c100.c_0100_r0010_c0010 as DECIMAL(38, 10)
) as solo_tot_cap_amt,
cast(
corep_solo_c300.c_0300_r0010_c0010 as DECIMAL(38, 10)
) as solo_cet1_ratio,
cast(
corep_solo_c300.c_0300_r0030_c0010 as DECIMAL(38, 10)
) as solo_tier1_ratio,
cast(
corep_solo_c300.c_0300_r0050_c0010 as DECIMAL(38, 10)
) as solo_tot_cap_ratio,
cast(
corep_solo_c200.c_0200_r0010_c0010 as DECIMAL(38, 10)
) as solo_risk_wght_assets,
cast(finrep_cons.f_0101_r0380_c0010 as DECIMAL(38, 10)) as cons_tot_assets,
cast(finrep_solo.f_0101_r0380_c0010 as DECIMAL(38, 10)) as solo_tot_assets,
cast(lev_cons.c_4700_r0330_c0010 as DECIMAL(38, 10)) as cons_lev_ratio_full,
cast(lev_cons.c_4700_r0340_c0010 as DECIMAL(38, 10)) as cons_lev_ratio_trans,
cast(lev_cons.c_4700_r0410_c0010 as DECIMAL(38, 10)) as cons_lev_ratio_req,
CASE
WHEN lev_cons.c_4700_r0410_c0010 IS NULL THEN NULL
WHEN lev_cons.c_4700_r0410_c0010 = 0.0300000000 THEN 'N'
ELSE 'Y'
END AS cons_lev_ratio_adj,
cast(lev_solo.c_4700_r0330_c0010 as DECIMAL(38, 10)) as solo_lev_ratio_full,
cast(lev_solo.c_4700_r0340_c0010 as DECIMAL(38, 10)) as solo_lev_ratio_trans,
cast(lev_solo.c_4700_r0410_c0010 as DECIMAL(38, 10)) as solo_lev_ratio_req,
CASE
WHEN lev_solo.c_4700_r0410_c0010 IS NULL THEN NULL
WHEN lev_solo.c_4700_r0410_c0010 = 0.0300000000 THEN 'N'
ELSE 'Y'
END AS solo_lev_ratio_adj,
cast(
case
WHEN mfi_id = liq_subgroup_data_riad.riad_code Then c_7600a_r0030_c0010_subgr
ELSE lcr_cons.c_7600a_r0030_c0010
END as DECIMAL(38, 10)
) as cons_lc_ratio,
cast(lcr_solo.c_7600a_r0030_c0010 as DECIMAL(38, 10)) as solo_lc_ratio,
cast(
case
WHEN mfi_id = liq_subgroup_data_riad.riad_code Then c_8400a_r0220_c0040_subgr
ELSE nsfr_cons.c_8400a_r0220_c0040
END as DECIMAL(38, 10)
) as cons_nsfr_ratio,
cast(nsfr_solo.c_8400a_r0220_c0040 as DECIMAL(38, 10)) as solo_nsfr_ratio,
cast(NULL as STRING) as submitter_comment,
cast(NULL as STRING) as datacollectioncode,
cast(NULL as STRING) as reportingcyclename,
cast(NULL as STRING) as reportingcyclestatus,
cast(NULL as STRING) as modulecode,
cast(NULL as DECIMAL(38, 10)) as moduleversionnumber,
cast(NULL as STRING) as reportingentityname,
cast(NULL as STRING) as entityattributecountry,
cast(NULL as STRING) as entitygroupentityname,
cast(NULL as STRING) as obligationmoduleremittancedate,
cast(NULL as STRING) as obligationmoduleexpected,
cast(NULL as DECIMAL(38, 10)) as revalidationversionnumber,
cast(NULL as STRING) as receivedfilesystemfilename,
cast(NULL as STRING) as obligationstatusstatus,
cast(NULL as STRING) as filestatussetsubmissionstatus,
cast(NULL as STRING) as filestatussetvalidationstatus,
cast(NULL as DECIMAL(38, 10)) as numberoferrors,
cast(NULL as DECIMAL(38, 10)) as numberofwarnings,
cast(NULL as DECIMAL(38, 10)) as delayindays,
cast(NULL as DECIMAL(38, 10)) as failedattempts,
cast(NULL as STRING) as tablename,
cast(NULL as STRING) as tec_source_system,
cast(NULL as STRING) as tec_dataset,
cast(NULL as STRING) as tec_surrogate_key,
cast(NULL as STRING) as tec_crc,
cast(NULL as TIMESTAMP) as tec_ingestion_date,
cast(NULL as STRING) as tec_version_id,
cast(NULL as TIMESTAMP) as tec_execution_date,
cast(NULL as STRING) as tec_run_id,
cast(NULL as TIMESTAMP) as tec_business_date
from SCOPF
left join EXP_COREP_CONS on (
scopf.obligationmodulereferencedate = EXP_COREP_CONS.reference_date
and scopf.mfi_id = EXP_COREP_CONS.riad_code
)
left join EXP_COREP_SOLO on (
scopf.obligationmodulereferencedate = EXP_COREP_SOLO.reference_date
and scopf.mfi_id = EXP_COREP_SOLO.riad_code
)
left join EXP_FINREP_CONS on (
scopf.obligationmodulereferencedate = EXP_FINREP_CONS.reference_date
and scopf.mfi_id = EXP_FINREP_CONS.riad_code
)
left join EXP_FINREP_SOLO on (
scopf.obligationmodulereferencedate = EXP_FINREP_SOLO.reference_date
and scopf.mfi_id = EXP_FINREP_SOLO.riad_code
)
left join EXP_LEV_CONS on (
scopf.obligationmodulereferencedate = EXP_LEV_CONS.reference_date
and scopf.mfi_id = EXP_LEV_CONS.riad_code
)
left join EXP_LEV_SOLO on (
scopf.obligationmodulereferencedate = EXP_LEV_SOLO.reference_date
and scopf.mfi_id = EXP_LEV_SOLO.riad_code
)
left join EXP_LCR_CONS on (
scopf.obligationmodulereferencedate = EXP_LCR_CONS.reference_date
and scopf.mfi_id = EXP_LCR_CONS.riad_code
)
left join EXP_LCR_SOLO on (
scopf.obligationmodulereferencedate = EXP_LCR_SOLO.reference_date
and scopf.mfi_id = EXP_LCR_SOLO.riad_code
)
left join EXP_NSFR_CONS on (
scopf.obligationmodulereferencedate = EXP_NSFR_CONS.reference_date
and scopf.mfi_id = EXP_NSFR_CONS.riad_code
)
left join EXP_NSFR_SOLO on (
scopf.obligationmodulereferencedate = EXP_NSFR_SOLO.reference_date
and scopf.mfi_id = EXP_NSFR_SOLO.riad_code
)
left join corep_cons_c100 on (
SCOPF.obligationmodulereferencedate = corep_cons_c100.reported_period
and SCOPF.mfi_id = corep_cons_c100.riad_code
)
left join corep_cons_c300 on (
SCOPF.obligationmodulereferencedate = corep_cons_c300.reported_period
and SCOPF.mfi_id = corep_cons_c300.riad_code
)
left join corep_cons_c200 on (
SCOPF.obligationmodulereferencedate = corep_cons_c200.reported_period
and SCOPF.mfi_id = corep_cons_c200.riad_code
)
left join corep_solo_c100 on (
SCOPF.obligationmodulereferencedate = corep_solo_c100.reported_period
and SCOPF.mfi_id = corep_solo_c100.riad_code
)
left join corep_solo_c200 on (
SCOPF.obligationmodulereferencedate = corep_solo_c200.reported_period
and SCOPF.mfi_id = corep_solo_c200.riad_code
)
left join corep_solo_c300 on (
SCOPF.obligationmodulereferencedate = corep_solo_c300.reported_period
and SCOPF.mfi_id = corep_solo_c300.riad_code
)
left join finrep_cons on (
SCOPF.obligationmodulereferencedate = finrep_cons.reported_period
and SCOPF.mfi_id = finrep_cons.riad_code
)
left join finrep_solo on (
SCOPF.obligationmodulereferencedate = finrep_solo.reported_period
and SCOPF.mfi_id = finrep_solo.riad_code
)
left join lev_cons on (
SCOPF.obligationmodulereferencedate = lev_cons.reported_period
and SCOPF.mfi_id = lev_cons.riad_code
)
left join lev_solo on (
SCOPF.obligationmodulereferencedate = lev_solo.reported_period
and SCOPF.mfi_id = lev_solo.riad_code
)
left join lcr_cons on (
SCOPF.obligationmodulereferencedate = lcr_cons.reported_period
and SCOPF.mfi_id = lcr_cons.riad_code
)
left join lcr_solo on (
SCOPF.obligationmodulereferencedate = lcr_solo.reported_period
and SCOPF.mfi_id = lcr_solo.riad_code
)
left join nsfr_cons on (
SCOPF.obligationmodulereferencedate = nsfr_cons.reported_period
and SCOPF.mfi_id = nsfr_cons.riad_code
)
left join nsfr_solo on (
SCOPF.obligationmodulereferencedate = nsfr_solo.reported_period
and SCOPF.mfi_id = nsfr_solo.riad_code
)
left join liq_subgroup_data_riad on (
SCOPF.obligationmodulereferencedate = liq_subgroup_data_riad.reported_period
and SCOPF.mfi_id = liq_subgroup_data_riad.riad_code
)
left join max_dates on (
SCOPF.obligationmodulereferencedate = max_dates.reported_period
and SCOPF.mfi_id = max_dates.riad_code
)
order by 1,2"

View File

@@ -0,0 +1,259 @@
# static configs
tmpdir: /tmp
inbox_prefix: INBOX/RQSD/RQSD_PROCESS
archive_prefix: ARCHIVE/RQSD/RQSD_PROCESS
workflow_name: w_ODS_RQSD_PROCESS_DEVO
validation_schema_path: None
file_type: csv
# task configs
tasks:
- task_name: m_ODS_RQSD_SUBA_DEVO_PARSE
ods_prefix: INBOX/RQSD/RQSD_PROCESS/RQSD_SUBA_DEVO
output_table: RQSD_SUBA_DEVO
output_columns:
- type: 'workflow_key'
column_header: 'A_WORKFLOW_HISTORY_KEY'
- type: 'csv_header'
value: 'obligationmodulereferencedate'
column_header: 'obligationmodulereferencedate'
- type: 'csv_header'
value: 'reportingentitycollectionuniqueid'
column_header: 'reportingentitycollectionuniqueid'
- type: 'csv_header'
value: 'receivedfileversionnumber'
column_header: 'receivedfileversionnumber'
- type: 'csv_header'
value: 'receivedfilereceiveddate'
column_header: 'receivedfilereceiveddate'
- type: 'csv_header'
value: 'revalidationdate'
column_header: 'revalidationdate'
- type: 'csv_header'
value: 'ref_date'
column_header: 'ref_date'
- type: 'csv_header'
value: 'inst_comp_key'
column_header: 'inst_comp_key'
- type: 'csv_header'
value: 'mfi_id'
column_header: 'mfi_id'
- type: 'csv_header'
value: 'legal_entity_id'
column_header: 'legal_entity_id'
- type: 'csv_header'
value: 'inst_name'
column_header: 'inst_name'
- type: 'csv_header'
value: 'currency'
column_header: 'currency'
- type: 'csv_header'
value: 'reported_by_supervisor'
column_header: 'reported_by_supervisor'
- type: 'csv_header'
value: 'confirmed_by_supervisor'
column_header: 'confirmed_by_supervisor'
- type: 'csv_header'
value: 'exp_corep_cons'
column_header: 'exp_corep_cons'
- type: 'csv_header'
value: 'exp_corep_solo'
column_header: 'exp_corep_solo'
- type: 'csv_header'
value: 'exp_finrep_cons'
column_header: 'exp_finrep_cons'
- type: 'csv_header'
value: 'exp_finrep_solo'
column_header: 'exp_finrep_solo'
- type: 'csv_header'
value: 'exp_lev_cons'
column_header: 'exp_lev_cons'
- type: 'csv_header'
value: 'exp_lev_solo'
column_header: 'exp_lev_solo'
- type: 'csv_header'
value: 'exp_lcr_cons'
column_header: 'exp_lcr_cons'
- type: 'csv_header'
value: 'exp_lcr_solo'
column_header: 'exp_lcr_solo'
- type: 'csv_header'
value: 'exp_nsfr_cons'
column_header: 'exp_nsfr_cons'
- type: 'csv_header'
value: 'exp_nsfr_solo'
column_header: 'exp_nsfr_solo'
- type: 'csv_header'
value: 'cons_cet1_amt'
column_header: 'cons_cet1_amt'
- type: 'csv_header'
value: 'cons_tier1_amt'
column_header: 'cons_tier1_amt'
- type: 'csv_header'
value: 'cons_tot_cap_amt'
column_header: 'cons_tot_cap_amt'
- type: 'csv_header'
value: 'cons_cet1_ratio'
column_header: 'cons_cet1_ratio'
- type: 'csv_header'
value: 'cons_tier1_ratio'
column_header: 'cons_tier1_ratio'
- type: 'csv_header'
value: 'cons_tot_cap_ratio'
column_header: 'cons_tot_cap_ratio'
- type: 'csv_header'
value: 'cons_risk_wght_assets'
column_header: 'cons_risk_wght_assets'
- type: 'csv_header'
value: 'solo_cet1_amt'
column_header: 'solo_cet1_amt'
- type: 'csv_header'
value: 'solo_tier1_amt'
column_header: 'solo_tier1_amt'
- type: 'csv_header'
value: 'solo_tot_cap_amt'
column_header: 'solo_tot_cap_amt'
- type: 'csv_header'
value: 'solo_cet1_ratio'
column_header: 'solo_cet1_ratio'
- type: 'csv_header'
value: 'solo_tier1_ratio'
column_header: 'solo_tier1_ratio'
- type: 'csv_header'
value: 'solo_tot_cap_ratio'
column_header: 'solo_tot_cap_ratio'
- type: 'csv_header'
value: 'solo_risk_wght_assets'
column_header: 'solo_risk_wght_assets'
- type: 'csv_header'
value: 'cons_tot_assets'
column_header: 'cons_tot_assets'
- type: 'csv_header'
value: 'solo_tot_assets'
column_header: 'solo_tot_assets'
- type: 'csv_header'
value: 'cons_lev_ratio_full'
column_header: 'cons_lev_ratio_full'
- type: 'csv_header'
value: 'cons_lev_ratio_trans'
column_header: 'cons_lev_ratio_trans'
- type: 'csv_header'
value: 'cons_lev_ratio_req'
column_header: 'cons_lev_ratio_req'
- type: 'csv_header'
value: 'cons_lev_ratio_adj'
column_header: 'cons_lev_ratio_adj'
- type: 'csv_header'
value: 'solo_lev_ratio_full'
column_header: 'solo_lev_ratio_full'
- type: 'csv_header'
value: 'solo_lev_ratio_trans'
column_header: 'solo_lev_ratio_trans'
- type: 'csv_header'
value: 'solo_lev_ratio_req'
column_header: 'solo_lev_ratio_req'
- type: 'csv_header'
value: 'solo_lev_ratio_adj'
column_header: 'solo_lev_ratio_adj'
- type: 'csv_header'
value: 'cons_lc_ratio'
column_header: 'cons_lc_ratio'
- type: 'csv_header'
value: 'solo_lc_ratio'
column_header: 'solo_lc_ratio'
- type: 'csv_header'
value: 'cons_nsfr_ratio'
column_header: 'cons_nsfr_ratio'
- type: 'csv_header'
value: 'solo_nsfr_ratio'
column_header: 'solo_nsfr_ratio'
- type: 'csv_header'
value: 'submitter_comment'
column_header: 'submitter_comment'
- type: 'csv_header'
value: 'datacollectioncode'
column_header: 'datacollectioncode'
- type: 'csv_header'
value: 'reportingcyclename'
column_header: 'reportingcyclename'
- type: 'csv_header'
value: 'reportingcyclestatus'
column_header: 'reportingcyclestatus'
- type: 'csv_header'
value: 'modulecode'
column_header: 'modulecode'
- type: 'csv_header'
value: 'moduleversionnumber'
column_header: 'moduleversionnumber'
- type: 'csv_header'
value: 'reportingentityname'
column_header: 'reportingentityname'
- type: 'csv_header'
value: 'entityattributecountry'
column_header: 'entityattributecountry'
- type: 'csv_header'
value: 'entitygroupentityname'
column_header: 'entitygroupentityname'
- type: 'csv_header'
value: 'obligationmoduleremittancedate'
column_header: 'obligationmoduleremittancedate'
- type: 'csv_header'
value: 'obligationmoduleexpected'
column_header: 'obligationmoduleexpected'
- type: 'csv_header'
value: 'revalidationversionnumber'
column_header: 'revalidationversionnumber'
- type: 'csv_header'
value: 'receivedfilesystemfilename'
column_header: 'receivedfilesystemfilename'
- type: 'csv_header'
value: 'obligationstatusstatus'
column_header: 'obligationstatusstatus'
- type: 'csv_header'
value: 'filestatussetsubmissionstatus'
column_header: 'filestatussetsubmissionstatus'
- type: 'csv_header'
value: 'filestatussetvalidationstatus'
column_header: 'filestatussetvalidationstatus'
- type: 'csv_header'
value: 'numberoferrors'
column_header: 'numberoferrors'
- type: 'csv_header'
value: 'numberofwarnings'
column_header: 'numberofwarnings'
- type: 'csv_header'
value: 'delayindays'
column_header: 'delayindays'
- type: 'csv_header'
value: 'failedattempts'
column_header: 'failedattempts'
- type: 'csv_header'
value: 'tablename'
column_header: 'tablename'
- type: 'csv_header'
value: 'tec_source_system'
column_header: 'tec_source_system'
- type: 'csv_header'
value: 'tec_dataset'
column_header: 'tec_dataset'
- type: 'csv_header'
value: 'tec_surrogate_key'
column_header: 'tec_surrogate_key'
- type: 'csv_header'
value: 'tec_crc'
column_header: 'tec_crc'
- type: 'csv_header'
value: 'tec_ingestion_date'
column_header: 'tec_ingestion_date'
- type: 'csv_header'
value: 'tec_version_id'
column_header: 'tec_version_id'
- type: 'csv_header'
value: 'tec_execution_date'
column_header: 'tec_execution_date'
- type: 'csv_header'
value: 'tec_run_id'
column_header: 'tec_run_id'
- type: 'csv_header'
value: 'tec_business_date'
column_header: 'tec_business_date'

View File

@@ -0,0 +1,524 @@
import sys
import os
from airflow import DAG
from airflow.operators.python import PythonOperator
from airflow.operators.dummy import DummyOperator
from airflow.operators.trigger_dagrun import TriggerDagRunOperator
from airflow.utils.dates import days_ago
from airflow.utils.trigger_rule import TriggerRule
from datetime import datetime, timedelta
import logging
try:
from airflow.exceptions import AirflowFailException, AirflowSkipException
except Exception:
from airflow.exceptions import AirflowException as AirflowFailException
from airflow.exceptions import AirflowSkipException
sys.path.append('/opt/airflow/python/connectors/devo')
sys.path.append('/opt/airflow/python/mrds_common')
sys.path.append('/opt/airflow/src/airflow/dags/ods/rqsd')
from mrds.utils.manage_runs import init_workflow as mrds_init_workflow, finalise_workflow as mrds_finalise_workflow
from devo_connector import DevoConnector
from mrds.core import main as mrds_main
from mrds.utils.security_utils import get_verified_run_id, verify_run_id
# Per-flow configuration keyed by logical task name.  Each entry points at the
# Devo connector flow/env YAMLs, the CSV file the connector exports, and the
# MRDS parse config used to load that CSV into ODS.
TASK_CONFIGS = {
    "m_ODS_RQSD_OBSERVATIONS": {
        "flow_config_path": "/opt/airflow/src/airflow/dags/ods/rqsd/rqsd_process/config/yaml/m_ODS_RQSD_OBSERVATIONS.yaml",
        "env_config_path": "/opt/airflow/python/connectors/devo/config/env_config_rqsd.yaml",
        "source_filename": "RQSD_OBSERVATIONS.csv",
        "config_file": "/opt/airflow/src/airflow/dags/ods/rqsd/rqsd_process/config/yaml/m_ODS_RQSD_OBSERVATIONS_PARSE.yaml"
    },
    "m_ODS_RQSD_FX": {
        "flow_config_path": "/opt/airflow/src/airflow/dags/ods/rqsd/rqsd_process/config/yaml/m_ODS_RQSD_FX.yaml",
        "env_config_path": "/opt/airflow/python/connectors/devo/config/env_config_rqsd.yaml",
        "source_filename": "RQSD_FX.csv",
        "config_file": "/opt/airflow/src/airflow/dags/ods/rqsd/rqsd_process/config/yaml/m_ODS_RQSD_FX_PARSE.yaml"
    },
    "m_ODS_RQSD_SUBA_DEVO": {
        "flow_config_path": "/opt/airflow/src/airflow/dags/ods/rqsd/rqsd_process/config/yaml/m_ODS_RQSD_SUBA_DEVO.yaml",
        "env_config_path": "/opt/airflow/python/connectors/devo/config/env_config_rqsd.yaml",
        "source_filename": "RQSD_SUBA_DEVO.csv",
        "config_file": "/opt/airflow/src/airflow/dags/ods/rqsd/rqsd_process/config/yaml/m_ODS_RQSD_SUBA_DEVO_PARSE.yaml"
    }
}
# Standard Airflow defaults applied to every task; individual operators below
# override retries to 0 where a retry would re-run a non-idempotent step.
default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': days_ago(1),
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=1),
}
# DAG id is derived from this file's name, so a file rename renames the DAG.
dag_id = os.path.splitext(os.path.basename(__file__))[0]
# Identity under which this run is registered in the MRDS workflow tables.
WORKFLOW_CONFIG = {
    "database_name": "ODS",
    "workflow_name": dag_id
}
with DAG(
dag_id=dag_id,
default_args=default_args,
description='Run devo RQSD data ingestion workflow with conditional MRDS processing - Multi-task',
#schedule_interval='*/10 * * * *', # every 10 minutes
schedule_interval=None,
catchup=False,
max_active_runs=1,
tags=["Devo", "RQSD", "MRDS", "Connector", "Multi-Task"]
) as dag:
    def init_workflow_task(**context):
        """Open an MRDS workflow run and share its identity with downstream tasks.

        Logs a snapshot of the relevant environment (DB password masked),
        validates that the loader DB credentials are present, registers a new
        workflow run via ``mrds_init_workflow`` and pushes to XCom:
          - ``workflow_history_key``: key returned by MRDS for this run
          - ``workflow_context``: dict of ``run_id`` + ``a_workflow_history_key``
          - ``env``: resolved MRDS environment (defaults to ``"dev"``)

        Raises:
            ValueError: if any required DB environment variable is missing.
        """
        try:
            database_name = WORKFLOW_CONFIG["database_name"]
            workflow_name = WORKFLOW_CONFIG["workflow_name"]
            # Environment snapshot for troubleshooting; the password value is
            # replaced with a placeholder so it never reaches the task log.
            env_vars = {
                'MRDS_ENV': os.getenv("MRDS_ENV"),
                'MRDS_LOADER_DB_USER': os.getenv("MRDS_LOADER_DB_USER"),
                'MRDS_LOADER_DB_PASS': '***MASKED***' if os.getenv("MRDS_LOADER_DB_PASS") else None,
                'MRDS_LOADER_DB_TNS': os.getenv("MRDS_LOADER_DB_TNS"),
                'BUCKET_NAMESPACE': os.getenv("BUCKET_NAMESPACE"),
            }
            for key, value in env_vars.items():
                logging.info(f"{key}: {value}")
            # Run id comes from the security helper rather than raw context —
            # presumably it validates/sanitises the id; see mrds.utils.security_utils.
            run_id_value = get_verified_run_id(context)
            logging.info(f"Task ID: {context.get('task_instance_key_str', 'N/A')}")
            logging.info(f"Run ID: {run_id_value}")
            logging.info(f"Execution Date: {context.get('execution_date', 'N/A')}")
            dag_obj = context.get('dag')
            dag_id_str = dag_obj.dag_id if dag_obj else 'N/A'
            logging.info(f"DAG ID: {dag_id_str}")
            env = os.getenv("MRDS_ENV", "dev")
            username = os.getenv("MRDS_LOADER_DB_USER")
            password = os.getenv("MRDS_LOADER_DB_PASS")
            tnsalias = os.getenv("MRDS_LOADER_DB_TNS")
            if not all([username, password, tnsalias]):
                # Name every missing variable so the failure is actionable.
                missing_vars = []
                if not username:
                    missing_vars.append("MRDS_LOADER_DB_USER")
                if not password:
                    missing_vars.append("MRDS_LOADER_DB_PASS")
                if not tnsalias:
                    missing_vars.append("MRDS_LOADER_DB_TNS")
                error_msg = f"Missing required environment variables: {', '.join(missing_vars)}"
                logging.error(error_msg)
                raise ValueError(error_msg)
            logging.info(f"Initializing {workflow_name} workflow for env '{env}'")
            workflow_run_id = run_id_value
            a_workflow_history_key = mrds_init_workflow(database_name, workflow_name, workflow_run_id)
            logging.info(f"Initialized workflow with history key: {a_workflow_history_key}")
            workflow_context = {
                "run_id": workflow_run_id,
                "a_workflow_history_key": a_workflow_history_key
            }
            ti = context['ti']
            ti.xcom_push(key='workflow_history_key', value=a_workflow_history_key)
            ti.xcom_push(key='workflow_context', value=workflow_context)
            ti.xcom_push(key='env', value=env)
            logging.info("Workflow initialization completed successfully")
        except Exception as e:
            logging.error(f"Error initializing workflow: {e}", exc_info=True)
            raise
    def run_devo_connector(**context):
        """Export one flow's data from Devo into the ODS inbox.

        Resolves the flow name from the task id (strips the ``devo_`` prefix),
        builds a ``DevoConnector`` from the flow/env YAMLs in ``TASK_CONFIGS``
        and runs it with the shared workflow context from ``init_workflow``.

        Pushes to XCom on every path (success *and* failure) so the downstream
        ``check_*`` gate always finds values:
          - ``row_count``: rows exported (0 on failure)
          - ``devo_success``: whether the export completed
          - ``should_run_mrds``: True only when rows were exported
          - ``error_message``: failure description, or ``None`` on success

        Returns:
            Number of rows processed by the connector.
        """
        ti = context['ti']
        task_id = context['task'].task_id
        try:
            if task_id.startswith('devo_'):
                task_name = task_id.replace('devo_', '')
            else:
                task_name = task_id
            task_config = TASK_CONFIGS.get(task_name)
            if not task_config:
                error_msg = f"No configuration found for task: {task_name}"
                logging.error(error_msg)
                # Push "failed" markers before raising so check_* can skip cleanly.
                ti.xcom_push(key='row_count', value=0)
                ti.xcom_push(key='devo_success', value=False)
                ti.xcom_push(key='should_run_mrds', value=False)
                ti.xcom_push(key='error_message', value=error_msg)
                raise ValueError(error_msg)
            flow_config_path = task_config["flow_config_path"]
            env_config_path = task_config["env_config_path"]
            workflow_context = ti.xcom_pull(key='workflow_context', task_ids='init_workflow')
            env = ti.xcom_pull(key='env', task_ids='init_workflow')
            if not workflow_context:
                error_msg = "No workflow_context from init task"
                logging.error(error_msg)
                ti.xcom_push(key='row_count', value=0)
                ti.xcom_push(key='devo_success', value=False)
                ti.xcom_push(key='should_run_mrds', value=False)
                ti.xcom_push(key='error_message', value=error_msg)
                raise ValueError(error_msg)
            logging.info(f"Starting Devo connector for env '{env}' - {task_name}")
            devo_connector = DevoConnector(
                flow_config_path=flow_config_path,
                env_config_path=env_config_path,
                env=env,
                logger=logging.getLogger(f"devo_connector_{env}_{task_name}")
            )
            row_count = devo_connector.run(workflow_context)
            logging.info(f"Devo connector completed successfully for {task_name}. Processed {row_count} rows.")
            ti.xcom_push(key='row_count', value=row_count)
            ti.xcom_push(key='devo_success', value=True)
            # The MRDS parse step only makes sense when rows were exported.
            ti.xcom_push(key='should_run_mrds', value=row_count > 0)
            ti.xcom_push(key='error_message', value=None)
            return row_count
        except Exception as e:
            error_msg = f"Error running Devo connector: {str(e)}"
            logging.error(error_msg, exc_info=True)
            ti.xcom_push(key='row_count', value=0)
            ti.xcom_push(key='devo_success', value=False)
            ti.xcom_push(key='should_run_mrds', value=False)
            ti.xcom_push(key='error_message', value=error_msg)
            raise
def check_should_run_mrds(**context):
ti = context['ti']
dag_run = context['dag_run']
task_id = context['task'].task_id
if task_id.startswith('check_'):
task_name = task_id.replace('check_', '')
else:
task_name = task_id
devo_task_id = f'devo_{task_name}'
devo_task_instance = dag_run.get_task_instance(devo_task_id)
logging.info(f"Devo task {devo_task_id} state: {devo_task_instance.state}")
if devo_task_instance.state == 'failed':
error_msg = ti.xcom_pull(key='error_message', task_ids=devo_task_id)
logging.info(f"Devo connector failed for {task_name} - skipping MRDS task. Error: {error_msg}")
raise AirflowSkipException(f"Devo connector failed for {task_name}")
should_run_mrds = ti.xcom_pull(key='should_run_mrds', task_ids=devo_task_id)
row_count = ti.xcom_pull(key='row_count', task_ids=devo_task_id)
if task_name == "m_ODS_RQSD_OBSERVATIONS" and (not should_run_mrds or row_count == 0):
logging.info(f"OBSERVATIONS task has no data (row_count: {row_count}) - marking to skip all subsequent tasks")
ti.xcom_push(key='skip_all_tasks', value=True)
raise AirflowSkipException(f"No OBSERVATIONS data found (row_count: {row_count}) - skipping all subsequent processing")
if not should_run_mrds or row_count == 0:
logging.info(f"Skipping MRDS task for {task_name} - row count: {row_count}")
raise AirflowSkipException(f"No data to process for {task_name} (row_count: {row_count})")
logging.info(f"MRDS task should run for {task_name} - row count: {row_count}")
return True
def check_should_run_parallel_tasks(**context):
ti = context['ti']
task_id = context['task'].task_id
if task_id.startswith('check_'):
task_name = task_id.replace('check_', '')
else:
task_name = task_id
skip_all_tasks = ti.xcom_pull(key='skip_all_tasks', task_ids='check_m_ODS_RQSD_OBSERVATIONS')
if skip_all_tasks:
logging.info(f"Skipping {task_name} - OBSERVATIONS task had no data")
raise AirflowSkipException(f"Skipping {task_name} - OBSERVATIONS task had no data")
return check_should_run_mrds(**context)
def run_mrds_task(**context):
ti = context['ti']
task_id = context['task'].task_id
try:
if task_id.endswith('_PARSE'):
task_name = task_id.replace('_PARSE', '')
else:
task_name = task_id
devo_task_id = f'devo_{task_name}'
task_config = TASK_CONFIGS.get(task_name)
if not task_config:
raise ValueError(f"No configuration found for task: {task_name}")
source_filename = task_config["source_filename"]
config_file = task_config["config_file"]
workflow_context = ti.xcom_pull(key='workflow_context', task_ids='init_workflow')
row_count = ti.xcom_pull(key='row_count', task_ids=devo_task_id)
if not workflow_context:
raise ValueError("No workflow_context from init task")
logging.info(f"Starting MRDS task for {task_name} with workflow context: {workflow_context}")
logging.info(f"Processing {row_count} rows from Devo connector")
mrds_main(workflow_context, source_filename, config_file, generate_workflow_context=False)
logging.info(f"MRDS task completed successfully for {task_name}")
return "SUCCESS"
except Exception as e:
logging.error(f"Error running MRDS task: {e}", exc_info=True)
raise
    def check_success_for_mopdb(**context):
        """Gate for the MOPDB trigger.

        Skips (AirflowSkipException) when any Devo or MRDS task failed, or
        when *every* MRDS task was skipped (nothing new to propagate).  Any
        unexpected error also results in a skip rather than a failure, so the
        MOPDB trigger is strictly best-effort.
        """
        try:
            ti = context['ti']
            dag_run = context['dag_run']
            has_failures = False
            failure_reasons = []
            for task_name in TASK_CONFIGS.keys():
                devo_task_id = f'devo_{task_name}'
                mrds_task_id = f'{task_name}_PARSE'
                devo_task = dag_run.get_task_instance(devo_task_id)
                mrds_task = dag_run.get_task_instance(mrds_task_id)
                if devo_task.state == 'failed':
                    has_failures = True
                    failure_reasons.append(f"{task_name}: Devo connector failed")
                if mrds_task.state == 'failed':
                    has_failures = True
                    failure_reasons.append(f"{task_name}: MRDS task failed")
            if has_failures:
                error_msg = f"Tasks failed - skipping MOPDB trigger: {', '.join(failure_reasons)}"
                logging.info(error_msg)
                raise AirflowSkipException(error_msg)
            # No failures - but if every parse task was skipped there is no
            # new data, so there is nothing for MOPDB to pick up either.
            all_skipped = all(
                dag_run.get_task_instance(f'{task_name}_PARSE').state == 'skipped'
                for task_name in TASK_CONFIGS.keys()
            )
            if all_skipped:
                error_msg = "All MRDS tasks were skipped (no data to process) - skipping MOPDB trigger"
                logging.info(error_msg)
                raise AirflowSkipException(error_msg)
            logging.info("All tasks completed successfully - proceeding to trigger MOPDB")
            return "SUCCESS"
        except AirflowSkipException:
            raise
        except Exception as e:
            logging.error(f"Error checking success for MOPDB: {e}", exc_info=True)
            raise AirflowSkipException(f"Error checking success - skipping MOPDB trigger: {e}")
def end_log_table_task(**context):
try:
logging.info("End log table task - always runs at the end")
return "SUCCESS"
except Exception as e:
logging.error(f"Error in end log table task: {e}", exc_info=True)
raise
def finalise_workflow_task(**context):
a_workflow_history_key = None
try:
ti = context['ti']
dag_run = context['dag_run']
a_workflow_history_key = ti.xcom_pull(key='workflow_history_key', task_ids='init_workflow')
if a_workflow_history_key is None:
raise ValueError("No workflow history key found in XCom; cannot finalise workflow")
workflow_success = True
failure_reasons = []
for task_name in TASK_CONFIGS.keys():
devo_task_id = f'devo_{task_name}'
mrds_task_id = f'{task_name}_PARSE'
devo_task = dag_run.get_task_instance(devo_task_id)
mrds_task = dag_run.get_task_instance(mrds_task_id)
if devo_task.state == 'failed':
workflow_success = False
failure_reasons.append(f"{task_name}: Devo connector failed")
if mrds_task.state == 'failed':
workflow_success = False
failure_reasons.append(f"{task_name}: MRDS task failed")
elif mrds_task.state == 'skipped':
row_count = ti.xcom_pull(key='row_count', task_ids=devo_task_id)
devo_success = ti.xcom_pull(key='devo_success', task_ids=devo_task_id)
if devo_success and row_count == 0:
logging.info(f"{task_name} - MRDS task was skipped due to no data - this is normal")
elif not devo_success:
workflow_success = False
failure_reasons.append(f"{task_name}: Devo connector failed, MRDS skipped")
trigger_mopdb_task = dag_run.get_task_instance('trigger_mopdb_dag')
if trigger_mopdb_task.state == 'failed':
workflow_success = False
failure_reasons.append("MOPDB trigger failed")
end_log_task = dag_run.get_task_instance('end_log_table')
if end_log_task.state == 'failed':
workflow_success = False
failure_reasons.append("End log table failed")
if workflow_success:
mrds_finalise_workflow(a_workflow_history_key, "Y")
logging.info(f"Finalised workflow with history key {a_workflow_history_key} as SUCCESS")
else:
mrds_finalise_workflow(a_workflow_history_key, "N")
logging.error(f"Finalised workflow with history key {a_workflow_history_key} as FAILED")
logging.error(f"Failure reasons: {', '.join(failure_reasons)}")
raise AirflowFailException(f"Workflow failed: {', '.join(failure_reasons)}")
except AirflowFailException:
raise
except Exception as e:
logging.error(f"Error finalizing workflow: {e}", exc_info=True)
try:
if a_workflow_history_key:
mrds_finalise_workflow(a_workflow_history_key, "N")
except:
pass
raise AirflowFailException(f"Workflow finalization failed: {e}")
    # ---- Framework tasks -------------------------------------------------
    init_workflow = PythonOperator(
        task_id='init_workflow',
        python_callable=init_workflow_task,
        provide_context=True,
        retries=0,
    )
    # Runs whatever happened upstream (ALL_DONE) and converts failures /
    # all-skipped into a skip of the MOPDB trigger.
    check_mopdb = PythonOperator(
        task_id='check_success_for_mopdb',
        python_callable=check_success_for_mopdb,
        provide_context=True,
        trigger_rule=TriggerRule.ALL_DONE,
        retries=0,
    )
    trigger_mopdb_dag = TriggerDagRunOperator(
        task_id='trigger_mopdb_dag',
        trigger_dag_id='w_MOPDB_RQSD_PROCESS',
        wait_for_completion=False,
        trigger_rule=TriggerRule.NONE_FAILED_MIN_ONE_SUCCESS,
        retries=0,
    )
    end_log_table = PythonOperator(
        task_id='end_log_table',
        python_callable=end_log_table_task,
        trigger_rule=TriggerRule.ALL_DONE,
        retries=0,
    )
    finalize_workflow = PythonOperator(
        task_id='finalize_workflow',
        python_callable=finalise_workflow_task,
        provide_context=True,
        trigger_rule=TriggerRule.ALL_DONE,
        retries=0,
    )
    # ---- Per-flow tasks --------------------------------------------------
    # The first configured flow (OBSERVATIONS) runs alone; the remaining
    # flows run only after its parse step, so its "no data" signal can skip them.
    task_names = list(TASK_CONFIGS.keys())
    first_task_name = task_names[0]
    parallel_task_names = task_names[1:]
    first_devo_task = PythonOperator(
        task_id=f'devo_{first_task_name}',
        python_callable=run_devo_connector,
        provide_context=True,
        retries=0,
    )
    first_check_task = PythonOperator(
        task_id=f'check_{first_task_name}',
        python_callable=check_should_run_mrds,
        provide_context=True,
        trigger_rule=TriggerRule.ALL_DONE,
        retries=0,
    )
    first_mrds_task = PythonOperator(
        task_id=f'{first_task_name}_PARSE',
        python_callable=run_mrds_task,
        provide_context=True,
        retries=0,
    )
    parallel_tasks = []
    for task_name in parallel_task_names:
        devo_task = PythonOperator(
            task_id=f'devo_{task_name}',
            python_callable=run_devo_connector,
            provide_context=True,
            retries=0,
        )
        # Parallel branches use the variant gate that also honours skip_all_tasks.
        check_task = PythonOperator(
            task_id=f'check_{task_name}',
            python_callable=check_should_run_parallel_tasks,
            provide_context=True,
            trigger_rule=TriggerRule.ALL_DONE,
            retries=0,
        )
        mrds_task = PythonOperator(
            task_id=f'{task_name}_PARSE',
            python_callable=run_mrds_task,
            provide_context=True,
            retries=0,
        )
        first_mrds_task >> devo_task >> check_task >> mrds_task
        parallel_tasks.extend([devo_task, check_task, mrds_task])
    init_workflow >> first_devo_task >> first_check_task >> first_mrds_task
    # Fan all *_PARSE tasks into the MOPDB gate.
    all_final_tasks = [first_mrds_task]
    for task_name in parallel_task_names:
        mrds_task_id = f'{task_name}_PARSE'
        for task in parallel_tasks:
            if task.task_id == mrds_task_id:
                all_final_tasks.append(task)
                break
    for task in all_final_tasks:
        task >> check_mopdb
    # NOTE(review): end_log_table is wired *before* trigger_mopdb_dag here,
    # while the manual DAG triggers MOPDB first - confirm which ordering is
    # intended.
    check_mopdb >> end_log_table >> trigger_mopdb_dag >> finalize_workflow

View File

@@ -0,0 +1,424 @@
"""
DAG ID set to w_ODS_RQSD_PROCESS_MANUAL
Removed OBSERVATIONS from TASK_CONFIGS (only FX and SUBA_DEVO remain)
Updated run_devo_connector to always set should_run_mrds=True (no row count check)
Simplified check_should_run_mrds to always proceed if devo task succeeded (manual mode)
Removed special OBSERVATIONS handling logic
Trigger DAG ID changed to w_MOPDB_RQSD_PROCESS_MANUAL
Added "MANUAL" tag
Both FX and SUBA_DEVO tasks run in parallel from init
Removed skipped task handling for no data scenarios in finalize
"""
import sys
import os
from airflow import DAG
from airflow.operators.python import PythonOperator
from airflow.operators.dummy import DummyOperator
from airflow.operators.trigger_dagrun import TriggerDagRunOperator
from airflow.utils.dates import days_ago
from airflow.utils.trigger_rule import TriggerRule
from datetime import datetime, timedelta
import logging
try:
from airflow.exceptions import AirflowFailException, AirflowSkipException
except Exception:
from airflow.exceptions import AirflowException as AirflowFailException
from airflow.exceptions import AirflowSkipException
sys.path.append('/opt/airflow/python/connectors/devo')
sys.path.append('/opt/airflow/python/mrds_common')
sys.path.append('/opt/airflow/src/airflow/dags/ods/rqsd')
from mrds.utils.manage_runs import init_workflow as mrds_init_workflow, finalise_workflow as mrds_finalise_workflow
from devo_connector import DevoConnector
from mrds.core import main as mrds_main
from mrds.utils.security_utils import get_verified_run_id, verify_run_id
# Per-flow configuration for the manual run, keyed by logical task name.
# OBSERVATIONS is deliberately absent: only FX and SUBA_DEVO are re-runnable
# by hand (see the module docstring).
TASK_CONFIGS = {
    "m_ODS_RQSD_FX": {
        "flow_config_path": "/opt/airflow/src/airflow/dags/ods/rqsd/rqsd_process/config/yaml/m_ODS_RQSD_FX.yaml",
        "env_config_path": "/opt/airflow/python/connectors/devo/config/env_config_rqsd.yaml",
        "source_filename": "RQSD_FX.csv",
        "config_file": "/opt/airflow/src/airflow/dags/ods/rqsd/rqsd_process/config/yaml/m_ODS_RQSD_FX_PARSE.yaml"
    },
    "m_ODS_RQSD_SUBA_DEVO": {
        "flow_config_path": "/opt/airflow/src/airflow/dags/ods/rqsd/rqsd_process/config/yaml/m_ODS_RQSD_SUBA_DEVO.yaml",
        "env_config_path": "/opt/airflow/python/connectors/devo/config/env_config_rqsd.yaml",
        "source_filename": "RQSD_SUBA_DEVO.csv",
        "config_file": "/opt/airflow/src/airflow/dags/ods/rqsd/rqsd_process/config/yaml/m_ODS_RQSD_SUBA_DEVO_PARSE.yaml"
    }
}
# Standard Airflow task defaults for the manual DAG.
default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': days_ago(1),
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
}
# Fixed DAG id (not filename-derived like the scheduled variant).
dag_id = "w_ODS_RQSD_PROCESS_MANUAL"
# Identity under which this run is registered in the MRDS workflow tables.
WORKFLOW_CONFIG = {
    "database_name": "ODS",
    "workflow_name": dag_id
}
with DAG(
dag_id=dag_id,
default_args=default_args,
description='Manual trigger for RQSD data processing',
schedule_interval=None,
catchup=False,
tags=["Devo", "RQSD", "MRDS", "Connector", "Multi-Task", "MANUAL"]
) as dag:
    def init_workflow_task(**context):
        """Open an MRDS workflow run and share its identity with downstream tasks.

        Logs a snapshot of the relevant environment (DB password masked),
        validates that the loader DB credentials are present, registers a new
        workflow run via ``mrds_init_workflow`` and pushes to XCom:
          - ``workflow_history_key``: key returned by MRDS for this run
          - ``workflow_context``: dict of ``run_id`` + ``a_workflow_history_key``
          - ``env``: resolved MRDS environment (defaults to ``"dev"``)

        Raises:
            ValueError: if any required DB environment variable is missing.
        """
        try:
            database_name = WORKFLOW_CONFIG["database_name"]
            workflow_name = WORKFLOW_CONFIG["workflow_name"]
            # Environment snapshot for troubleshooting; the password value is
            # replaced with a placeholder so it never reaches the task log.
            env_vars = {
                'MRDS_ENV': os.getenv("MRDS_ENV"),
                'MRDS_LOADER_DB_USER': os.getenv("MRDS_LOADER_DB_USER"),
                'MRDS_LOADER_DB_PASS': '***MASKED***' if os.getenv("MRDS_LOADER_DB_PASS") else None,
                'MRDS_LOADER_DB_TNS': os.getenv("MRDS_LOADER_DB_TNS"),
                'BUCKET_NAMESPACE': os.getenv("BUCKET_NAMESPACE"),
            }
            for key, value in env_vars.items():
                logging.info(f"{key}: {value}")
            # Run id comes from the security helper rather than raw context —
            # presumably it validates/sanitises the id; see mrds.utils.security_utils.
            run_id_value = get_verified_run_id(context)
            logging.info(f"Task ID: {context.get('task_instance_key_str', 'N/A')}")
            logging.info(f"Run ID: {run_id_value}")
            logging.info(f"Execution Date: {context.get('execution_date', 'N/A')}")
            dag_obj = context.get('dag')
            dag_id_str = dag_obj.dag_id if dag_obj else 'N/A'
            logging.info(f"DAG ID: {dag_id_str}")
            env = os.getenv("MRDS_ENV", "dev")
            username = os.getenv("MRDS_LOADER_DB_USER")
            password = os.getenv("MRDS_LOADER_DB_PASS")
            tnsalias = os.getenv("MRDS_LOADER_DB_TNS")
            if not all([username, password, tnsalias]):
                # Name every missing variable so the failure is actionable.
                missing_vars = []
                if not username:
                    missing_vars.append("MRDS_LOADER_DB_USER")
                if not password:
                    missing_vars.append("MRDS_LOADER_DB_PASS")
                if not tnsalias:
                    missing_vars.append("MRDS_LOADER_DB_TNS")
                error_msg = f"Missing required environment variables: {', '.join(missing_vars)}"
                logging.error(error_msg)
                raise ValueError(error_msg)
            logging.info(f"Initializing {workflow_name} workflow for env '{env}'")
            workflow_run_id = run_id_value
            a_workflow_history_key = mrds_init_workflow(database_name, workflow_name, workflow_run_id)
            logging.info(f"Initialized workflow with history key: {a_workflow_history_key}")
            workflow_context = {
                "run_id": workflow_run_id,
                "a_workflow_history_key": a_workflow_history_key
            }
            ti = context['ti']
            ti.xcom_push(key='workflow_history_key', value=a_workflow_history_key)
            ti.xcom_push(key='workflow_context', value=workflow_context)
            ti.xcom_push(key='env', value=env)
            logging.info("Workflow initialization completed successfully")
        except Exception as e:
            logging.error(f"Error initializing workflow: {e}", exc_info=True)
            raise
    def run_devo_connector(**context):
        """Export one flow's data from Devo into the ODS inbox (manual mode).

        Resolves the flow name from the task id (strips the ``devo_`` prefix),
        builds a ``DevoConnector`` from the flow/env YAMLs in ``TASK_CONFIGS``
        and runs it with the shared workflow context from ``init_workflow``.

        Unlike the scheduled DAG, ``should_run_mrds`` is pushed as True
        unconditionally on success — manual runs process even zero-row
        exports.  On failure the XCom markers are pushed as failed.

        Returns:
            Number of rows processed by the connector.
        """
        try:
            ti = context['ti']
            task_id = context['task'].task_id
            if task_id.startswith('devo_'):
                task_name = task_id.replace('devo_', '')
            else:
                task_name = task_id
            task_config = TASK_CONFIGS.get(task_name)
            if not task_config:
                raise ValueError(f"No configuration found for task: {task_name}")
            flow_config_path = task_config["flow_config_path"]
            env_config_path = task_config["env_config_path"]
            workflow_context = ti.xcom_pull(key='workflow_context', task_ids='init_workflow')
            env = ti.xcom_pull(key='env', task_ids='init_workflow')
            if not workflow_context:
                raise ValueError("No workflow_context from init task")
            logging.info(f"Starting Devo connector for env '{env}' - {task_name}")
            devo_connector = DevoConnector(
                flow_config_path=flow_config_path,
                env_config_path=env_config_path,
                env=env,
                logger=logging.getLogger(f"devo_connector_{env}_{task_name}")
            )
            row_count = devo_connector.run(workflow_context)
            logging.info(f"Devo connector completed successfully for {task_name}. Processed {row_count} rows.")
            ti.xcom_push(key='row_count', value=row_count)
            ti.xcom_push(key='devo_success', value=True)
            # Manual mode: always hand over to MRDS, regardless of row count.
            ti.xcom_push(key='should_run_mrds', value=True)
            return row_count
        except Exception as e:
            logging.error(f"Error running Devo connector: {e}", exc_info=True)
            # NOTE(review): re-reads context['ti'] here; if the original error
            # was the 'ti' lookup itself this re-raises from the handler.
            ti = context['ti']
            ti.xcom_push(key='row_count', value=0)
            ti.xcom_push(key='devo_success', value=False)
            ti.xcom_push(key='should_run_mrds', value=False)
            raise
def check_should_run_mrds(**context):
ti = context['ti']
dag_run = context['dag_run']
task_id = context['task'].task_id
if task_id.startswith('check_'):
task_name = task_id.replace('check_', '')
else:
task_name = task_id
devo_task_id = f'devo_{task_name}'
devo_task_instance = dag_run.get_task_instance(devo_task_id)
logging.info(f"Devo task state: {devo_task_instance.state}")
if devo_task_instance.state == 'failed':
logging.info(f"Devo connector failed for {task_name} - skipping MRDS task")
raise AirflowSkipException(f"Devo connector failed for {task_name}")
logging.info(f"MRDS task will run for {task_name} - manual trigger mode")
return True
def run_mrds_task(**context):
try:
ti = context['ti']
task_id = context['task'].task_id
if task_id.endswith('_PARSE'):
task_name = task_id.replace('_PARSE', '')
else:
task_name = task_id
devo_task_id = f'devo_{task_name}'
task_config = TASK_CONFIGS.get(task_name)
if not task_config:
raise ValueError(f"No configuration found for task: {task_name}")
source_filename = task_config["source_filename"]
config_file = task_config["config_file"]
workflow_context = ti.xcom_pull(key='workflow_context', task_ids='init_workflow')
row_count = ti.xcom_pull(key='row_count', task_ids=devo_task_id)
if not workflow_context:
raise ValueError("No workflow_context from init task")
logging.info(f"Starting MRDS task for {task_name} with workflow context: {workflow_context}")
logging.info(f"Processing {row_count} rows from Devo connector")
mrds_main(workflow_context, source_filename, config_file, generate_workflow_context=False)
logging.info(f"MRDS task completed successfully for {task_name}")
return "SUCCESS"
except Exception as e:
logging.error(f"Error running MRDS task: {e}", exc_info=True)
raise
    def check_success_for_mopdb(**context):
        """Gate for the MOPDB trigger (manual mode).

        Skips (AirflowSkipException) when any Devo or MRDS task failed.  Any
        unexpected error also results in a skip rather than a failure, so the
        MOPDB trigger is strictly best-effort.  Unlike the scheduled DAG,
        there is no "all skipped" check — manual runs always have data.
        """
        try:
            ti = context['ti']
            dag_run = context['dag_run']
            has_failures = False
            failure_reasons = []
            for task_name in TASK_CONFIGS.keys():
                devo_task_id = f'devo_{task_name}'
                mrds_task_id = f'{task_name}_PARSE'
                devo_task = dag_run.get_task_instance(devo_task_id)
                mrds_task = dag_run.get_task_instance(mrds_task_id)
                if devo_task.state == 'failed':
                    has_failures = True
                    failure_reasons.append(f"{task_name}: Devo connector failed")
                if mrds_task.state == 'failed':
                    has_failures = True
                    failure_reasons.append(f"{task_name}: MRDS task failed")
            if has_failures:
                error_msg = f"Tasks failed - skipping MOPDB trigger: {', '.join(failure_reasons)}"
                logging.info(error_msg)
                raise AirflowSkipException(error_msg)
            logging.info("All tasks completed successfully - proceeding to trigger MOPDB")
            return "SUCCESS"
        except AirflowSkipException:
            raise
        except Exception as e:
            logging.error(f"Error checking success for MOPDB: {e}", exc_info=True)
            raise AirflowSkipException(f"Error checking success - skipping MOPDB trigger: {e}")
def end_log_table_task(**context):
try:
logging.info("End log table task - always runs at the end")
return "SUCCESS"
except Exception as e:
logging.error(f"Error in end log table task: {e}", exc_info=True)
raise
def finalise_workflow_task(**context):
try:
ti = context['ti']
dag_run = context['dag_run']
a_workflow_history_key = ti.xcom_pull(key='workflow_history_key', task_ids='init_workflow')
if a_workflow_history_key is None:
raise ValueError("No workflow history key found in XCom; cannot finalise workflow")
workflow_success = True
failure_reasons = []
for task_name in TASK_CONFIGS.keys():
devo_task_id = f'devo_{task_name}'
mrds_task_id = f'{task_name}_PARSE'
devo_task = dag_run.get_task_instance(devo_task_id)
mrds_task = dag_run.get_task_instance(mrds_task_id)
if devo_task.state == 'failed':
workflow_success = False
failure_reasons.append(f"{task_name}: Devo connector failed")
if mrds_task.state == 'failed':
workflow_success = False
failure_reasons.append(f"{task_name}: MRDS task failed")
trigger_mopdb_task = dag_run.get_task_instance('trigger_mopdb_dag')
if trigger_mopdb_task.state == 'failed':
workflow_success = False
failure_reasons.append("MOPDB trigger failed")
end_log_task = dag_run.get_task_instance('end_log_table')
if end_log_task.state == 'failed':
workflow_success = False
failure_reasons.append("End log table failed")
if workflow_success:
mrds_finalise_workflow(a_workflow_history_key, "Y")
logging.info(f"Finalised workflow with history key {a_workflow_history_key} as SUCCESS")
else:
mrds_finalise_workflow(a_workflow_history_key, "N")
logging.error(f"Finalised workflow with history key {a_workflow_history_key} as FAILED")
logging.error(f"Failure reasons: {', '.join(failure_reasons)}")
raise AirflowFailException(f"Workflow failed: {', '.join(failure_reasons)}")
except AirflowFailException:
raise
except Exception as e:
logging.error(f"Error finalizing workflow: {e}", exc_info=True)
try:
if 'a_workflow_history_key' in locals() and a_workflow_history_key:
mrds_finalise_workflow(a_workflow_history_key, "N")
except:
pass
raise AirflowFailException(f"Workflow finalization failed: {e}")
    # ---- Framework tasks -------------------------------------------------
    init_workflow = PythonOperator(
        task_id='init_workflow',
        python_callable=init_workflow_task,
        provide_context=True,
    )
    # Runs whatever happened upstream (ALL_DONE) and converts failures into
    # a skip of the MOPDB trigger.
    check_mopdb = PythonOperator(
        task_id='check_success_for_mopdb',
        python_callable=check_success_for_mopdb,
        provide_context=True,
        trigger_rule=TriggerRule.ALL_DONE,
    )
    trigger_mopdb_dag = TriggerDagRunOperator(
        task_id='trigger_mopdb_dag',
        trigger_dag_id='w_MOPDB_RQSD_PROCESS_MANUAL',
        wait_for_completion=False,
        trigger_rule=TriggerRule.NONE_FAILED,
    )
    end_log_table = PythonOperator(
        task_id='end_log_table',
        python_callable=end_log_table_task,
        trigger_rule=TriggerRule.ALL_DONE,
    )
    finalize_workflow = PythonOperator(
        task_id='finalize_workflow',
        python_callable=finalise_workflow_task,
        provide_context=True,
        trigger_rule=TriggerRule.ALL_DONE,
    )
    # ---- Per-flow tasks --------------------------------------------------
    # All configured flows (FX, SUBA_DEVO) run in parallel straight from init;
    # there is no OBSERVATIONS gating in the manual DAG.
    all_tasks = []
    for task_name in TASK_CONFIGS.keys():
        devo_task = PythonOperator(
            task_id=f'devo_{task_name}',
            python_callable=run_devo_connector,
            provide_context=True,
        )
        check_task = PythonOperator(
            task_id=f'check_{task_name}',
            python_callable=check_should_run_mrds,
            provide_context=True,
            trigger_rule=TriggerRule.ALL_DONE,
        )
        mrds_task = PythonOperator(
            task_id=f'{task_name}_PARSE',
            python_callable=run_mrds_task,
            provide_context=True,
        )
        devo_task >> check_task >> mrds_task
        all_tasks.extend([devo_task, check_task, mrds_task])
    devo_tasks = [task for task in all_tasks if task.task_id.startswith('devo_')]
    mrds_tasks = [task for task in all_tasks if task.task_id.endswith('_PARSE')]
    init_workflow >> devo_tasks
    # Fan all *_PARSE tasks into the MOPDB gate.
    for mrds_task in mrds_tasks:
        mrds_task >> check_mopdb
    # NOTE(review): MOPDB is triggered *before* end_log_table here, the
    # reverse of the scheduled DAG's ordering - confirm which is intended.
    check_mopdb >> trigger_mopdb_dag >> end_log_table >> finalize_workflow