This commit is contained in:
Grzegorz Michalski
2026-03-02 09:47:35 +01:00
commit 2c225d68ac
715 changed files with 130067 additions and 0 deletions

View File

View File

View File

@@ -0,0 +1,398 @@
# App configurations
encoding_type: latin1
# Global configurations
tmpdir: /tmp
inbox_prefix: INBOX/CSDB/CentralizedSecuritiesDissemination
archive_prefix: ARCHIVE/CSDB/CentralizedSecuritiesDissemination
workflow_name: w_ODS_CSDB_DEBT
validation_schema_path: None
file_type: csv
tasks:
- task_name: m_ODS_CSDB_DEBT_PARSE
ods_prefix: INBOX/CSDB/CentralizedSecuritiesDissemination/CSDB_DEBT
output_table: CSDB_DEBT
output_columns:
- type: 'a_key'
column_header: 'A_KEY'
- type: 'workflow_key'
column_header: 'A_WORKFLOW_HISTORY_KEY'
- type: 'csv_header'
value: 'Date last modified'
column_header: 'NEWUPDATED'
- type: 'csv_header'
value: 'Extraction date'
column_header: 'IDLOADDATE_DIM'
- type: 'csv_header'
value: 'ISIN code'
column_header: 'EXTERNALCODE_ISIN'
- type: 'csv_header'
value: 'National instrument code type'
column_header: 'EXTERNALCODETYPE_NC'
- type: 'csv_header'
value: 'National instrument code'
column_header: 'EXTERNALCODE_NATIONAL'
- type: 'csv_header'
value: 'Internal instrument code'
column_header: 'IDIRINSTRUMENT'
- type: 'csv_header'
value: 'Short name'
column_header: 'SHORTNAME'
- type: 'csv_header'
value: 'Bond duration'
column_header: 'VA_BONDDURATION'
- type: 'csv_header'
value: 'Debt type'
column_header: 'IDIRDEBTTYPE'
- type: 'csv_header'
value: 'Asset securitisation type'
column_header: 'IDIRASSETSECTYPE'
- type: 'csv_header'
value: 'CFI classification'
column_header: 'IDIRCLASSIFICATIONCODE_CFI'
- type: 'csv_header'
value: 'Instrument ESA 95 class'
column_header: 'IDIRCLASSIFICATIONCODE_ESAI'
- type: 'csv_header'
value: 'Instrument ESA 95 class - value type'
column_header: 'IDIRCLASSIFICATIONCODE_ESAI_DM'
- type: 'csv_header'
value: 'Nominal currency'
column_header: 'IDIRCURRENCY_NOMINAL'
- type: 'csv_header'
value: 'Amount issued'
column_header: 'AMOUNTISSUED'
- type: 'csv_header'
value: 'Amount outstanding'
column_header: 'AMOUNTOUTSTANDING'
- type: 'csv_header'
value: 'Amount outstanding in EUR'
column_header: 'AMOUNTOUTSTANDING_EUR'
- type: 'csv_header'
value: 'Pool factor'
column_header: 'POOLFACTOR'
- type: 'csv_header'
value: 'Issue price'
column_header: 'ISSUEPRICE'
- type: 'csv_header'
value: 'Issue date'
column_header: 'IDISSUEDATE'
- type: 'csv_header'
value: 'Coupon type'
column_header: 'IDIRCOUPONTYPE'
- type: 'csv_header'
value: 'Last Coupon frequency'
column_header: 'IDIRCOUPONFREQUENCY'
- type: 'csv_header'
value: 'Coupon currency'
column_header: 'IDIRCURRENCY_COUPON'
- type: 'csv_header'
value: 'Last Coupon rate'
column_header: 'COUPONRATE'
- type: 'csv_header'
value: 'Last Coupon date'
column_header: 'COUPONDATE'
- type: 'csv_header'
value: 'Redemption type'
column_header: 'IDIRREDEMPTIONTYPE'
- type: 'csv_header'
value: 'Redemption frequency'
column_header: 'IDIRREDEMPTIONFREQUENCY'
- type: 'csv_header'
value: 'Redemption currency'
column_header: 'IDIRCURRENCY_REDEMPTION'
- type: 'csv_header'
value: 'Redemption price'
column_header: 'REDEMPTIONPRICE'
- type: 'csv_header'
value: 'Maturity date'
column_header: 'IDMATURITYDATE'
- type: 'csv_header'
value: 'Issuer organisation alias type'
column_header: 'IDIRORGANISATIONALIASTYPE_IS'
- type: 'csv_header'
value: 'Issuer organisation alias code'
column_header: 'ISSUERSOURCECODE'
- type: 'csv_header'
value: 'Issuer MFI code'
column_header: 'ISSUEREXTERNALCODE_MFI'
- type: 'csv_header'
value: 'Issuer BIC code'
column_header: 'ISSUEREXTERNALCODE_BIC'
- type: 'csv_header'
value: 'Issuer BEI code'
column_header: 'ISSUEREXTERNALCODE_BEI'
- type: 'csv_header'
value: 'Internal organisation code'
column_header: 'IDIRORGANISATION_ISSUER'
- type: 'csv_header'
value: 'Issuer name'
column_header: 'ISSUERNAME'
- type: 'csv_header'
value: 'Issuer domicile country'
column_header: 'IDIRCOUNTRY'
- type: 'csv_header'
value: 'Issuer domicile country - value type'
column_header: 'IDIRCOUNTRY_DM'
- type: 'csv_header'
value: 'Issuer ESA 95 sector'
column_header: 'IDIRCLASSIFICATIONCODE_ESAO'
- type: 'csv_header'
value: 'Issuer ESA 95 sector - value type'
column_header: 'IDIRCLASSIFICATIONCODE_ESAO_DM'
- type: 'csv_header'
value: 'Issuer NACE sector'
column_header: 'IDIRCLASSIFICATIONCODE_NACE'
- type: 'csv_header'
value: 'Price date'
column_header: 'PUBLICATIONPRICEDATE'
- type: 'csv_header'
value: 'Price value'
column_header: 'PUBLICATIONPRICE'
- type: 'csv_header'
value: 'Price value - type'
column_header: 'PUBLICATIONPRICETYPE'
- type: 'csv_header'
value: 'Quotation basis'
column_header: 'PUBLICATIONPRICEQUOTATIONBASIS'
- type: 'csv_header'
value: 'Monthly average price'
column_header: 'MONTHLYAVERAGEPRICE'
- type: 'csv_header'
value: 'Accrual start date'
column_header: 'ACCRUALSTARTDATE'
- type: 'csv_header'
value: 'Accrued income factor'
column_header: 'DEBTACCRUALDEBTOR'
- type: 'csv_header'
value: 'Accrued income factor - value type'
column_header: 'DEBTACCRUALDEBTOR_DM'
- type: 'csv_header'
value: 'Accrued income (Creditor)'
column_header: 'DEBTACCRUALCREDITOR'
- type: 'csv_header'
value: 'Accrued income (Creditor) - value type'
column_header: 'DEBTACCRUALCREDITOR_TYP'
- type: 'csv_header'
value: 'Accrued interest'
column_header: 'ACCRUEDINTEREST'
- type: 'csv_header'
value: 'Yield to maturity'
column_header: 'YTMNONOPTIONADJUSTED'
- type: 'csv_header'
value: 'ESCB issuer identifier'
column_header: 'ESCB_ISSUER_IDENT'
- type: 'csv_header'
value: 'ESCB issuer identifier type'
column_header: 'VA_ESCBCODETYPE'
- type: 'csv_header'
value: 'Issuer compound ID'
column_header: 'IDUDCMPPARTY'
- type: 'csv_header'
value: 'Amount Oustanding type'
column_header: 'AMOUNTOUTSTANDINGTYPE'
- type: 'csv_header'
value: 'Market Capitalisation'
column_header: 'MARKETCAPITALISATION'
- type: 'csv_header'
value: 'Market Capitalisation in euro'
column_header: 'MARKETCAPITALISATION_EUR'
- type: 'csv_header'
value: 'Security Status'
column_header: 'VA_SECURITYSTATUS'
- type: 'csv_header'
value: 'Instrument suppl class'
column_header: 'VA_INSTRSUPPLEMENTARYCLASS'
- type: 'csv_header'
value: 'Residual maturity class'
column_header: 'VA_RESIDUALMATURITYCLASS'
- type: 'csv_header'
value: 'Is In SEC'
column_header: 'VA_ISINSEC'
- type: 'csv_header'
value: 'Is In EADB'
column_header: 'VA_ISELIGIBLEFOREADB'
- type: 'csv_header'
value: 'Instrument ESA 2010 class'
column_header: 'IDIRCLASSIFICATIONCODE_ESAI10'
- type: 'csv_header'
value: 'Issuer ESA 2010 sector'
column_header: 'IDIRCLASSIFICATIONCODE_ESAO10'
- type: 'csv_header'
value: 'Primary asset classification 2'
column_header: 'IDIRDEBTTYPE_N'
- type: 'csv_header'
value: 'Instruments seniority type'
column_header: 'SENIORITY'
- type: 'csv_header'
value: 'Issuer LEI code'
column_header: 'ISSUEREXTERNALCODE_LEI'
- type: 'csv_header'
value: 'Instrument ESA 2010 class - value type'
column_header: 'INSTR_ESA2010_CLASS_VALUETYPE'
- type: 'csv_header'
value: 'Issuer ESA 2010 class - value type'
column_header: 'ISS_ESA2010_CLASS_VALUETYPE'
- type: 'csv_header'
value: 'Security status date'
column_header: 'VA_SECURITYSTATUSDATE'
- type: 'csv_header'
value: 'Group type'
column_header: 'GROUP_TYPE'
- type: 'csv_header'
value: 'Has embedded option'
column_header: 'HASEMBEDDEDOPTION'
- type: 'csv_header'
value: 'Volume traded'
column_header: 'VOLUMETRADED'
- type: 'csv_header'
value: 'Primary listing name'
column_header: 'PRIMARYLISTINGNAME'
- type: 'csv_header'
value: 'Primary listing residency country'
column_header: 'PRIMARYLISTINGCOUNTRY'
- type: 'csv_header'
value: 'Instrument portfolio flags'
column_header: 'VA_INSTRPORTFLAGS'
- type: 'csv_header'
value: 'Residual maturity'
column_header: 'RESIDUALMATURITY'
- type: 'csv_header'
value: 'Original maturity'
column_header: 'ORIGINAL_MATURITY'
- type: 'csv_header'
value: 'CFIN classification'
column_header: 'IDIRCLASSIFICATIONCODE_CFIN'
- type: 'csv_header'
value: 'First scheduled Coupon date'
column_header: 'COUPONFIRSTPAYMENTDATE'
- type: 'csv_header'
value: 'Last scheduled Coupon date'
column_header: 'COUPONLASTPAYMENTDATE'
- type: 'csv_header'
value: 'Coupon rate underlying ISIN'
column_header: 'COUPONRATEUNDERLYINGCODE_ISIN'
- type: 'csv_header'
value: 'Coupon rate spread'
column_header: 'COUPONRATESPREAD'
- type: 'csv_header'
value: 'Coupon rate multiplier'
column_header: 'COUPONRATEMULTIPLIER'
- type: 'csv_header'
value: 'Coupon rate cap'
column_header: 'COUPONRATECAP'
- type: 'csv_header'
value: 'Coupon rate floor'
column_header: 'COUPONRATEFLOOR'
- type: 'csv_header'
value: 'Issue date tranche'
column_header: 'IDISSUEDATE_TRANCHE'
- type: 'csv_header'
value: 'Issue price tranche'
column_header: 'ISSUEPRICE_TRANCHE'
- type: 'csv_header'
value: 'Is private placement'
column_header: 'VA_ISPRIVATEPLACEMENT'
- type: 'csv_header'
value: 'RIAD code'
column_header: 'RIAD_CODE'
- type: 'csv_header'
value: 'RIAD OUID'
column_header: 'RIAD_OUID'
- type: 'csv_header'
value: 'ESG Flag 1'
column_header: 'ESG1'
- type: 'csv_header'
value: 'ESG Flag 2'
column_header: 'ESG2'
- type: 'csv_header'
value: 'ESG Flag 3'
column_header: 'ESG3'
- type: 'csv_header'
value: 'Strip'
column_header: 'STRIP'
- type: 'csv_header'
value: 'Depository receipt'
column_header: 'DEPOSITORY_RECEIPT'
- type: 'csv_header'
value: 'Rule 144A'
column_header: 'RULE_144A'
- type: 'csv_header'
value: 'Reg S'
column_header: 'REG_S'
- type: 'csv_header'
value: 'Warrant'
column_header: 'WARRANT'
- type: 'csv_header'
value: 'CSEC Relevance - stock'
column_header: 'CSEC_RELEVANCE_STOCK'
- type: 'csv_header'
value: 'CSEC relevance - gross issuance'
column_header: 'CSEC_RELEVANCE_GROSS_ISSUANCE'
- type: 'csv_header'
value: 'CSEC relevance - redemption'
column_header: 'CSEC_RELEVANCE_REDEMPTION'
- type: 'csv_header'
value: 'Accruing coupon'
column_header: 'ACCRUING_COUPON'
- type: 'csv_header'
value: 'Accruing discount'
column_header: 'ACCRUING_DISCOUNT'
- type: 'csv_header'
value: 'STEP Id'
column_header: 'STEPID'
- type: 'csv_header'
value: 'Program Name'
column_header: 'PROGRAMNAME'
- type: 'csv_header'
value: 'Program Ceiling'
column_header: 'PROGRAMCEILING'
- type: 'csv_header'
value: 'Program Status'
column_header: 'PROGRAMSTATUS'
- type: 'csv_header'
value: 'Issuer NACE21 sector'
column_header: 'ISSUERNACE21SECTOR'
- type: 'csv_header'
value: 'Instrument quotation basis'
column_header: 'INSTRUMENTQUOTATIONBASIS'
- type: 'csv_header'
value: 'placeholder 38'
column_header: 'PLACEHOLDER38'
- type: 'csv_header'
value: 'placeholder 39'
column_header: 'PLACEHOLDER39'
- type: 'csv_header'
value: 'placeholder 40'
column_header: 'PLACEHOLDER40'
- type: 'csv_header'
value: 'placeholder 41'
column_header: 'PLACEHOLDER41'
- type: 'csv_header'
value: 'placeholder 42'
column_header: 'PLACEHOLDER42'
- type: 'csv_header'
value: 'placeholder 43'
column_header: 'PLACEHOLDER43'
- type: 'csv_header'
value: 'placeholder 44'
column_header: 'PLACEHOLDER44'
- type: 'csv_header'
value: 'placeholder 45'
column_header: 'PLACEHOLDER45'
- type: 'csv_header'
value: 'placeholder 46'
column_header: 'PLACEHOLDER46'
- type: 'csv_header'
value: 'placeholder 47'
column_header: 'PLACEHOLDER47'
- type: 'csv_header'
value: 'placeholder 48'
column_header: 'PLACEHOLDER48'
- type: 'csv_header'
value: 'placeholder 49'
column_header: 'PLACEHOLDER49'
- type: 'csv_header'
value: 'placeholder 50'
column_header: 'PLACEHOLDER50'

View File

@@ -0,0 +1,400 @@
# App configurations
encoding_type: latin1
# Global configurations
tmpdir: /tmp
inbox_prefix: INBOX/CSDB/CentralizedSecuritiesDailyReferenceDataDissemination
archive_prefix: ARCHIVE/CSDB/CentralizedSecuritiesDailyReferenceDataDissemination
workflow_name: w_ODS_CSDB_DEBT_DAILY
validation_schema_path: None
file_type: csv
tasks:
- task_name: m_ODS_CSDB_DEBT_DAILY_PARSE
ods_prefix: INBOX/CSDB/CentralizedSecuritiesDailyReferenceDataDissemination/CSDB_DEBT_DAILY
output_table: CSDB_DEBT_DAILY
output_columns:
- type: 'a_key'
column_header: 'A_KEY'
- type: 'workflow_key'
column_header: 'A_WORKFLOW_HISTORY_KEY'
- type: 'csv_header'
value: 'Date last modified'
column_header: 'Date last modified'
- type: 'csv_header'
value: 'Extraction date'
column_header: 'Extraction date'
- type: 'csv_header'
value: 'ISIN code'
column_header: 'ISIN code'
- type: 'csv_header'
value: 'National instrument code type'
column_header: 'National instrument code type'
- type: 'csv_header'
value: 'National instrument code'
column_header: 'National instrument code'
- type: 'csv_header'
value: 'Internal instrument code'
column_header: 'Internal instrument code'
- type: 'csv_header'
value: 'Short name'
column_header: 'Short name'
- type: 'csv_header'
value: 'Bond duration'
column_header: 'Bond duration'
- type: 'csv_header'
value: 'Debt type'
column_header: 'Debt type'
- type: 'csv_header'
value: 'Asset securitisation type'
column_header: 'Asset securitisation type'
- type: 'csv_header'
value: 'CFI classification'
column_header: 'CFI classification'
- type: 'csv_header'
value: 'Instrument ESA 95 class'
column_header: 'Instrument ESA 95 class'
- type: 'csv_header'
value: 'Instrument ESA 95 class - value type'
column_header: 'Instrument ESA 95 class - value type'
- type: 'csv_header'
value: 'Nominal currency'
column_header: 'Nominal currency'
- type: 'csv_header'
value: 'Amount issued'
column_header: 'Amount issued'
- type: 'csv_header'
value: 'Amount outstanding'
column_header: 'Amount outstanding'
- type: 'csv_header'
value: 'Amount outstanding in EUR'
column_header: 'Amount outstanding in EUR'
- type: 'csv_header'
value: 'Pool factor'
column_header: 'Pool factor'
- type: 'csv_header'
value: 'Issue price'
column_header: 'Issue price'
- type: 'csv_header'
value: 'Issue date'
column_header: 'Issue date'
- type: 'csv_header'
value: 'Coupon type'
column_header: 'Coupon type'
- type: 'csv_header'
value: 'Last Coupon frequency'
column_header: 'Last Coupon frequency'
- type: 'csv_header'
value: 'Coupon currency'
column_header: 'Coupon currency'
- type: 'csv_header'
value: 'Last Coupon rate'
column_header: 'Last Coupon rate'
- type: 'csv_header'
value: 'Last Coupon date'
column_header: 'Last Coupon date'
- type: 'csv_header'
value: 'Redemption type'
column_header: 'Redemption type'
- type: 'csv_header'
value: 'Redemption frequency'
column_header: 'Redemption frequency'
- type: 'csv_header'
value: 'Redemption currency'
column_header: 'Redemption currency'
- type: 'csv_header'
value: 'Redemption price'
column_header: 'Redemption price'
- type: 'csv_header'
value: 'Maturity date'
column_header: 'Maturity date'
- type: 'csv_header'
value: 'Issuer organisation alias type'
column_header: 'Issuer organisation alias type'
- type: 'csv_header'
value: 'Issuer organisation alias code'
column_header: 'Issuer organisation alias code'
- type: 'csv_header'
value: 'Issuer MFI code'
column_header: 'Issuer MFI code'
- type: 'csv_header'
value: 'Issuer BIC code'
column_header: 'Issuer BIC code'
- type: 'csv_header'
value: 'Issuer BEI code'
column_header: 'Issuer BEI code'
- type: 'csv_header'
value: 'Internal organisation code'
column_header: 'Internal organisation code'
- type: 'csv_header'
value: 'Issuer name'
column_header: 'Issuer name'
- type: 'csv_header'
value: 'Issuer domicile country'
column_header: 'Issuer domicile country'
- type: 'csv_header'
value: 'Issuer domicile country - value type'
column_header: 'Issuer domicile country - value type'
- type: 'csv_header'
value: 'Issuer ESA 95 sector'
column_header: 'Issuer ESA 95 sector'
- type: 'csv_header'
value: 'Issuer ESA 95 sector - value type'
column_header: 'Issuer ESA 95 sector - value type'
- type: 'csv_header'
value: 'Issuer NACE sector'
column_header: 'Issuer NACE sector'
- type: 'csv_header'
value: 'Price date'
column_header: 'Price date'
- type: 'csv_header'
value: 'Price value'
column_header: 'Price value'
- type: 'csv_header'
value: 'Price value - type'
column_header: 'Price value - type'
- type: 'csv_header'
value: 'Quotation basis'
column_header: 'Quotation basis'
- type: 'csv_header'
value: 'Monthly average price'
column_header: 'Monthly average price'
- type: 'csv_header'
value: 'Accrual start date'
column_header: 'Accrual start date'
- type: 'csv_header'
value: 'Accrued income factor'
column_header: 'Accrued income factor'
- type: 'csv_header'
value: 'Accrued income factor - value type'
column_header: 'Accrued income factor - value type'
- type: 'csv_header'
value: 'Accrued income (Creditor)'
column_header: 'Accrued income (Creditor)'
- type: 'csv_header'
value: 'Accrued income (Creditor) - value type'
column_header: 'Accrued income (Creditor) - value type'
- type: 'csv_header'
value: 'Accrued interest'
column_header: 'Accrued interest'
- type: 'csv_header'
value: 'Yield to maturity'
column_header: 'Yield to maturity'
- type: 'csv_header'
value: 'ESCB issuer identifier'
column_header: 'ESCB issuer identifier'
- type: 'csv_header'
value: 'ESCB issuer identifier type'
column_header: 'ESCB issuer identifier type'
- type: 'csv_header'
value: 'Issuer compound ID'
column_header: 'Issuer compound ID'
- type: 'csv_header'
value: 'Amount Oustanding type'
column_header: 'Amount Oustanding type'
- type: 'csv_header'
value: 'Market Capitalisation'
column_header: 'Market Capitalisation'
- type: 'csv_header'
value: 'Market Capitalisation in euro'
column_header: 'Market Capitalisation in euro'
- type: 'csv_header'
value: 'Security Status'
column_header: 'Security Status'
- type: 'csv_header'
value: 'Instrument suppl class'
column_header: 'Instrument suppl class'
- type: 'csv_header'
value: 'Residual maturity class'
column_header: 'Residual maturity class'
- type: 'csv_header'
value: 'Is In SEC'
column_header: 'Is In SEC'
- type: 'csv_header'
value: 'Is In EADB'
column_header: 'Is In EADB'
- type: 'csv_header'
value: 'Instrument ESA 2010 class'
column_header: 'Instrument ESA 2010 class'
- type: 'csv_header'
value: 'Issuer ESA 2010 sector'
column_header: 'Issuer ESA 2010 sector'
- type: 'csv_header'
value: 'Primary asset classification 2'
column_header: 'Primary asset classification 2'
- type: 'csv_header'
value: 'Instruments seniority type'
column_header: 'Instruments seniority type'
- type: 'csv_header'
value: 'Issuer LEI code'
column_header: 'Issuer LEI code'
- type: 'csv_header'
value: 'Instrument ESA 2010 class - value type'
column_header: 'Instrument ESA 2010 class - value type'
- type: 'csv_header'
value: 'Issuer ESA 2010 class - value type'
column_header: 'Issuer ESA 2010 class - value type'
- type: 'csv_header'
value: 'Security status date'
column_header: 'Security status date'
- type: 'csv_header'
value: 'Group type'
column_header: 'Group type'
- type: 'csv_header'
value: 'Has embedded option'
column_header: 'Has embedded option'
- type: 'csv_header'
value: 'Volume traded'
column_header: 'Volume traded'
- type: 'csv_header'
value: 'Primary listing name'
column_header: 'Primary listing name'
- type: 'csv_header'
value: 'Primary listing residency country'
column_header: 'Primary listing residency country'
- type: 'csv_header'
value: 'Instrument portfolio flags'
column_header: 'Instrument portfolio flags'
- type: 'static'
value: ''
column_header: 'BOND_DURATION'
- type: 'csv_header'
value: 'Residual maturity'
column_header: 'Residual maturity'
- type: 'csv_header'
value: 'Original maturity'
column_header: 'Original maturity'
- type: 'csv_header'
value: 'CFIN classification'
column_header: 'CFIN classification'
- type: 'csv_header'
value: 'First scheduled Coupon date'
column_header: 'First scheduled Coupon date'
- type: 'csv_header'
value: 'Last scheduled Coupon date'
column_header: 'Last scheduled Coupon date'
- type: 'csv_header'
value: 'Coupon rate underlying ISIN'
column_header: 'Coupon rate underlying ISIN'
- type: 'csv_header'
value: 'Coupon rate spread'
column_header: 'Coupon rate spread'
- type: 'csv_header'
value: 'Coupon rate multiplier'
column_header: 'Coupon rate multiplier'
- type: 'csv_header'
value: 'Coupon rate cap'
column_header: 'Coupon rate cap'
- type: 'csv_header'
value: 'Coupon rate floor'
column_header: 'Coupon rate floor'
- type: 'csv_header'
value: 'Issue date tranche'
column_header: 'Issue date tranche'
- type: 'csv_header'
value: 'Issue price tranche'
column_header: 'Issue price tranche'
- type: 'csv_header'
value: 'Is private placement'
column_header: 'Is private placement'
- type: 'csv_header'
value: 'RIAD code'
column_header: 'RIAD code'
- type: 'csv_header'
value: 'RIAD OUID'
column_header: 'RIAD OUID'
- type: 'csv_header'
value: 'ESG Flag 1'
column_header: 'ESG Flag 1'
- type: 'csv_header'
value: 'ESG Flag 2'
column_header: 'ESG Flag 2'
- type: 'csv_header'
value: 'ESG Flag 3'
column_header: 'ESG Flag 3'
- type: 'csv_header'
value: 'Strip'
column_header: 'Strip'
- type: 'csv_header'
value: 'Depository receipt'
column_header: 'Depository receipt'
- type: 'csv_header'
value: 'Rule 144A'
column_header: 'Rule 144A'
- type: 'csv_header'
value: 'Reg S'
column_header: 'Reg S'
- type: 'csv_header'
value: 'Warrant'
column_header: 'Warrant'
- type: 'csv_header'
value: 'CSEC Relevance - stock'
column_header: 'CSEC Relevance - stock'
- type: 'csv_header'
value: 'CSEC relevance - gross issuance'
column_header: 'CSEC relevance - gross issuance'
- type: 'csv_header'
value: 'CSEC relevance - redemption'
column_header: 'CSEC relevance - redemption'
- type: 'csv_header'
value: 'Accruing coupon'
column_header: 'Accruing coupon'
- type: 'csv_header'
value: 'Accruing discount'
column_header: 'Accruing discount'
- type: 'csv_header'
value: 'STEP Id'
column_header: 'STEP Id'
- type: 'csv_header'
value: 'Program Name'
column_header: 'Program Name'
- type: 'csv_header'
value: 'Program Ceiling'
column_header: 'Program Ceiling'
- type: 'csv_header'
value: 'Program Status'
column_header: 'Program Status'
- type: 'csv_header'
value: 'Issuer NACE21 sector'
column_header: 'Issuer NACE21 sector'
- type: 'csv_header'
value: 'Instrument quotation basis'
column_header: 'Instrument quotation basis'
- type: 'csv_header'
value: 'placeholder 38'
column_header: 'placeholder 38'
- type: 'csv_header'
value: 'placeholder 39'
column_header: 'placeholder 39'
- type: 'csv_header'
value: 'placeholder 40'
column_header: 'placeholder 40'
- type: 'csv_header'
value: 'placeholder 41'
column_header: 'placeholder 41'
- type: 'csv_header'
value: 'placeholder 42'
column_header: 'placeholder 42'
- type: 'csv_header'
value: 'placeholder 43'
column_header: 'placeholder 43'
- type: 'csv_header'
value: 'placeholder 44'
column_header: 'placeholder 44'
- type: 'csv_header'
value: 'placeholder 45'
column_header: 'placeholder 45'
- type: 'csv_header'
value: 'placeholder 46'
column_header: 'placeholder 46'
- type: 'csv_header'
value: 'placeholder 47'
column_header: 'placeholder 47'
- type: 'csv_header'
value: 'placeholder 48'
column_header: 'placeholder 48'
- type: 'csv_header'
value: 'placeholder 49'
column_header: 'placeholder 49'
- type: 'csv_header'
value: 'placeholder 50'
column_header: 'placeholder 50'

View File

View File

@@ -0,0 +1,103 @@
encoding_type: latin1
tmpdir: /tmp
inbox_prefix: INBOX/CSDB/FullRatingsDissemination
archive_prefix: ARCHIVE/CSDB/FullRatingsDissemination
workflow_name: w_ODS_CSDB_RATINGS_FULL
# File format configurations
validation_schema_path: None
file_type: csv
tasks:
- task_name: m_ODS_CSDB_INSTR_DESC_FULL_PARSE
ods_prefix: INBOX/CSDB/FullRatingsDissemination/CSDB_INSTR_DESC_FULL
output_table: CSDB_INSTR_DESC_FULL
output_columns:
- type: 'a_key'
column_header: 'A_KEY'
- type: 'workflow_key'
column_header: 'A_WORKFLOW_HISTORY_KEY'
- type: 'csv_header'
value: 'RDB_INSTR_ID'
column_header: 'IDIRINSTRUMENT'
- type: 'csv_header'
value: 'ISIN'
column_header: 'ISIN'
- type: 'csv_header'
value: 'MOO_INSTR_ID'
column_header: 'MOO_INSTR_ID'
- type: 'csv_header'
value: 'SNP_INSTR_ID'
column_header: 'SNP_INSTR_ID'
- type: 'csv_header'
value: 'FTC_INSTR_ID'
column_header: 'FITCH_IDENTIFIER'
- type: 'csv_header'
value: 'DBR_INSTR_ID'
column_header: 'DBRS_IDENTIFIER'
- type: 'csv_header'
value: 'EA_STATUS'
column_header: 'EA_STATUS'
- type: 'csv_header'
value: 'IS_TMS'
column_header: 'IS_TMS'
- type: 'csv_header'
value: 'DBRS_COVERED_BOND_PROGRAM_ID'
column_header: 'DBRS_COVERED_BOND_PROGRAM'
- type: 'csv_header'
value: 'FITCH_PROGRAM_ID'
column_header: 'FITCH_PRG_IDENTIFIER'
- type: 'csv_header'
value: 'MOO_DEAL_NUMBER'
column_header: 'MOO_DEAL_NUMBER'
- type: 'csv_header'
value: 'SNP_PROGRAM_ID'
column_header: 'SNP_PROGRAM_ID'
- type: 'csv_header'
value: 'DBRS_DEBT_TYPE'
column_header: 'IDIRDEBTTYPE'
- type: 'csv_header'
value: 'SNP_DEBT_TYPE'
column_header: 'SNP_DEBT_TYPE'
- type: 'csv_header'
value: 'MOODY_SENIORITY'
column_header: 'MOODY_SENIORITY'
- type: 'csv_header'
value: 'FITCH_DEBT_LEVEL_CODE'
column_header: 'FITCH_DEBT_LEVEL_CODE'
- type: 'csv_header'
value: 'DBRS_RANK_TYPE'
column_header: 'DBRS_RANK_TYPE'
- type: 'csv_header'
value: 'DBRS_SECURITY_TYPE'
column_header: 'DBRS_SECURITY_TYPE'
- type: 'csv_header'
value: 'SCO_DEBT_TYPE'
column_header: 'SCO_DEBT_TYPE'
- type: 'csv_header'
value: 'SCO_INSTR_ID'
column_header: 'SCO_INSTR_ID'
- type: 'csv_header'
value: 'SCO_COVERED_BOND_PROGRAM'
column_header: 'SCO_COVERED_BOND_PROGRAM'
- type: 'csv_header'
value: 'SCO_CATEGORY'
column_header: 'SCO_CATEGORY'
- type: 'csv_header'
value: 'PLACEHOLDER15'
column_header: 'PLACEHOLDER15'
- type: 'csv_header'
value: 'PLACEHOLDER16'
column_header: 'PLACEHOLDER16'
- type: 'csv_header'
value: 'PLACEHOLDER17'
column_header: 'PLACEHOLDER17'
- type: 'csv_header'
value: 'PLACEHOLDER18'
column_header: 'PLACEHOLDER18'
- type: 'csv_header'
value: 'PLACEHOLDER19'
column_header: 'PLACEHOLDER19'
- type: 'csv_header'
value: 'PLACEHOLDER20'
column_header: 'PLACEHOLDER20'

View File

@@ -0,0 +1,130 @@
encoding_type: latin1
# Global configurations
tmpdir: /tmp
inbox_prefix: INBOX/CSDB/FullRatingsDissemination
archive_prefix: ARCHIVE/CSDB/FullRatingsDissemination
workflow_name: w_ODS_CSDB_RATINGS_FULL
validation_schema_path: None
file_type: csv
tasks:
- task_name: m_ODS_CSDB_INSTR_RAT_FULL_PARSE
ods_prefix: INBOX/CSDB/FullRatingsDissemination/CSDB_INSTR_RAT_FULL
output_table: CSDB_INSTR_RAT_FULL
output_columns:
- type: 'a_key'
column_header: 'A_KEY'
- type: 'workflow_key'
column_header: 'A_WORKFLOW_HISTORY_KEY'
- type: 'csv_header'
value: 'RDB_INSTR_ID'
column_header: 'RDB_INSTR_ID'
- type: 'csv_header'
value: 'SOURCE'
column_header: 'SOURCE'
- type: 'csv_header'
value: 'RATING_SCHEME'
column_header: 'RATING_SCHEME'
- type: 'csv_header'
value: 'RATING'
column_header: 'RATING'
- type: 'csv_header'
value: 'RATING_DATE'
column_header: 'RATING_DATE'
- type: 'csv_header'
value: 'TIME_HORIZON'
column_header: 'TIME_HORIZON'
- type: 'csv_header'
value: 'CURRENCY_TYPE'
column_header: 'CURRENCY_TYPE'
- type: 'csv_header'
value: 'NOTES'
column_header: 'NOTES'
- type: 'csv_header'
value: 'VALID_FROM'
column_header: 'VALID_FROM'
- type: 'csv_header'
value: 'VALID_UNTIL'
column_header: 'VALID_UNTIL'
- type: 'csv_header'
value: 'RDB_RATINGS_ID'
column_header: 'RDB_RATINGS_ID'
- type: 'csv_header'
value: 'OUTLOOK'
column_header: 'WATCHLIST'
- type: 'csv_header'
value: 'OUTLOOK_DATE'
column_header: 'WATCHLIST_DATE'
- type: 'csv_header'
value: 'WATCHLIST'
column_header: 'OUTLOOK'
- type: 'csv_header'
value: 'WATCHLIST_DATE'
column_header: 'OUTLOOK_DATE'
- type: 'csv_header'
value: 'RATING_ACTION'
column_header: 'RATING_ACTION'
- type: 'csv_header'
value: 'RATING_ACTION_DATE'
column_header: 'RATING_ACTION_DATE'
- type: 'csv_header'
value: 'IS_PRELIMINARY'
column_header: 'IS_PRELIMINARY'
- type: 'csv_header'
value: 'RATING_RAW'
column_header: 'RATING_RAW'
- type: 'csv_header'
value: 'RATING_TYPE'
column_header: 'RATING_TYPE'
- type: 'csv_header'
value: 'ENDORSEMENT_INDICATOR'
column_header: 'ENDORSEMENT_INDICATOR'
- type: 'csv_header'
value: 'LAST_REVIEW_DATE'
column_header: 'LAST_REVIEW_DATE'
- type: 'csv_header'
value: 'PLACEHOLDER6'
column_header: 'PLACEHOLDER6'
- type: 'csv_header'
value: 'PLACEHOLDER7'
column_header: 'PLACEHOLDER7'
- type: 'csv_header'
value: 'PLACEHOLDER8'
column_header: 'PLACEHOLDER8'
- type: 'csv_header'
value: 'PLACEHOLDER9'
column_header: 'PLACEHOLDER9'
- type: 'csv_header'
value: 'PLACEHOLDER10'
column_header: 'PLACEHOLDER10'
- type: 'csv_header'
value: 'PLACEHOLDER11'
column_header: 'PLACEHOLDER11'
- type: 'csv_header'
value: 'PLACEHOLDER12'
column_header: 'PLACEHOLDER12'
- type: 'csv_header'
value: 'PLACEHOLDER13'
column_header: 'PLACEHOLDER13'
- type: 'csv_header'
value: 'PLACEHOLDER14'
column_header: 'PLACEHOLDER14'
- type: 'csv_header'
value: 'PLACEHOLDER15'
column_header: 'PLACEHOLDER15'
- type: 'csv_header'
value: 'PLACEHOLDER16'
column_header: 'PLACEHOLDER16'
- type: 'csv_header'
value: 'PLACEHOLDER17'
column_header: 'PLACEHOLDER17'
- type: 'csv_header'
value: 'PLACEHOLDER18'
column_header: 'PLACEHOLDER18'
- type: 'csv_header'
value: 'PLACEHOLDER19'
column_header: 'PLACEHOLDER19'
- type: 'csv_header'
value: 'PLACEHOLDER20'
column_header: 'PLACEHOLDER20'

View File

@@ -0,0 +1,106 @@
encoding_type: latin1
# Global configurations
tmpdir: /tmp
inbox_prefix: INBOX/CSDB/FullRatingsDissemination
archive_prefix: ARCHIVE/CSDB/FullRatingsDissemination
workflow_name: w_ODS_CSDB_RATINGS_FULL
validation_schema_path: None
file_type: csv
tasks:
- task_name: m_ODS_CSDB_ISSUER_DESC_FULL_PARSE
ods_prefix: INBOX/CSDB/FullRatingsDissemination/CSDB_ISSUER_DESC_FULL
output_table: CSDB_ISSUER_DESC_FULL
output_columns:
- type: 'a_key'
column_header: 'A_KEY'
- type: 'workflow_key'
column_header: 'A_WORKFLOW_HISTORY_KEY'
- type: 'csv_header'
value: 'RDB_ISSUER_ID'
column_header: 'RDB_ISSUER_ID'
- type: 'csv_header'
value: 'ISSUER_NAME'
column_header: 'ISSUERNAME'
- type: 'csv_header'
value: 'COUNTRY_DOMICILE'
column_header: 'COUNTRY_DOMICILE'
- type: 'csv_header'
value: 'IS_SOVEREIGN'
column_header: 'IS_SOVEREIGN'
- type: 'csv_header'
value: 'MOO_ISSUER_ID'
column_header: 'MOODY_IDENTIFIER'
- type: 'csv_header'
value: 'SNP_ISSUER_ID'
column_header: 'SNP_ISSUER_ID'
- type: 'csv_header'
value: 'FTC_ISSUER_ID'
column_header: 'FITCH_IDENTIFIER'
- type: 'csv_header'
value: 'DBR_ISSUER_ID'
column_header: 'DBRS_IDENTIFIER'
- type: 'csv_header'
value: 'LEI_ISSUER_ID'
column_header: 'LEI_ISSUER_ID'
- type: 'csv_header'
value: 'RIAD_CODE'
column_header: 'RIAD_CODE'
- type: 'csv_header'
value: 'RIAD_OUID'
column_header: 'RIAD_OUID'
- type: 'csv_header'
value: 'CLASH_GROUP_STATUS'
column_header: 'CLASH_GROUP_STATUS'
- type: 'csv_header'
value: 'SCO_ISSUER_ID'
column_header: 'SCO_ISSUER_ID'
- type: 'csv_header'
value: 'PLACEHOLDER5'
column_header: 'PLACEHOLDER5'
- type: 'csv_header'
value: 'PLACEHOLDER6'
column_header: 'PLACEHOLDER6'
- type: 'csv_header'
value: 'PLACEHOLDER7'
column_header: 'PLACEHOLDER7'
- type: 'csv_header'
value: 'PLACEHOLDER8'
column_header: 'PLACEHOLDER8'
- type: 'csv_header'
value: 'PLACEHOLDER9'
column_header: 'PLACEHOLDER9'
- type: 'csv_header'
value: 'PLACEHOLDER10'
column_header: 'PLACEHOLDER10'
- type: 'csv_header'
value: 'PLACEHOLDER11'
column_header: 'PLACEHOLDER11'
- type: 'csv_header'
value: 'PLACEHOLDER12'
column_header: 'PLACEHOLDER12'
- type: 'csv_header'
value: 'PLACEHOLDER13'
column_header: 'PLACEHOLDER13'
- type: 'csv_header'
value: 'PLACEHOLDER14'
column_header: 'PLACEHOLDER14'
- type: 'csv_header'
value: 'PLACEHOLDER15'
column_header: 'PLACEHOLDER15'
- type: 'csv_header'
value: 'PLACEHOLDER16'
column_header: 'PLACEHOLDER16'
- type: 'csv_header'
value: 'PLACEHOLDER17'
column_header: 'PLACEHOLDER17'
- type: 'csv_header'
value: 'PLACEHOLDER18'
column_header: 'PLACEHOLDER18'
- type: 'csv_header'
value: 'PLACEHOLDER19'
column_header: 'PLACEHOLDER19'
- type: 'csv_header'
value: 'PLACEHOLDER20'
column_header: 'PLACEHOLDER20'

View File

@@ -0,0 +1,131 @@
encoding_type: latin1
# Global configurations
tmpdir: /tmp
inbox_prefix: INBOX/CSDB/FullRatingsDissemination
archive_prefix: ARCHIVE/CSDB/FullRatingsDissemination
workflow_name: w_ODS_CSDB_RATINGS_FULL
validation_schema_path: None
file_type: csv
tasks:
- task_name: m_ODS_CSDB_ISSUER_RAT_FULL_PARSE
ods_prefix: INBOX/CSDB/FullRatingsDissemination/CSDB_ISSUER_RAT_FULL
output_table: CSDB_ISSUER_RAT_FULL
output_columns:
- type: 'a_key'
column_header: 'A_KEY'
- type: 'workflow_key'
column_header: 'A_WORKFLOW_HISTORY_KEY'
- type: 'csv_header'
value: 'RDB_ISSUER_ID'
column_header: 'RDB_ISSUER_ID'
- type: 'csv_header'
value: 'SOURCE'
column_header: 'SOURCE'
- type: 'csv_header'
value: 'RATING_SCHEME'
column_header: 'RATING_SCHEME'
- type: 'csv_header'
value: 'RATING'
column_header: 'RATING'
- type: 'csv_header'
value: 'RATING_DATE'
column_header: 'RATING_DATE'
- type: 'csv_header'
value: 'TIME_HORIZON'
column_header: 'TIME_HORIZON'
- type: 'csv_header'
value: 'CURRENCY_TYPE'
column_header: 'CURRENCY_TYPE'
- type: 'csv_header'
value: 'NOTES'
column_header: 'NOTES'
- type: 'csv_header'
value: 'VALID_FROM'
column_header: 'VALID_FROM'
- type: 'csv_header'
value: 'VALID_UNTIL'
column_header: 'VALID_UNTIL'
- type: 'csv_header'
value: 'RDB_RATINGS_ID'
column_header: 'RDB_RATINGS_ID'
- type: 'csv_header'
value: 'OUTLOOK'
column_header: 'OUTLOOK'
- type: 'csv_header'
value: 'OUTLOOK_DATE'
column_header: 'OUTLOOK_DATE'
- type: 'csv_header'
value: 'WATCHLIST'
column_header: 'WATCHLIST'
- type: 'csv_header'
value: 'WATCHLIST_DATE'
column_header: 'WATCHLIST_DATE'
- type: 'csv_header'
value: 'RATING_ACTION'
column_header: 'RATING_ACTION'
- type: 'csv_header'
value: 'RATING_ACTION_DATE'
column_header: 'RATING_ACTION_DATE'
- type: 'csv_header'
value: 'IS_PRELIMINARY'
column_header: 'IS_PRELIMINARY'
- type: 'csv_header'
value: 'RATING_RAW'
column_header: 'RATING_RAW'
- type: 'csv_header'
value: 'RATING_TYPE'
column_header: 'RATING_TYPE'
- type: 'csv_header'
value: 'ENDORSEMENT_INDICATOR'
column_header: 'ENDORSEMENT_INDICATOR'
- type: 'csv_header'
value: 'LAST_REVIEW_DATE'
column_header: 'LAST_REVIEW_DATE'
- type: 'csv_header'
value: 'PLACEHOLDER6'
column_header: 'PLACEHOLDER6'
- type: 'csv_header'
value: 'PLACEHOLDER7'
column_header: 'PLACEHOLDER7'
- type: 'csv_header'
value: 'PLACEHOLDER8'
column_header: 'PLACEHOLDER8'
- type: 'csv_header'
value: 'PLACEHOLDER9'
column_header: 'PLACEHOLDER9'
- type: 'csv_header'
value: 'PLACEHOLDER10'
column_header: 'PLACEHOLDER10'
- type: 'csv_header'
value: 'PLACEHOLDER11'
column_header: 'PLACEHOLDER11'
- type: 'csv_header'
value: 'PLACEHOLDER12'
column_header: 'PLACEHOLDER12'
- type: 'csv_header'
value: 'PLACEHOLDER13'
column_header: 'PLACEHOLDER13'
- type: 'csv_header'
value: 'PLACEHOLDER14'
column_header: 'PLACEHOLDER14'
- type: 'csv_header'
value: 'PLACEHOLDER15'
column_header: 'PLACEHOLDER15'
- type: 'csv_header'
value: 'PLACEHOLDER16'
column_header: 'PLACEHOLDER16'
- type: 'csv_header'
value: 'PLACEHOLDER17'
column_header: 'PLACEHOLDER17'
- type: 'csv_header'
value: 'PLACEHOLDER18'
column_header: 'PLACEHOLDER18'
- type: 'csv_header'
value: 'PLACEHOLDER19'
column_header: 'PLACEHOLDER19'
- type: 'csv_header'
value: 'PLACEHOLDER20'
column_header: 'PLACEHOLDER20'

View File

@@ -0,0 +1,420 @@
import sys
import os
import json
import logging
from pathlib import Path
from datetime import timedelta, datetime, timezone
from email.utils import parsedate_to_datetime
from airflow import DAG
from airflow.models import Variable
from airflow.decorators import task as af_task
from airflow.operators.trigger_dagrun import TriggerDagRunOperator
from airflow.utils.dates import days_ago
from airflow.utils.trigger_rule import TriggerRule
from airflow.operators.python import get_current_context
try:
from airflow.exceptions import AirflowFailException, AirflowSkipException
except Exception:
from airflow.exceptions import AirflowException as AirflowFailException
from airflow.exceptions import AirflowSkipException
dag_id = "w_ODS_CSDB_RATINGS_FULL_COORDINATOR"
# Standard Airflow task defaults applied to every task in this DAG.
default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': days_ago(1),
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
}
# OCI Object Storage coordinates; BUCKET_NAMESPACE and INBOX_BUCKET must be
# set in the environment (validated in _list_all_zip_objects).
OCI_NAMESPACE = os.getenv("BUCKET_NAMESPACE")
OCI_BUCKET = os.getenv("INBOX_BUCKET")
OBJECT_PREFIX = os.getenv("OBJECT_PREFIX", "csdb/ratings/full/")
# True when CSDB_REPROCESS env var is one of 1/true/yes (case-insensitive).
REPROCESS = (os.getenv("CSDB_REPROCESS", "false").lower() in ("1", "true", "yes"))
# Airflow Variable names used for bookkeeping across runs; PROCESSED_TS_VAR
# stores a JSON map of object name -> last processed mtime.
LAST_TS_VAR = f"{dag_id}__last_seen_ts"
PROCESSED_TS_VAR = f"{dag_id}__processed_objects_ts"
def _oci_client():
    """Build an OCI Object Storage client.

    Tries Resource Principals first, then Instance Principals; raises
    RuntimeError when neither authentication mechanism is available.
    """
    import oci
    region = os.getenv("OCI_REGION") or os.getenv("OCI_RESOURCE_PRINCIPAL_REGION") or "eu-frankfurt-1"
    cfg = {"region": region} if region else {}
    attempts = (
        ("Using OCI Resource Principals signer (region=%s).", "RP not available: %s",
         oci.auth.signers.get_resource_principals_signer),
        ("Using OCI Instance Principals signer (region=%s).", "IP not available: %s",
         oci.auth.signers.InstancePrincipalsSecurityTokenSigner),
    )
    for ok_msg, fail_msg, make_signer in attempts:
        try:
            signer = make_signer()
            logging.info(ok_msg, cfg.get("region"))
            return oci.object_storage.ObjectStorageClient(cfg, signer=signer)
        except Exception as exc:
            logging.info(fail_msg, exc)
    logging.error("Neither Resource Principals nor Instance Principals authentication found.")
    raise RuntimeError("Failed to create OCI client")
def _load_processed_map() -> dict[str, float]:
    """Read the processed-objects bookkeeping Variable as {object_name: mtime}.

    Any problem (missing Variable, invalid JSON, non-dict payload, non-numeric
    values) degrades to an empty map, i.e. "nothing processed yet".
    """
    try:
        payload = json.loads(Variable.get(PROCESSED_TS_VAR, default_var="{}")) or {}
        if not isinstance(payload, dict):
            return {}
        return {name: float(stamp) for name, stamp in payload.items()}
    except Exception:
        return {}
def _list_all_zip_objects(include_processed: bool = False) -> list[dict]:
    """List all STC-FullRatingsDissemination zip files under OBJECT_PREFIX.

    Returns a list of dicts {name, base, mtime, is_processed}, sorted by
    mtime ascending (oldest first).  When include_processed is False, each
    item is flagged against the processed-objects bookkeeping Variable.

    Raises AirflowFailException when the bucket env vars are not set.
    """
    if not OCI_NAMESPACE or not OCI_BUCKET:
        raise AirflowFailException("BUCKET_NAMESPACE and INBOX_BUCKET must be set")
    client = _oci_client()
    processed_map = _load_processed_map() if not include_processed else {}
    all_items: list[dict] = []
    # BUG FIX: list_objects returns at most one page (default 1000 objects);
    # the original call read only the first page and silently dropped the
    # rest.  Paginate via next_start_with.  Also request the timeCreated
    # field explicitly -- by default list_objects returns only object names,
    # so time_created would always be None and every object would need a
    # head_object round-trip.
    next_start = None
    while True:
        resp = client.list_objects(
            OCI_NAMESPACE,
            OCI_BUCKET,
            prefix=OBJECT_PREFIX,
            fields="name,timeCreated",
            start=next_start,
        )
        for o in (resp.data.objects or []):
            name = (o.name or "").strip()
            base = name.rsplit("/", 1)[-1] if name else ""
            # Skip "directory" placeholders and empty names.
            if not name or name.endswith('/') or not base:
                continue
            # Only STC-FullRatingsDissemination zip drops are of interest.
            if not ("STC-FullRatingsDissemination" in base and base.lower().endswith(".zip")):
                continue
            # Get timestamp: prefer list metadata, fall back to head_object,
            # and finally to "now" so the item is never dropped.
            ts = None
            t = getattr(o, "time_created", None)
            if t:
                try:
                    ts = t.timestamp() if hasattr(t, "timestamp") else float(t) / 1000.0
                except Exception:
                    ts = None
            if ts is None:
                try:
                    head = client.head_object(OCI_NAMESPACE, OCI_BUCKET, name)
                    lm = head.headers.get("last-modified") or head.headers.get("Last-Modified")
                    if lm:
                        dt = parsedate_to_datetime(lm)
                        if dt.tzinfo is None:
                            dt = dt.replace(tzinfo=timezone.utc)
                        ts = dt.timestamp()
                except Exception as e:
                    logging.warning("head_object failed for %s: %s", name, e)
            if ts is None:
                ts = datetime.now(timezone.utc).timestamp()
            # Check if already processed: an object counts as processed when
            # its timestamp is not newer than the recorded processed mtime.
            last_proc_ts = float(processed_map.get(name, 0.0))
            is_processed = (ts <= last_proc_ts) if processed_map else False
            all_items.append({
                "name": name,
                "base": base,
                "mtime": ts,
                "is_processed": is_processed
            })
        next_start = getattr(resp.data, "next_start_with", None)
        if not next_start:
            break
    # Sort by timestamp (oldest first)
    all_items.sort(key=lambda x: x["mtime"])
    return all_items
def _list_new_zip_objects() -> list[dict]:
    """List only new/unprocessed zip files"""
    fresh = [
        entry
        for entry in _list_all_zip_objects(include_processed=False)
        if not entry.get("is_processed", False)
    ]
    logging.info("Found %d new STC-FullRatingsDissemination zip file(s) (sorted oldest to newest)", len(fresh))
    return fresh
def _find_specific_zip(filename_pattern: str) -> dict:
    """Find a specific zip file by name pattern.

    Exact matches (base name or full object name) win over case-insensitive
    substring matches.  Raises AirflowFailException when nothing matches.
    """
    candidates = _list_all_zip_objects(include_processed=True)
    # Pass 1: exact match on base name or full object name.
    for candidate in candidates:
        if filename_pattern in (candidate["base"], candidate["name"]):
            logging.info("Found exact match: %s", candidate["base"])
            return candidate
    # Pass 2: case-insensitive substring match on the base name.
    needle = filename_pattern.lower()
    for candidate in candidates:
        if needle in candidate["base"].lower():
            logging.info("Found partial match: %s", candidate["base"])
            return candidate
    raise AirflowFailException(f"No zip file found matching pattern: {filename_pattern}")
# Coordinator DAG: scans the OCI inbox every 6 hours and fans out one
# processing run per zip file to w_ODS_CSDB_RATINGS_FULL_CORE.
with DAG(
    dag_id=dag_id,
    default_args=default_args,
    description='CSDB Ratings Full Coordinator: Lists and triggers processing for zip files',
    schedule_interval="0 */6 * * *",  # Every 6 hours, adjust as needed
    catchup=False,
    max_active_runs=1,  # serialize scans so Variable bookkeeping is not raced
    render_template_as_native_obj=True,
    tags=["CSDB", "COORDINATOR", "ODS", "OCI", "RATINGS"],
) as dag:
@af_task(task_id="determine_processing_mode")
def determine_processing_mode(**context):
"""
Determine what to process based on dag_run configuration.
Configuration options:
1. No config or mode='all': Process all new zip files
2. mode='specific' + filename='xxx': Process specific zip file
3. mode='reprocess_all': Reprocess all zip files (including already processed)
4. mode='list_only': Just list available files without processing
5. filenames=['file1.zip', 'file2.zip']: Process specific list of files
"""
conf = context.get('dag_run').conf or {}
mode = conf.get('mode', 'all')
filename = conf.get('filename')
filenames = conf.get('filenames', [])
force_reprocess = conf.get('force_reprocess', False)
limit = conf.get('limit') # Limit number of files to process
logging.info("Processing mode: %s", mode)
logging.info("Configuration: %s", json.dumps(conf, indent=2))
result = {
"mode": mode,
"filename": filename,
"filenames": filenames,
"force_reprocess": force_reprocess,
"limit": limit
}
return result
    @af_task(task_id="list_zip_files")
    def list_zip_files(mode_config: dict):
        """List zip files based on the processing mode.

        Raises AirflowSkipException for 'list_only' mode and when no new
        files exist; raises AirflowFailException on invalid configuration.
        Returns {"zip_files": [...], "mode": ..., "force_reprocess": ...}.
        """
        mode = mode_config.get("mode", "all")
        filename = mode_config.get("filename")
        filenames = mode_config.get("filenames", [])
        force_reprocess = mode_config.get("force_reprocess", False)
        limit = mode_config.get("limit")
        zip_files = []
        if mode == "list_only":
            # Just list all files for information; skip ends the run cleanly.
            all_files = _list_all_zip_objects(include_processed=True)
            logging.info("=== Available ZIP Files ===")
            for idx, f in enumerate(all_files, 1):
                status = "PROCESSED" if f.get("is_processed") else "NEW"
                logging.info("%d. [%s] %s (mtime: %s)",
                             idx, status, f["base"],
                             datetime.fromtimestamp(f["mtime"]).isoformat())
            raise AirflowSkipException("List only mode - no processing triggered")
        elif mode == "specific":
            # Process a specific file
            if not filename:
                raise AirflowFailException("mode='specific' requires 'filename' parameter")
            zip_file = _find_specific_zip(filename)
            zip_files = [zip_file]
            logging.info("Processing specific file: %s", zip_file["base"])
        elif mode == "specific_list":
            # Process a list of specific files; unknown names are only warned
            # about, but the run fails if none of them resolve.
            if not filenames:
                raise AirflowFailException("mode='specific_list' requires 'filenames' parameter")
            for fn in filenames:
                try:
                    zip_file = _find_specific_zip(fn)
                    zip_files.append(zip_file)
                except Exception as e:
                    logging.warning("Could not find file %s: %s", fn, e)
            if not zip_files:
                raise AirflowFailException("None of the specified files were found")
            logging.info("Processing %d specific files", len(zip_files))
        elif mode == "reprocess_all":
            # Reprocess all files (including already processed)
            all_files = _list_all_zip_objects(include_processed=True)
            zip_files = all_files
            logging.info("Reprocessing all %d files", len(zip_files))
        elif mode == "date_range":
            # Process files within a date range (ISO-format start/end dates;
            # bounds are inclusive against the object mtime).
            start_date = mode_config.get("start_date")
            end_date = mode_config.get("end_date")
            if not start_date or not end_date:
                raise AirflowFailException("mode='date_range' requires 'start_date' and 'end_date'")
            start_ts = datetime.fromisoformat(start_date).timestamp()
            end_ts = datetime.fromisoformat(end_date).timestamp()
            all_files = _list_all_zip_objects(include_processed=True)
            zip_files = [f for f in all_files if start_ts <= f["mtime"] <= end_ts]
            logging.info("Found %d files in date range %s to %s",
                         len(zip_files), start_date, end_date)
        else:  # mode == "all" or default
            # Process all new files
            zip_files = _list_new_zip_objects()
            if not zip_files:
                logging.info("No new zip files to process")
                raise AirflowSkipException("No new zip files found")
        # Apply limit if specified (only positive integers are honoured).
        if limit and isinstance(limit, int) and limit > 0:
            original_count = len(zip_files)
            zip_files = zip_files[:limit]
            logging.info("Limited processing from %d to %d files", original_count, len(zip_files))
        # Sort by timestamp (oldest first)
        zip_files.sort(key=lambda x: x["mtime"])
        logging.info("Selected %d zip file(s) for processing:", len(zip_files))
        for idx, f in enumerate(zip_files, 1):
            logging.info("%d. %s (mtime: %s)",
                         idx, f["base"],
                         datetime.fromtimestamp(f["mtime"]).isoformat())
        return {
            "zip_files": zip_files,
            "mode": mode,
            "force_reprocess": force_reprocess
        }
    @af_task(task_id="trigger_processing_dags")
    def trigger_processing_dags(list_result: dict):
        """Trigger the processing DAG for each zip file sequentially.

        A trigger failure for one file is recorded and does not stop the
        remaining files.  Returns a list of per-file status dicts.
        """
        from airflow.api.common.trigger_dag import trigger_dag
        from time import sleep
        zip_files = list_result.get("zip_files", [])
        mode = list_result.get("mode", "all")
        force_reprocess = list_result.get("force_reprocess", False)
        if not zip_files:
            logging.info("No zip files to process")
            return []
        triggered_runs = []
        for idx, zip_file in enumerate(zip_files):
            # Conf payload consumed by w_ODS_CSDB_RATINGS_FULL_CORE's
            # get_zip_config task.
            conf = {
                "zip_object_name": zip_file["name"],
                "zip_base_name": zip_file["base"],
                "zip_mtime": zip_file["mtime"],
                "sequence_number": idx + 1,
                "total_files": len(zip_files),
                "processing_mode": mode,
                "force_reprocess": force_reprocess,
                "is_processed": zip_file.get("is_processed", False)
            }
            logging.info(f"Triggering processing DAG for file {idx + 1}/{len(zip_files)}: {zip_file['base']}")
            try:
                # NOTE(review): the run_id has second resolution plus idx; two
                # coordinator runs within the same second could collide -- confirm
                # acceptable for the deployment cadence.
                run_id = trigger_dag(
                    dag_id="w_ODS_CSDB_RATINGS_FULL_CORE",
                    run_id=f"coordinator__{datetime.now().strftime('%Y%m%d_%H%M%S')}__{idx}",
                    conf=conf,
                    execution_date=None,
                    replace_microseconds=False,
                )
                triggered_runs.append({
                    "run_id": str(run_id),
                    "zip_file": zip_file["base"],
                    "sequence": idx + 1,
                    "status": "triggered"
                })
                logging.info(f"Successfully triggered run: {run_id}")
            except Exception as e:
                triggered_runs.append({
                    "zip_file": zip_file["base"],
                    "sequence": idx + 1,
                    "status": "failed",
                    "error": str(e)
                })
                logging.error(f"Failed to trigger processing for {zip_file['base']}: {e}")
            # Small delay between triggers to avoid overwhelming the system
            sleep(2)
        logging.info(f"Triggered {len([r for r in triggered_runs if r.get('status') == 'triggered'])} processing DAG runs")
        logging.info(f"Failed to trigger {len([r for r in triggered_runs if r.get('status') == 'failed'])} runs")
        return triggered_runs
@af_task(task_id="summary_report")
def summary_report(trigger_result: list):
"""Generate a summary report of triggered runs"""
if not trigger_result:
logging.info("No runs were triggered")
return
successful = [r for r in trigger_result if r.get("status") == "triggered"]
failed = [r for r in trigger_result if r.get("status") == "failed"]
logging.info("=" * 80)
logging.info("PROCESSING SUMMARY")
logging.info("=" * 80)
logging.info(f"Total files: {len(trigger_result)}")
logging.info(f"Successfully triggered: {len(successful)}")
logging.info(f"Failed to trigger: {len(failed)}")
if successful:
logging.info("\nSuccessfully triggered:")
for r in successful:
logging.info(f" - {r['zip_file']} (run_id: {r['run_id']})")
if failed:
logging.info("\nFailed to trigger:")
for r in failed:
logging.info(f" - {r['zip_file']} (error: {r.get('error', 'unknown')})")
logging.info("=" * 80)
return {
"total": len(trigger_result),
"successful": len(successful),
"failed": len(failed)
}
# Build DAG structure
mode_task = determine_processing_mode()
list_task = list_zip_files(mode_task)
trigger_task = trigger_processing_dags(list_task)
summary_task = summary_report(trigger_task)
mode_task >> list_task >> trigger_task >> summary_task
logging.info("CSDB Ratings Full Coordinator DAG ready")

View File

@@ -0,0 +1,388 @@
import sys
import os
import json
import logging
import zipfile
from pathlib import Path
from datetime import timedelta, datetime, timezone
from airflow import DAG
from airflow.models import Variable
from airflow.decorators import task as af_task
from airflow.operators.python import PythonOperator
from airflow.utils.dates import days_ago
from airflow.utils.trigger_rule import TriggerRule
from airflow.operators.empty import EmptyOperator
from airflow.operators.python import get_current_context
try:
from airflow.exceptions import AirflowFailException, AirflowSkipException
except Exception:
from airflow.exceptions import AirflowException as AirflowFailException
from airflow.exceptions import AirflowSkipException
sys.path.append('/opt/airflow/python/mrds_common')
sys.path.append('/opt/airflow/src/airflow/dags/ods/csdb')
from mrds.utils.manage_runs import init_workflow as mrds_init_workflow, finalise_workflow as mrds_finalise_workflow
from mrds.core import main as mrds_main
dag_id = "w_ODS_CSDB_RATINGS_FULL_CORE"
# Standard Airflow task defaults applied to every task in this DAG.
default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': days_ago(1),
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
}
# MRDS workflow registration parameters (see init_workflow).
WORKFLOW_CONFIG = {
    "database_name": "ODS",
    "workflow_name": dag_id,
}
# OCI Object Storage coordinates, shared with the coordinator DAG.
OCI_NAMESPACE = os.getenv("BUCKET_NAMESPACE")
OCI_BUCKET = os.getenv("INBOX_BUCKET")
OBJECT_PREFIX = os.getenv("OBJECT_PREFIX", "csdb/ratings/full/")
# Base directory for per-run download/extraction scratch space.
TEMP_DIR = "/tmp/csdb_ratings"
# Airflow Variable owned by the coordinator DAG; maps processed object
# name -> mtime.  This DAG updates it on successful processing.
PROCESSED_TS_VAR = "w_ODS_CSDB_RATINGS_FULL_COORDINATOR__processed_objects_ts"
# CSV configurations: one entry per CSV expected inside each zip, pairing the
# file with its MRDS parsing config and the task that processes it.
CSV_CONFIGS = [
    {
        "source_filename": "FULL_INSTRUMENT_DESCRIPTION.csv",
        "config_yaml": "/opt/airflow/src/airflow/dags/ods/csdb/full_ratings/config/m_ODS_CSDB_INSTR_DESC_FULL_PARSE.yaml",
        "task_name": "m_ODS_CSDB_RATINGS_FULL_INSTRUMENT_DESCRIPTION"
    },
    {
        "source_filename": "FULL_INSTRUMENT_RATINGS.csv",
        "config_yaml": "/opt/airflow/src/airflow/dags/ods/csdb/full_ratings/config/m_ODS_CSDB_INSTR_RAT_FULL_PARSE.yaml",
        "task_name": "m_ODS_CSDB_RATINGS_FULL_INSTRUMENT_RATINGS"
    },
    {
        "source_filename": "FULL_ISSUER_DESCRIPTION.csv",
        "config_yaml": "/opt/airflow/src/airflow/dags/ods/csdb/full_ratings/config/m_ODS_CSDB_ISSUER_DESC_FULL_PARSE.yaml",
        "task_name": "m_ODS_CSDB_RATINGS_FULL_ISSUER_DESCRIPTION"
    },
    {
        "source_filename": "FULL_ISSUER_RATINGS.csv",
        "config_yaml": "/opt/airflow/src/airflow/dags/ods/csdb/full_ratings/config/m_ODS_CSDB_ISSUER_RAT_FULL_PARSE.yaml",
        "task_name": "m_ODS_CSDB_RATINGS_FULL_ISSUER_RATINGS"
    }
]
def _oci_client():
    """Create an OCI Object Storage client, preferring Resource Principals
    over Instance Principals; RuntimeError when neither is usable."""
    import oci
    region = os.getenv("OCI_REGION") or os.getenv("OCI_RESOURCE_PRINCIPAL_REGION") or "eu-frankfurt-1"
    cfg = {"region": region} if region else {}
    for ok_msg, fail_msg, make_signer in (
        ("Using OCI Resource Principals signer (region=%s).", "RP not available: %s",
         oci.auth.signers.get_resource_principals_signer),
        ("Using OCI Instance Principals signer (region=%s).", "IP not available: %s",
         oci.auth.signers.InstancePrincipalsSecurityTokenSigner),
    ):
        try:
            signer = make_signer()
            logging.info(ok_msg, cfg.get("region"))
            return oci.object_storage.ObjectStorageClient(cfg, signer=signer)
        except Exception as exc:
            logging.info(fail_msg, exc)
    logging.error("Neither Resource Principals nor Instance Principals authentication found.")
    raise RuntimeError("Failed to create OCI client")
def _load_processed_map() -> dict[str, float]:
    """Load the shared processed-objects Variable as {object_name: mtime}.

    Degrades to an empty map on any error (missing Variable, bad JSON,
    non-dict payload, non-numeric values).
    """
    try:
        payload = json.loads(Variable.get(PROCESSED_TS_VAR, default_var="{}")) or {}
        if not isinstance(payload, dict):
            return {}
        return {name: float(stamp) for name, stamp in payload.items()}
    except Exception:
        return {}
def _save_processed_map(m: dict[str, float]) -> None:
    # Persist the processed-objects map as JSON in the Airflow Variable
    # shared with the coordinator DAG.
    Variable.set(PROCESSED_TS_VAR, json.dumps(m))
def _mark_processed(zip_key: str, zip_mtime: float):
    """Record zip_key as processed at zip_mtime in the shared bookkeeping Variable."""
    bookkeeping = _load_processed_map()
    bookkeeping[zip_key] = float(zip_mtime)
    _save_processed_map(bookkeeping)
    logging.info("Marked as processed: %s (mtime=%s)", zip_key, zip_mtime)
# Processor DAG: handles one zip file per run (conf supplied by the
# coordinator), parsing its four CSVs in parallel via MRDS.
with DAG(
    dag_id=dag_id,
    default_args=default_args,
    description='CSDB Ratings Full Processor: Processes one zip file with 4 CSV files in parallel',
    schedule_interval=None,  # Triggered by coordinator
    catchup=False,
    max_active_runs=3,  # Allow some parallelism but controlled
    render_template_as_native_obj=True,
    tags=["CSDB", "PROCESSOR", "MRDS", "ODS", "OCI", "RATINGS"],
) as dag:
@af_task(task_id="get_zip_config")
def get_zip_config(**context):
"""Get the zip file configuration from dag_run conf"""
conf = context['dag_run'].conf or {}
zip_object_name = conf.get('zip_object_name')
zip_base_name = conf.get('zip_base_name')
zip_mtime = conf.get('zip_mtime')
sequence_number = conf.get('sequence_number', 0)
total_files = conf.get('total_files', 0)
if not all([zip_object_name, zip_base_name, zip_mtime]):
raise AirflowFailException("Missing required configuration: zip_object_name, zip_base_name, or zip_mtime")
logging.info(f"Processing zip file {sequence_number}/{total_files}: {zip_base_name}")
return {
"zip_object_name": zip_object_name,
"zip_base_name": zip_base_name,
"zip_mtime": zip_mtime,
"sequence_number": sequence_number,
"total_files": total_files
}
@af_task(task_id="download_and_unzip")
def download_and_unzip(config: dict):
"""Download and unzip the specific zip file"""
zip_key = config["zip_object_name"]
zip_base = config["zip_base_name"]
client = _oci_client()
os.makedirs(TEMP_DIR, exist_ok=True)
# Create unique temp directory for this run
run_temp_dir = os.path.join(TEMP_DIR, f"run_{datetime.now().strftime('%Y%m%d_%H%M%S')}")
os.makedirs(run_temp_dir, exist_ok=True)
local_zip = os.path.join(run_temp_dir, zip_base)
logging.info("Downloading %s to %s", zip_key, local_zip)
get_obj = client.get_object(OCI_NAMESPACE, OCI_BUCKET, zip_key)
with open(local_zip, 'wb') as f:
for chunk in get_obj.data.raw.stream(1024 * 1024, decode_content=False):
f.write(chunk)
logging.info("Unzipping %s", local_zip)
with zipfile.ZipFile(local_zip, 'r') as zip_ref:
zip_ref.extractall(run_temp_dir)
extracted_files = []
for root, dirs, files in os.walk(run_temp_dir):
for file in files:
if file.endswith('.csv'):
full_path = os.path.join(root, file)
extracted_files.append({"filename": file, "path": full_path})
logging.info("Extracted CSV: %s", file)
logging.info("Total CSV files extracted: %d", len(extracted_files))
return {
"extracted_files": extracted_files,
"zip_config": config,
"temp_dir": run_temp_dir
}
    @af_task(task_id="init_workflow")
    def init_workflow(unzipped: dict):
        """Initialize the MRDS workflow and pair each expected CSV with its
        parsing config.

        Returns a bundle with the MRDS workflow_history_key, the per-CSV
        task_configs, the originating zip_config, and the temp_dir to clean
        up later.  A missing CSV is only warned about -- its task will then
        receive no source_path via XCom.
        """
        database_name = WORKFLOW_CONFIG["database_name"]
        workflow_name = WORKFLOW_CONFIG["workflow_name"]
        ctx = get_current_context()
        run_id = str(ctx['ti'].run_id)
        # Registers this run with MRDS and obtains the history key all CSV
        # tasks and the finaliser reference.
        a_workflow_history_key = mrds_init_workflow(database_name, workflow_name, run_id)
        extracted_files = unzipped.get("extracted_files", [])
        zip_config = unzipped.get("zip_config", {})
        temp_dir = unzipped.get("temp_dir")
        task_configs = []
        for csv_config in CSV_CONFIGS:
            # Match by exact filename against what was actually extracted.
            matching_file = next(
                (ef for ef in extracted_files if ef["filename"] == csv_config["source_filename"]),
                None
            )
            if matching_file:
                task_configs.append({
                    "task_name": csv_config["task_name"],
                    "source_filename": csv_config["source_filename"],
                    "source_path": matching_file["path"],
                    "config_file": csv_config["config_yaml"],
                })
                logging.info("Prepared task config for %s", csv_config["source_filename"])
            else:
                logging.warning("CSV file %s not found in extracted files", csv_config["source_filename"])
        return {
            "workflow_history_key": a_workflow_history_key,
            "task_configs": task_configs,
            "zip_config": zip_config,
            "temp_dir": temp_dir
        }
def run_mrds_task(task_config: dict, **context):
"""Run MRDS processing for a single CSV file"""
ti = context['ti']
task_name = task_config["task_name"]
source_path = task_config["source_path"]
config_file = task_config["config_file"]
if not os.path.exists(config_file):
raise FileNotFoundError(f"Config file not found: {config_file}")
if not os.path.exists(source_path):
raise FileNotFoundError(f"Source CSV file not found: {source_path}")
init_bundle = ti.xcom_pull(task_ids='init_workflow') or {}
workflow_history_key = init_bundle.get('workflow_history_key')
if not workflow_history_key:
raise AirflowFailException("No workflow_history_key from init_workflow")
try:
logging.info(f"{task_name}: Starting MRDS processing for {source_path}")
mrds_main(workflow_history_key, source_path, config_file, generate_workflow_context=False)
logging.info(f"{task_name}: MRDS processing completed successfully")
except Exception as e:
logging.exception(f"{task_name}: MRDS failed on {source_path}")
raise
return "SUCCESS"
    def finalise_workflow_task(**context):
        """Finalize the MRDS workflow and mark the zip as processed.

        Inspects the state of the four CSV task instances in this dag_run:
        if none failed, marks the zip processed and finalises the workflow
        as "Y"; otherwise finalises as "N" and fails this task so the run
        is reported as failed.
        """
        ti = context['ti']
        init_bundle = ti.xcom_pull(task_ids='init_workflow') or {}
        a_workflow_history_key = init_bundle.get('workflow_history_key')
        zip_config = init_bundle.get('zip_config', {})
        if a_workflow_history_key is None:
            raise AirflowFailException("No workflow history key; cannot finalise workflow")
        # Check if any CSV task failed (runs under trigger_rule=ALL_DONE, so
        # it executes regardless of upstream outcome).
        csv_task_ids = [cfg["task_name"] for cfg in CSV_CONFIGS]
        dag_run = context['dag_run']
        tis = [t for t in dag_run.get_task_instances() if t.task_id in csv_task_ids]
        from airflow.utils.state import State
        any_failed = any(ti_i.state in {State.FAILED, State.UPSTREAM_FAILED} for ti_i in tis)
        if not any_failed:
            # Mark zip as processed so the coordinator will not pick it up again.
            zip_key = zip_config.get("zip_object_name")
            zip_mtime = zip_config.get("zip_mtime")
            if zip_key and zip_mtime:
                _mark_processed(zip_key, zip_mtime)
            mrds_finalise_workflow(a_workflow_history_key, "Y")
            logging.info("Finalised workflow %s as SUCCESS", a_workflow_history_key)
        else:
            failed_tasks = [ti_i.task_id for ti_i in tis if ti_i.state in {State.FAILED, State.UPSTREAM_FAILED}]
            mrds_finalise_workflow(a_workflow_history_key, "N")
            logging.error("Finalised workflow %s as FAILED (failed tasks=%s)",
                          a_workflow_history_key, failed_tasks)
            raise AirflowFailException(f"Workflow failed for tasks: {failed_tasks}")
@af_task(task_id="cleanup_temp_files")
def cleanup_temp_files(**context):
"""Clean up temporary files for this run"""
import shutil
ti = context['ti']
init_bundle = ti.xcom_pull(task_ids='init_workflow') or {}
temp_dir = init_bundle.get('temp_dir')
if temp_dir and os.path.exists(temp_dir):
shutil.rmtree(temp_dir)
logging.info("Cleaned up temp directory: %s", temp_dir)
@af_task(task_id="move_zip_to_archive")
def move_zip_to_archive(**context):
"""Move processed zip file to archive"""
ti = context['ti']
init_bundle = ti.xcom_pull(task_ids='init_workflow') or {}
zip_config = init_bundle.get('zip_config', {})
zip_key = zip_config.get("zip_object_name")
if not zip_key:
logging.warning("No zip key found, skipping archive")
return
client = _oci_client()
archive_key = zip_key.replace(OBJECT_PREFIX, f"{OBJECT_PREFIX}archive/", 1)
try:
client.copy_object(
OCI_NAMESPACE,
OCI_BUCKET,
{
"sourceObjectName": zip_key,
"destinationRegion": os.getenv("OCI_REGION", "eu-frankfurt-1"),
"destinationNamespace": OCI_NAMESPACE,
"destinationBucket": OCI_BUCKET,
"destinationObjectName": archive_key
}
)
logging.info("Copied to archive: %s -> %s", zip_key, archive_key)
client.delete_object(OCI_NAMESPACE, OCI_BUCKET, zip_key)
logging.info("Deleted from inbox: %s", zip_key)
except Exception as e:
logging.error("Failed to archive zip file %s: %s", zip_key, e)
raise
    # Build the DAG structure:
    #   config -> download/unzip -> init MRDS -> 4 parallel CSV parsers ->
    #   finalise (ALL_DONE) -> cleanup + archive -> sentinel.
    config_task = get_zip_config()
    unzip_task = download_and_unzip(config_task)
    init_task = init_workflow(unzip_task)
    # Create CSV processing tasks dynamically, one per CSV_CONFIGS entry.
    csv_tasks = []
    for csv_config in CSV_CONFIGS:
        task = PythonOperator(
            task_id=csv_config["task_name"],
            python_callable=run_mrds_task,
            op_kwargs={
                "task_config": {
                    "task_name": csv_config["task_name"],
                    "source_filename": csv_config["source_filename"],
                    # Jinja pulls the extracted CSV path for this task out of
                    # init_workflow's XCom at runtime.
                    "source_path": "{{ ti.xcom_pull(task_ids='init_workflow')['task_configs'] | selectattr('task_name', 'equalto', '" + csv_config["task_name"] + "') | map(attribute='source_path') | first }}",
                    "config_file": csv_config["config_yaml"],
                }
            },
            # NOTE(review): provide_context is a deprecated no-op in Airflow 2.x
            # (context is always passed) -- confirm target Airflow version.
            provide_context=True,
        )
        csv_tasks.append(task)
    # Finaliser must run whatever happened upstream (ALL_DONE) and decides
    # overall success/failure itself.
    finalize_task = PythonOperator(
        task_id='finalize_workflow',
        python_callable=finalise_workflow_task,
        provide_context=True,
        trigger_rule=TriggerRule.ALL_DONE,
        retries=0,
    )
    cleanup_task = cleanup_temp_files()
    archive_task = move_zip_to_archive()
    all_good = EmptyOperator(
        task_id="All_went_well",
        trigger_rule=TriggerRule.ALL_SUCCESS,
    )
    # Define task dependencies
    config_task >> unzip_task >> init_task >> csv_tasks >> finalize_task >> [cleanup_task, archive_task] >> all_good
logging.info("CSDB Ratings Full Processor DAG ready")