Files
mars/MARS_Packages/REL01_ADDITIONS/MARS-835-PREHOOK/new_version/DATA_EXPORTER.pkg

240 lines
12 KiB
Plaintext

create or replace PACKAGE CT_MRDS.DATA_EXPORTER
AUTHID CURRENT_USER
AS
/**
* Data Export Package: Provides comprehensive data export capabilities to various formats (CSV, Parquet)
* with support for cloud storage integration via Oracle Cloud Infrastructure (OCI).
* The structure of comment is used by GET_PACKAGE_DOCUMENTATION function
* which returns documentation text for confluence page (to Copy-Paste it).
**/
-- Package Version Information
PACKAGE_VERSION CONSTANT VARCHAR2(10) := '2.9.0';
PACKAGE_BUILD_DATE CONSTANT VARCHAR2(20) := '2026-02-13 14:00:00';
PACKAGE_AUTHOR CONSTANT VARCHAR2(100) := 'Grzegorz Michalski';
-- Version History (last 3-5 changes)
VERSION_HISTORY CONSTANT VARCHAR2(4000) :=
'v2.9.0 (2026-02-13): Added pProcessName parameter to EXPORT_TABLE_DATA and EXPORT_TABLE_DATA_TO_CSV_BY_DATE procedures for process tracking in A_SOURCE_FILE_RECEIVED table.' || CHR(10) ||
'v2.8.1 (2026-02-12): FIX query in EXPORT_TABLE_DATA - removed A_LOAD_HISTORY join to ensure single file output (simple SELECT).' || CHR(10) ||
'v2.8.0 (2026-02-12): MAJOR REFACTOR - EXPORT_TABLE_DATA now exports to single CSV file instead of partitioning by key values. Added pFileName parameter.' || CHR(10) ||
'v2.7.5 (2026-02-11): Added pRegisterExport parameter to EXPORT_TABLE_DATA procedure. When TRUE, registers each exported CSV file in A_SOURCE_FILE_RECEIVED.' || CHR(10) ||
'v2.7.4 (2026-02-11): ACTUAL FILENAME STORAGE - Store real filename with Oracle suffix in SOURCE_FILE_NAME instead of theoretical filename.' || CHR(10);
cgBL CONSTANT VARCHAR2(2) := CHR(13)||CHR(10);
vgMsgTmp VARCHAR2(32000);
---------------------------------------------------------------------------------------------------------------------------
-- TYPE DEFINITIONS FOR PARTITION HANDLING
---------------------------------------------------------------------------------------------------------------------------
/**
* Record type for year/month partition information
**/
TYPE partition_rec IS RECORD (
year VARCHAR2(4),
month VARCHAR2(2)
);
/**
* Table type for collection of partition records
**/
TYPE partition_tab IS TABLE OF partition_rec;
---------------------------------------------------------------------------------------------------------------------------
-- INTERNAL PARALLEL PROCESSING CALLBACK
---------------------------------------------------------------------------------------------------------------------------
/**
* @name EXPORT_PARTITION_PARALLEL
* @desc Internal callback procedure for DBMS_PARALLEL_EXECUTE.
* Processes single partition (year/month) chunk in parallel task.
* Called by DBMS_PARALLEL_EXECUTE framework for each chunk.
* This procedure is PUBLIC because DBMS_PARALLEL_EXECUTE requires it,
* but should NOT be called directly by external code.
* @param pStartId - Chunk start ID (CHUNK_ID from A_PARALLEL_EXPORT_CHUNKS table)
* @param pEndId - Chunk end ID (same as pStartId for single-row chunks)
**/
PROCEDURE EXPORT_PARTITION_PARALLEL (
pStartId IN NUMBER,
pEndId IN NUMBER
);
---------------------------------------------------------------------------------------------------------------------------
-- MAIN EXPORT PROCEDURES
---------------------------------------------------------------------------------------------------------------------------
/**
* @name EXPORT_TABLE_DATA
* @desc Wrapper procedure for DBMS_CLOUD.EXPORT_DATA.
* Exports data into single CSV file on OCI infrastructure.
* pBucketArea parameter accepts: 'INBOX', 'ODS', 'DATA', 'ARCHIVE'
* Supports template table for column order and per-column date formatting.
* When pRegisterExport=TRUE, successfully exported file is registered in:
* - CT_MRDS.A_SOURCE_FILE_RECEIVED (tracks file location, size, checksum, and metadata)
* @param pFileName - Optional filename (e.g., 'export.csv'). NULL = auto-generate from table name
* @param pTemplateTableName - Optional template table (SCHEMA.TABLE or TABLE) for:
* - Column order control (template defines CSV structure)
* - Per-column date formatting via FILE_MANAGER.GET_DATE_FORMAT
* - NULL = use source table columns in natural order
* @param pMaxFileSize - Maximum file size in bytes (default 104857600 = 100MB, min 10MB, max 1GB)
* @param pRegisterExport - When TRUE, registers exported CSV file in A_SOURCE_FILE_RECEIVED table
* @param pProcessName - Process name stored in PROCESS_NAME column (default 'DATA_EXPORTER')
* @example
* begin
* DATA_EXPORTER.EXPORT_TABLE_DATA(
* pSchemaName => 'CT_MRDS',
* pTableName => 'MY_TABLE',
* pKeyColumnName => 'A_ETL_LOAD_SET_KEY_FK',
* pBucketArea => 'DATA',
* pFolderName => 'csv_exports',
* pFileName => 'my_export.csv', -- Optional
* pTemplateTableName => 'CT_ET_TEMPLATES.MY_TEMPLATE', -- Optional
* pMaxFileSize => 104857600, -- Optional, default 100MB
* pRegisterExport => TRUE -- Optional, default FALSE
* );
* end;
**/
PROCEDURE EXPORT_TABLE_DATA (
pSchemaName IN VARCHAR2,
pTableName IN VARCHAR2,
pKeyColumnName IN VARCHAR2,
pBucketArea IN VARCHAR2,
pFolderName IN VARCHAR2,
pFileName IN VARCHAR2 default NULL,
pTemplateTableName IN VARCHAR2 default NULL,
pMaxFileSize IN NUMBER default 104857600,
pRegisterExport IN BOOLEAN default FALSE,
pProcessName IN VARCHAR2 default 'DATA_EXPORTER',
pCredentialName IN VARCHAR2 default ENV_MANAGER.gvCredentialName
);
/**
* @name EXPORT_TABLE_DATA_BY_DATE
* @desc Wrapper procedure for DBMS_CLOUD.EXPORT_DATA.
* Exports data into PARQUET files on OCI infrustructure.
* Each YEAR_MONTH pair goes to seperate file (implicit partitioning).
* Allows specifying custom column list or uses T.* if pColumnList is NULL.
* Validates that all columns in pColumnList exist in the target table.
* Automatically adds 'T.' prefix to column names in pColumnList.
* Supports parallel partition processing via pParallelDegree parameter (default 1, range 1-16).
* pBucketArea parameter accepts: 'INBOX', 'ODS', 'DATA', 'ARCHIVE'
* @example
* begin
* DATA_EXPORTER.EXPORT_TABLE_DATA_BY_DATE(
* pSchemaName => 'CT_MRDS',
* pTableName => 'MY_TABLE',
* pKeyColumnName => 'A_ETL_LOAD_SET_KEY_FK',
* pBucketArea => 'DATA',
* pFolderName => 'parquet_exports',
* pColumnList => 'COLUMN1, COLUMN2, COLUMN3', -- Optional
* pMinDate => DATE '2024-01-01',
* pMaxDate => SYSDATE,
* pParallelDegree => 8 -- Optional, default 1, range 1-16
* );
* end;
**/
PROCEDURE EXPORT_TABLE_DATA_BY_DATE (
pSchemaName IN VARCHAR2,
pTableName IN VARCHAR2,
pKeyColumnName IN VARCHAR2,
pBucketArea IN VARCHAR2,
pFolderName IN VARCHAR2,
pColumnList IN VARCHAR2 default NULL,
pMinDate IN DATE default DATE '1900-01-01',
pMaxDate IN DATE default SYSDATE,
pParallelDegree IN NUMBER default 1,
pTemplateTableName IN VARCHAR2 default NULL,
pCredentialName IN VARCHAR2 default ENV_MANAGER.gvCredentialName
);
/**
* @name EXPORT_TABLE_DATA_TO_CSV_BY_DATE
* @desc Exports data to separate CSV files partitioned by year and month.
* Creates one CSV file for each year/month combination found in the data.
* Uses the same date filtering mechanism with CT_ODS.A_LOAD_HISTORY as EXPORT_TABLE_DATA_BY_DATE,
* but exports to CSV format instead of Parquet.
* Supports parallel partition processing via pParallelDegree parameter (1-16).
* File naming pattern: {pFileName}_YYYYMM.csv or {TABLENAME}_YYYYMM.csv (if pFileName is NULL)
* When pRegisterExport=TRUE, successfully exported files are registered in:
* - CT_MRDS.A_SOURCE_FILE_RECEIVED (tracks file location, size, checksum, and metadata)
* @param pProcessName - Process name stored in PROCESS_NAME column (default 'DATA_EXPORTER')
* @example
* begin
* -- With custom filename
* DATA_EXPORTER.EXPORT_TABLE_DATA_TO_CSV_BY_DATE(
* pSchemaName => 'CT_MRDS',
* pTableName => 'MY_TABLE',
* pKeyColumnName => 'A_ETL_LOAD_SET_KEY_FK',
* pBucketArea => 'DATA',
* pFolderName => 'exports',
* pFileName => 'my_export.csv',
* pMinDate => DATE '2024-01-01',
* pMaxDate => SYSDATE,
* pParallelDegree => 8, -- Optional, default 1, range 1-16
* pRegisterExport => TRUE -- Optional, default FALSE, registers to A_SOURCE_FILE_RECEIVED
* );
*
* -- With auto-generated filename (based on table name only)
* DATA_EXPORTER.EXPORT_TABLE_DATA_TO_CSV_BY_DATE(
* pSchemaName => 'OU_TOP',
* pTableName => 'AGGREGATED_ALLOTMENT',
* pKeyColumnName => 'A_ETL_LOAD_SET_KEY_FK',
* pBucketArea => 'ARCHIVE',
* pFolderName => 'exports',
* pMinDate => DATE '2025-09-01',
* pMaxDate => DATE '2025-09-17',
* pRegisterExport => TRUE -- Registers each export to A_SOURCE_FILE_RECEIVED table
* );
* -- This will create files like: AGGREGATED_ALLOTMENT_202509.csv, etc.
* pBucketArea parameter accepts: 'INBOX', 'ODS', 'DATA', 'ARCHIVE'
* end;
**/
PROCEDURE EXPORT_TABLE_DATA_TO_CSV_BY_DATE (
pSchemaName IN VARCHAR2,
pTableName IN VARCHAR2,
pKeyColumnName IN VARCHAR2,
pBucketArea IN VARCHAR2,
pFolderName IN VARCHAR2,
pFileName IN VARCHAR2 DEFAULT NULL,
pColumnList IN VARCHAR2 default NULL,
pMinDate IN DATE default DATE '1900-01-01',
pMaxDate IN DATE default SYSDATE,
pParallelDegree IN NUMBER default 1,
pTemplateTableName IN VARCHAR2 default NULL,
pMaxFileSize IN NUMBER default 104857600,
pRegisterExport IN BOOLEAN default FALSE,
pProcessName IN VARCHAR2 default 'DATA_EXPORTER',
pCredentialName IN VARCHAR2 default ENV_MANAGER.gvCredentialName
);
---------------------------------------------------------------------------------------------------------------------------
-- VERSION MANAGEMENT FUNCTIONS
---------------------------------------------------------------------------------------------------------------------------
/**
* Returns the current package version number
* return: Version string in format X.Y.Z (e.g., '2.1.0')
**/
FUNCTION GET_VERSION RETURN VARCHAR2;
/**
* Returns comprehensive build information including version, date, and author
* return: Formatted string with complete build details
**/
FUNCTION GET_BUILD_INFO RETURN VARCHAR2;
/**
* Returns the version history with recent changes
* return: Multi-line string with version history
**/
FUNCTION GET_VERSION_HISTORY RETURN VARCHAR2;
END;
/