Files
mars/MARS_Packages/REL01_ADDITIONS/MARS-835-PREHOOK/new_version/DATA_EXPORTER.pkg
Grzegorz Michalski 6c8b22eac9 Add DATA_EXPORTER package and update installation scripts for export registration
- Created new package CT_MRDS.DATA_EXPORTER (v2.6.3) for comprehensive data export capabilities, including CSV and Parquet formats with OCI integration.
- Implemented version history tracking and enhanced error handling for export processes.
- Updated installation scripts to include pRegisterExport parameter for registering exports in A_WORKFLOW_HISTORY and A_SOURCE_FILE_RECEIVED.
2026-02-10 09:33:46 +01:00

227 lines
13 KiB
Plaintext

create or replace PACKAGE CT_MRDS.DATA_EXPORTER
AUTHID CURRENT_USER
AS
/**
* Data Export Package: Provides comprehensive data export capabilities to various formats (CSV, Parquet)
* with support for cloud storage integration via Oracle Cloud Infrastructure (OCI).
* The structure of comment is used by GET_PACKAGE_DOCUMENTATION function
* which returns documentation text for confluence page (to Copy-Paste it).
**/
-- Package Version Information (Semantic Versioning: MAJOR.MINOR.PATCH)
PACKAGE_VERSION CONSTANT VARCHAR2(10) := '2.7.0';
PACKAGE_BUILD_DATE CONSTANT VARCHAR2(20) := '2026-02-09 20:00:00';
PACKAGE_AUTHOR CONSTANT VARCHAR2(100) := 'Grzegorz Michalski';
cgBL CONSTANT VARCHAR2(2) := CHR(13)||CHR(10);
-- Version History (Latest changes first)
VERSION_HISTORY CONSTANT VARCHAR2(4000) :=
'v2.7.0 (2026-02-09): NEW FEATURE - Added pRegisterExport parameter to EXPORT_TABLE_DATA_TO_CSV_BY_DATE. When TRUE, successfully exported files are registered in A_WORKFLOW_HISTORY (one record per YEAR/MONTH) and A_SOURCE_FILE_RECEIVED tables for tracking and audit purposes.' || cgBL ||
'v2.6.3 (2026-01-28): COMPILATION FIX - Resolved ORA-00904 error in EXPORT_PARTITION_PARALLEL. SQLERRM and DBMS_UTILITY.FORMAT_ERROR_BACKTRACE cannot be used directly in SQL UPDATE statements. Now properly assigned to vgMsgTmp variable before UPDATE.' || cgBL ||
'v2.6.2 (2026-01-28): CRITICAL FIX - Race condition when multiple exports run simultaneously. Changed DELETE to filter by age (>24h) instead of deleting all COMPLETED chunks. Prevents concurrent sessions from deleting each other chunks. Session-safe cleanup with TASK_NAME filtering. Enables true parallel execution of multiple export jobs.' || cgBL ||
'v2.6.1 (2026-01-28): Added DELETE_FAILED_EXPORT_FILE procedure to clean up partial/corrupted files before retry. When partition fails mid-export, partial file is deleted before retry to prevent Oracle from creating _1 suffixed duplicates. Ensures clean retry without orphaned files in OCI bucket.' || cgBL ||
'v2.6.0 (2026-01-28): CRITICAL FIX - Added STATUS tracking to A_PARALLEL_EXPORT_CHUNKS table to prevent data duplication on retry. System now restarts ONLY failed partitions instead of re-exporting all data. Added ERROR_MESSAGE and EXPORT_TIMESTAMP columns for better error handling and monitoring. Prevents duplicate file creation when parallel tasks fail (e.g., 22 partitions with 16 threads, 3 failures no longer duplicates 19 successful exports).' || cgBL ||
'v2.5.0 (2026-01-26): Added recorddelimiter parameter with CRLF (CHR(13)||CHR(10)) for CSV exports to ensure Windows-compatible line endings. Improves cross-platform compatibility when CSV files are opened in Windows applications (Notepad, Excel).' || cgBL ||
'v2.4.0 (2026-01-11): Added pTemplateTableName parameter for per-column date format configuration. Implements dynamic query building with TO_CHAR for each date/timestamp column using FILE_MANAGER.GET_DATE_FORMAT. Supports 3-tier hierarchy: column-specific, template DEFAULT, global fallback. Eliminates single dateformat limitation of DBMS_CLOUD.EXPORT_DATA.' || cgBL ||
'v2.3.0 (2025-12-20): Added parallel partition processing using DBMS_PARALLEL_EXECUTE. New pParallelDegree parameter (1-16, default 1) for EXPORT_TABLE_DATA_BY_DATE and EXPORT_TABLE_DATA_TO_CSV_BY_DATE procedures. Each year/month partition processed in separate thread for improved performance.' || cgBL ||
'v2.2.0 (2025-12-19): DRY refactoring - extracted shared helper functions (sanitizeFilename, VALIDATE_TABLE_AND_COLUMNS, GET_PARTITIONS, EXPORT_SINGLE_PARTITION worker procedure). Reduced code duplication by ~400 lines. Prepared architecture for v2.3.0 parallel processing.' || cgBL ||
'v2.1.1 (2025-12-04): Fixed JOIN column reference A_WORKFLOW_HISTORY_KEY -> A_ETL_LOAD_SET_KEY, added consistent column mapping and dynamic column list to EXPORT_TABLE_DATA procedure, enhanced DEBUG logging for all export operations' || cgBL ||
'v2.1.0 (2025-10-22): Added version tracking and PARTITION_YEAR/PARTITION_MONTH support' || cgBL ||
'v2.0.0 (2025-10-01): Separated export functionality from FILE_MANAGER package';
vgMsgTmp VARCHAR2(32000);
---------------------------------------------------------------------------------------------------------------------------
-- TYPE DEFINITIONS FOR PARTITION HANDLING
---------------------------------------------------------------------------------------------------------------------------
/**
* Record type for year/month partition information
**/
TYPE partition_rec IS RECORD (
year VARCHAR2(4),
month VARCHAR2(2)
);
/**
* Table type for collection of partition records
**/
TYPE partition_tab IS TABLE OF partition_rec;
---------------------------------------------------------------------------------------------------------------------------
-- INTERNAL PARALLEL PROCESSING CALLBACK
---------------------------------------------------------------------------------------------------------------------------
/**
* @name EXPORT_PARTITION_PARALLEL
* @desc Internal callback procedure for DBMS_PARALLEL_EXECUTE.
* Processes single partition (year/month) chunk in parallel task.
* Called by DBMS_PARALLEL_EXECUTE framework for each chunk.
* This procedure is PUBLIC because DBMS_PARALLEL_EXECUTE requires it,
* but should NOT be called directly by external code.
* @param pStartId - Chunk start ID (CHUNK_ID from A_PARALLEL_EXPORT_CHUNKS table)
* @param pEndId - Chunk end ID (same as pStartId for single-row chunks)
**/
PROCEDURE EXPORT_PARTITION_PARALLEL (
pStartId IN NUMBER,
pEndId IN NUMBER
);
---------------------------------------------------------------------------------------------------------------------------
-- MAIN EXPORT PROCEDURES
---------------------------------------------------------------------------------------------------------------------------
/**
* @name EXPORT_TABLE_DATA
* @desc Wrapper procedure for DBMS_CLOUD.EXPORT_DATA.
* Exports data into CSV file on OCI infrustructure.
* pBucketArea parameter accepts: 'INBOX', 'ODS', 'DATA', 'ARCHIVE'
* @example
* begin
* DATA_EXPORTER.EXPORT_TABLE_DATA(
* pSchemaName => 'CT_MRDS',
* pTableName => 'MY_TABLE',
* pKeyColumnName => 'A_ETL_LOAD_SET_KEY_FK',
* pBucketArea => 'DATA',
* pFolderName => 'csv_exports'
* );
* end;
**/
PROCEDURE EXPORT_TABLE_DATA (
pSchemaName IN VARCHAR2,
pTableName IN VARCHAR2,
pKeyColumnName IN VARCHAR2,
pBucketArea IN VARCHAR2,
pFolderName IN VARCHAR2,
pCredentialName IN VARCHAR2 default ENV_MANAGER.gvCredentialName
);
/**
* @name EXPORT_TABLE_DATA_BY_DATE
* @desc Wrapper procedure for DBMS_CLOUD.EXPORT_DATA.
* Exports data into PARQUET files on OCI infrustructure.
* Each YEAR_MONTH pair goes to seperate file (implicit partitioning).
* Allows specifying custom column list or uses T.* if pColumnList is NULL.
* Validates that all columns in pColumnList exist in the target table.
* Automatically adds 'T.' prefix to column names in pColumnList.
* Supports parallel partition processing via pParallelDegree parameter (default 1, range 1-16).
* pBucketArea parameter accepts: 'INBOX', 'ODS', 'DATA', 'ARCHIVE'
* @example
* begin
* DATA_EXPORTER.EXPORT_TABLE_DATA_BY_DATE(
* pSchemaName => 'CT_MRDS',
* pTableName => 'MY_TABLE',
* pKeyColumnName => 'A_ETL_LOAD_SET_KEY_FK',
* pBucketArea => 'DATA',
* pFolderName => 'parquet_exports',
* pColumnList => 'COLUMN1, COLUMN2, COLUMN3', -- Optional
* pMinDate => DATE '2024-01-01',
* pMaxDate => SYSDATE,
* pParallelDegree => 8 -- Optional, default 1, range 1-16
* );
* end;
**/
PROCEDURE EXPORT_TABLE_DATA_BY_DATE (
pSchemaName IN VARCHAR2,
pTableName IN VARCHAR2,
pKeyColumnName IN VARCHAR2,
pBucketArea IN VARCHAR2,
pFolderName IN VARCHAR2,
pColumnList IN VARCHAR2 default NULL,
pMinDate IN DATE default DATE '1900-01-01',
pMaxDate IN DATE default SYSDATE,
pParallelDegree IN NUMBER default 1,
pTemplateTableName IN VARCHAR2 default NULL,
pCredentialName IN VARCHAR2 default ENV_MANAGER.gvCredentialName
);
/**
* @name EXPORT_TABLE_DATA_TO_CSV_BY_DATE
* @desc Exports data to separate CSV files partitioned by year and month.
* Creates one CSV file for each year/month combination found in the data.
* Uses the same date filtering mechanism with CT_ODS.A_LOAD_HISTORY as EXPORT_TABLE_DATA_BY_DATE,
* but exports to CSV format instead of Parquet.
* Supports parallel partition processing via pParallelDegree parameter (1-16).
* File naming pattern: {pFileName}_YYYYMM.csv or {TABLENAME}_YYYYMM.csv (if pFileName is NULL)
* When pRegisterExport=TRUE, successfully exported files are registered in:
* - CT_MRDS.A_WORKFLOW_HISTORY (one record per YEAR/MONTH with export timestamp)
* - CT_MRDS.A_SOURCE_FILE_RECEIVED (tracks file location and partition info)
* @example
* begin
* -- With custom filename
* DATA_EXPORTER.EXPORT_TABLE_DATA_TO_CSV_BY_DATE(
* pSchemaName => 'CT_MRDS',
* pTableName => 'MY_TABLE',
* pKeyColumnName => 'A_ETL_LOAD_SET_KEY_FK',
* pBucketArea => 'DATA',
* pFolderName => 'exports',
* pFileName => 'my_export.csv',
* pMinDate => DATE '2024-01-01',
* pMaxDate => SYSDATE,
* pParallelDegree => 8, -- Optional, default 1, range 1-16
* pRegisterExport => TRUE -- Optional, default FALSE, registers to A_WORKFLOW_HISTORY and A_SOURCE_FILE_RECEIVED
* );
*
* -- With auto-generated filename (based on table name only)
* DATA_EXPORTER.EXPORT_TABLE_DATA_TO_CSV_BY_DATE(
* pSchemaName => 'OU_TOP',
* pTableName => 'AGGREGATED_ALLOTMENT',
* pKeyColumnName => 'A_ETL_LOAD_SET_KEY_FK',
* pBucketArea => 'ARCHIVE',
* pFolderName => 'exports',
* pMinDate => DATE '2025-09-01',
* pMaxDate => DATE '2025-09-17',
* pRegisterExport => TRUE -- Registers each export to tracking tables
* );
* -- This will create files like: AGGREGATED_ALLOTMENT_202509.csv, etc.
* pBucketArea parameter accepts: 'INBOX', 'ODS', 'DATA', 'ARCHIVE'
* end;
**/
PROCEDURE EXPORT_TABLE_DATA_TO_CSV_BY_DATE (
pSchemaName IN VARCHAR2,
pTableName IN VARCHAR2,
pKeyColumnName IN VARCHAR2,
pBucketArea IN VARCHAR2,
pFolderName IN VARCHAR2,
pFileName IN VARCHAR2 DEFAULT NULL,
pColumnList IN VARCHAR2 default NULL,
pMinDate IN DATE default DATE '1900-01-01',
pMaxDate IN DATE default SYSDATE,
pParallelDegree IN NUMBER default 1,
pTemplateTableName IN VARCHAR2 default NULL,
pMaxFileSize IN NUMBER default 104857600,
pRegisterExport IN BOOLEAN default FALSE,
pCredentialName IN VARCHAR2 default ENV_MANAGER.gvCredentialName
);
---------------------------------------------------------------------------------------------------------------------------
-- VERSION MANAGEMENT FUNCTIONS
---------------------------------------------------------------------------------------------------------------------------
/**
* Returns the current package version number
* return: Version string in format X.Y.Z (e.g., '2.1.0')
**/
FUNCTION GET_VERSION RETURN VARCHAR2;
/**
* Returns comprehensive build information including version, date, and author
* return: Formatted string with complete build details
**/
FUNCTION GET_BUILD_INFO RETURN VARCHAR2;
/**
* Returns the version history with recent changes
* return: Multi-line string with version history
**/
FUNCTION GET_VERSION_HISTORY RETURN VARCHAR2;
END;
/