Files
mars/MARS_Packages/REL03/MARS-1005-PREHOOK/rollback_version/DATA_EXPORTER.pkg
Grzegorz Michalski 5ba6c30fda MARS-1005-PREHOOK
2026-03-11 10:34:47 +01:00

244 lines
13 KiB
Plaintext

create or replace PACKAGE CT_MRDS.DATA_EXPORTER
AUTHID CURRENT_USER
AS
/**
* Data Export Package: Provides comprehensive data export capabilities to various formats (CSV, Parquet)
* with support for cloud storage integration via Oracle Cloud Infrastructure (OCI).
* The structure of comment is used by GET_PACKAGE_DOCUMENTATION function
* which returns documentation text for confluence page (to Copy-Paste it).
**/
-- Package Version Information
PACKAGE_VERSION CONSTANT VARCHAR2(10) := '2.14.0';
PACKAGE_BUILD_DATE CONSTANT VARCHAR2(20) := '2026-02-25 09:00:00';
PACKAGE_AUTHOR CONSTANT VARCHAR2(100) := 'Grzegorz Michalski';
-- Version History (last 3-5 changes)
VERSION_HISTORY CONSTANT VARCHAR2(4000) :=
'v2.14.0 (2026-02-25): OPTIMIZATION - Added pTaskName parameter to EXPORT_PARTITION_PARALLEL for deterministic filtering. Replaced FETCH FIRST 1 ROW ONLY safeguard with precise WHERE CHUNK_ID AND TASK_NAME filter. Eliminates ORDER BY overhead and provides cleaner session isolation.' || CHR(10) ||
'v2.13.1 (2026-02-25): CRITICAL FIX - Added START_ID and END_ID aliasses in CREATE_CHUNKS_BY_SQL to avoid ORA-00960 ambiguous column naming error.' || CHR(10) ||
'v2.13.0 (2026-02-25): CRITICAL SESSION ISOLATION FIX - Changed CREATE_CHUNKS_BY_NUMBER_COL to CREATE_CHUNKS_BY_SQL with TASK_NAME filter (fixes ORA-01422 in concurrent sessions). Added ORDER BY CREATED_DATE DESC FETCH FIRST 1 ROW safeguard to EXPORT_PARTITION_PARALLEL SELECT. Composite PK (TASK_NAME, CHUNK_ID) now fully functional.' || CHR(10) ||
'v2.12.0 (2026-02-24): CRITICAL FIX - Rewritten DELETE_FAILED_EXPORT_FILE to use file-specific pattern matching (prevents deleting parallel CSV chunks in shared folder). Added vQuery logging before DBMS_CLOUD calls. Added CSV maxfilesize logging.' || CHR(10) ||
'v2.11.0 (2026-02-18): Added pJobClass parameter to EXPORT_TABLE_DATA_BY_DATE and EXPORT_TABLE_DATA_TO_CSV_BY_DATE for Oracle Scheduler job class support (resource/priority management).' || CHR(10);
cgBL CONSTANT VARCHAR2(2) := CHR(13)||CHR(10);
vgMsgTmp VARCHAR2(32000);
---------------------------------------------------------------------------------------------------------------------------
-- TYPE DEFINITIONS FOR PARTITION HANDLING
---------------------------------------------------------------------------------------------------------------------------
/**
* Record type for year/month partition information
**/
TYPE partition_rec IS RECORD (
year VARCHAR2(4),
month VARCHAR2(2)
);
/**
* Table type for collection of partition records
**/
TYPE partition_tab IS TABLE OF partition_rec;
---------------------------------------------------------------------------------------------------------------------------
-- INTERNAL PARALLEL PROCESSING CALLBACK
---------------------------------------------------------------------------------------------------------------------------
/**
* @name EXPORT_PARTITION_PARALLEL
* @desc Internal callback procedure for DBMS_PARALLEL_EXECUTE.
* Processes single partition (year/month) chunk in parallel task.
* Called by DBMS_PARALLEL_EXECUTE framework for each chunk.
* This procedure is PUBLIC because DBMS_PARALLEL_EXECUTE requires it,
* but should NOT be called directly by external code.
* @param pStartId - Chunk start ID (CHUNK_ID from A_PARALLEL_EXPORT_CHUNKS table)
* @param pEndId - Chunk end ID (same as pStartId for single-row chunks)
* @param pTaskName - Task name for session isolation (optional, DEFAULT NULL for backward compatibility)
**/
PROCEDURE EXPORT_PARTITION_PARALLEL (
pStartId IN NUMBER,
pEndId IN NUMBER,
pTaskName IN VARCHAR2 DEFAULT NULL
);
---------------------------------------------------------------------------------------------------------------------------
-- MAIN EXPORT PROCEDURES
---------------------------------------------------------------------------------------------------------------------------
/**
* @name EXPORT_TABLE_DATA
* @desc Wrapper procedure for DBMS_CLOUD.EXPORT_DATA.
* Exports data into single CSV file on OCI infrastructure.
* pBucketArea parameter accepts: 'INBOX', 'ODS', 'DATA', 'ARCHIVE'
* Supports template table for column order and per-column date formatting.
* When pRegisterExport=TRUE, successfully exported file is registered in:
* - CT_MRDS.A_SOURCE_FILE_RECEIVED (tracks file location, size, checksum, and metadata)
* @param pFileName - Optional filename (e.g., 'export.csv'). NULL = auto-generate from table name
* @param pTemplateTableName - Optional template table (SCHEMA.TABLE or TABLE) for:
* - Column order control (template defines CSV structure)
* - Per-column date formatting via FILE_MANAGER.GET_DATE_FORMAT
* - NULL = use source table columns in natural order
* @param pMaxFileSize - Maximum file size in bytes (default 104857600 = 100MB, min 10MB, max 1GB)
* @param pRegisterExport - When TRUE, registers exported CSV file in A_SOURCE_FILE_RECEIVED table
* @param pProcessName - Process name stored in PROCESS_NAME column (default 'DATA_EXPORTER')
* @example
* begin
* DATA_EXPORTER.EXPORT_TABLE_DATA(
* pSchemaName => 'CT_MRDS',
* pTableName => 'MY_TABLE',
* pKeyColumnName => 'A_ETL_LOAD_SET_KEY_FK',
* pBucketArea => 'DATA',
* pFolderName => 'csv_exports',
* pFileName => 'my_export.csv', -- Optional
* pTemplateTableName => 'CT_ET_TEMPLATES.MY_TEMPLATE', -- Optional
* pMaxFileSize => 104857600, -- Optional, default 100MB
* pRegisterExport => TRUE -- Optional, default FALSE
* );
* end;
**/
PROCEDURE EXPORT_TABLE_DATA (
pSchemaName IN VARCHAR2,
pTableName IN VARCHAR2,
pKeyColumnName IN VARCHAR2,
pBucketArea IN VARCHAR2,
pFolderName IN VARCHAR2,
pFileName IN VARCHAR2 default NULL,
pTemplateTableName IN VARCHAR2 default NULL,
pMaxFileSize IN NUMBER default 104857600,
pRegisterExport IN BOOLEAN default FALSE,
pProcessName IN VARCHAR2 default 'DATA_EXPORTER',
pCredentialName IN VARCHAR2 default ENV_MANAGER.gvCredentialName
);
/**
* @name EXPORT_TABLE_DATA_BY_DATE
* @desc Wrapper procedure for DBMS_CLOUD.EXPORT_DATA.
* Exports data into PARQUET files on OCI infrustructure.
* Each YEAR_MONTH pair goes to seperate file (implicit partitioning).
* Allows specifying custom column list or uses T.* if pColumnList is NULL.
* Validates that all columns in pColumnList exist in the target table.
* Automatically adds 'T.' prefix to column names in pColumnList.
* Supports parallel partition processing via pParallelDegree parameter (default 1, range 1-16).
* pBucketArea parameter accepts: 'INBOX', 'ODS', 'DATA', 'ARCHIVE'
* @example
* begin
* DATA_EXPORTER.EXPORT_TABLE_DATA_BY_DATE(
* pSchemaName => 'CT_MRDS',
* pTableName => 'MY_TABLE',
* pKeyColumnName => 'A_ETL_LOAD_SET_KEY_FK',
* pBucketArea => 'DATA',
* pFolderName => 'parquet_exports',
* pColumnList => 'COLUMN1, COLUMN2, COLUMN3', -- Optional
* pMinDate => DATE '2024-01-01',
* pMaxDate => SYSDATE,
* pParallelDegree => 8 -- Optional, default 1, range 1-16
* );
* end;
**/
PROCEDURE EXPORT_TABLE_DATA_BY_DATE (
pSchemaName IN VARCHAR2,
pTableName IN VARCHAR2,
pKeyColumnName IN VARCHAR2,
pBucketArea IN VARCHAR2,
pFolderName IN VARCHAR2,
pColumnList IN VARCHAR2 default NULL,
pMinDate IN DATE default DATE '1900-01-01',
pMaxDate IN DATE default SYSDATE,
pParallelDegree IN NUMBER default 1,
pTemplateTableName IN VARCHAR2 default NULL,
pJobClass IN VARCHAR2 default NULL,
pCredentialName IN VARCHAR2 default ENV_MANAGER.gvCredentialName
);
/**
* @name EXPORT_TABLE_DATA_TO_CSV_BY_DATE
* @desc Exports data to separate CSV files partitioned by year and month.
* Creates one CSV file for each year/month combination found in the data.
* Uses the same date filtering mechanism with CT_ODS.A_LOAD_HISTORY as EXPORT_TABLE_DATA_BY_DATE,
* but exports to CSV format instead of Parquet.
* Supports parallel partition processing via pParallelDegree parameter (1-16).
* File naming pattern: {pFileName}_YYYYMM.csv or {TABLENAME}_YYYYMM.csv (if pFileName is NULL)
* When pRegisterExport=TRUE, successfully exported files are registered in:
* - CT_MRDS.A_SOURCE_FILE_RECEIVED (tracks file location, size, checksum, and metadata)
* @param pProcessName - Process name stored in PROCESS_NAME column (default 'DATA_EXPORTER')
* @example
* begin
* -- With custom filename
* DATA_EXPORTER.EXPORT_TABLE_DATA_TO_CSV_BY_DATE(
* pSchemaName => 'CT_MRDS',
* pTableName => 'MY_TABLE',
* pKeyColumnName => 'A_ETL_LOAD_SET_KEY_FK',
* pBucketArea => 'DATA',
* pFolderName => 'exports',
* pFileName => 'my_export.csv',
* pMinDate => DATE '2024-01-01',
* pMaxDate => SYSDATE,
* pParallelDegree => 8, -- Optional, default 1, range 1-16
* pRegisterExport => TRUE -- Optional, default FALSE, registers to A_SOURCE_FILE_RECEIVED
* );
*
* -- With auto-generated filename (based on table name only)
* DATA_EXPORTER.EXPORT_TABLE_DATA_TO_CSV_BY_DATE(
* pSchemaName => 'OU_TOP',
* pTableName => 'AGGREGATED_ALLOTMENT',
* pKeyColumnName => 'A_ETL_LOAD_SET_KEY_FK',
* pBucketArea => 'ARCHIVE',
* pFolderName => 'exports',
* pMinDate => DATE '2025-09-01',
* pMaxDate => DATE '2025-09-17',
* pRegisterExport => TRUE -- Registers each export to A_SOURCE_FILE_RECEIVED table
* );
* -- This will create files like: AGGREGATED_ALLOTMENT_202509.csv, etc.
* pBucketArea parameter accepts: 'INBOX', 'ODS', 'DATA', 'ARCHIVE'
* end;
**/
PROCEDURE EXPORT_TABLE_DATA_TO_CSV_BY_DATE (
pSchemaName IN VARCHAR2,
pTableName IN VARCHAR2,
pKeyColumnName IN VARCHAR2,
pBucketArea IN VARCHAR2,
pFolderName IN VARCHAR2,
pFileName IN VARCHAR2 DEFAULT NULL,
pColumnList IN VARCHAR2 default NULL,
pMinDate IN DATE default DATE '1900-01-01',
pMaxDate IN DATE default SYSDATE,
pParallelDegree IN NUMBER default 1,
pTemplateTableName IN VARCHAR2 default NULL,
pMaxFileSize IN NUMBER default 104857600,
pRegisterExport IN BOOLEAN default FALSE,
pProcessName IN VARCHAR2 default 'DATA_EXPORTER',
pJobClass IN VARCHAR2 default NULL,
pCredentialName IN VARCHAR2 default ENV_MANAGER.gvCredentialName
);
---------------------------------------------------------------------------------------------------------------------------
-- VERSION MANAGEMENT FUNCTIONS
---------------------------------------------------------------------------------------------------------------------------
/**
* Returns the current package version number
* return: Version string in format X.Y.Z (e.g., '2.1.0')
**/
FUNCTION GET_VERSION RETURN VARCHAR2;
/**
* Returns comprehensive build information including version, date, and author
* return: Formatted string with complete build details
**/
FUNCTION GET_BUILD_INFO RETURN VARCHAR2;
/**
* Returns the version history with recent changes
* return: Multi-line string with version history
**/
FUNCTION GET_VERSION_HISTORY RETURN VARCHAR2;
END;
/