Add DATA_EXPORTER package and update installation scripts for export registration
- Created new package CT_MRDS.DATA_EXPORTER (v2.6.3) for comprehensive data export capabilities, including CSV and Parquet formats with OCI integration. - Implemented version history tracking and enhanced error handling for export processes. - Updated installation scripts to include pRegisterExport parameter for registering exports in A_WORKFLOW_HISTORY and A_SOURCE_FILE_RECEIVED.
This commit is contained in:
@@ -1004,6 +1004,9 @@ AS
|
|||||||
* Allows specifying custom column list or uses T.* if pColumnList is NULL.
|
* Allows specifying custom column list or uses T.* if pColumnList is NULL.
|
||||||
* Validates that all columns in pColumnList exist in the target table.
|
* Validates that all columns in pColumnList exist in the target table.
|
||||||
* Automatically adds 'T.' prefix to column names in pColumnList.
|
* Automatically adds 'T.' prefix to column names in pColumnList.
|
||||||
|
* When pRegisterExport=TRUE, successfully exported files are registered in:
|
||||||
|
* - CT_MRDS.A_WORKFLOW_HISTORY (one record per YEAR/MONTH with export timestamp)
|
||||||
|
* - CT_MRDS.A_SOURCE_FILE_RECEIVED (tracks file location and partition info)
|
||||||
* @example
|
* @example
|
||||||
* begin
|
* begin
|
||||||
* DATA_EXPORTER.EXPORT_TABLE_DATA_TO_CSV_BY_DATE(
|
* DATA_EXPORTER.EXPORT_TABLE_DATA_TO_CSV_BY_DATE(
|
||||||
@@ -1015,7 +1018,8 @@ AS
|
|||||||
* pFileName => 'my_export.csv',
|
* pFileName => 'my_export.csv',
|
||||||
* pColumnList => 'COLUMN1, COLUMN2, COLUMN3', -- Optional
|
* pColumnList => 'COLUMN1, COLUMN2, COLUMN3', -- Optional
|
||||||
* pMinDate => DATE '2024-01-01',
|
* pMinDate => DATE '2024-01-01',
|
||||||
* pMaxDate => SYSDATE
|
* pMaxDate => SYSDATE,
|
||||||
|
* pRegisterExport => TRUE -- Registers exports to tracking tables
|
||||||
* );
|
* );
|
||||||
* end;
|
* end;
|
||||||
**/
|
**/
|
||||||
@@ -1032,6 +1036,7 @@ AS
|
|||||||
pParallelDegree IN NUMBER default 1,
|
pParallelDegree IN NUMBER default 1,
|
||||||
pTemplateTableName IN VARCHAR2 default NULL,
|
pTemplateTableName IN VARCHAR2 default NULL,
|
||||||
pMaxFileSize IN NUMBER default 104857600,
|
pMaxFileSize IN NUMBER default 104857600,
|
||||||
|
pRegisterExport IN BOOLEAN default FALSE,
|
||||||
pCredentialName IN VARCHAR2 default ENV_MANAGER.gvCredentialName
|
pCredentialName IN VARCHAR2 default ENV_MANAGER.gvCredentialName
|
||||||
)
|
)
|
||||||
IS
|
IS
|
||||||
@@ -1045,6 +1050,10 @@ AS
|
|||||||
vBucketUri VARCHAR2(4000);
|
vBucketUri VARCHAR2(4000);
|
||||||
vCurrentCol VARCHAR2(128);
|
vCurrentCol VARCHAR2(128);
|
||||||
vPartitions partition_tab;
|
vPartitions partition_tab;
|
||||||
|
vWorkflowHistoryKey NUMBER;
|
||||||
|
vSourceFileReceivedKey NUMBER;
|
||||||
|
vFileName VARCHAR2(1000);
|
||||||
|
vFileUri VARCHAR2(4000);
|
||||||
|
|
||||||
BEGIN
|
BEGIN
|
||||||
vParameters := ENV_MANAGER.FORMAT_PARAMETERS(SYS.ODCIVARCHAR2LIST( 'pSchemaName => '''||nvl(pSchemaName, 'NULL')||''''
|
vParameters := ENV_MANAGER.FORMAT_PARAMETERS(SYS.ODCIVARCHAR2LIST( 'pSchemaName => '''||nvl(pSchemaName, 'NULL')||''''
|
||||||
@@ -1059,6 +1068,7 @@ AS
|
|||||||
,'pParallelDegree => '''||nvl(TO_CHAR(pParallelDegree), 'NULL')||''''
|
,'pParallelDegree => '''||nvl(TO_CHAR(pParallelDegree), 'NULL')||''''
|
||||||
,'pTemplateTableName => '''||nvl(pTemplateTableName, 'NULL')||''''
|
,'pTemplateTableName => '''||nvl(pTemplateTableName, 'NULL')||''''
|
||||||
,'pMaxFileSize => '''||nvl(TO_CHAR(pMaxFileSize), 'NULL')||''''
|
,'pMaxFileSize => '''||nvl(TO_CHAR(pMaxFileSize), 'NULL')||''''
|
||||||
|
,'pRegisterExport => '''||CASE WHEN pRegisterExport THEN 'TRUE' ELSE 'FALSE' END||''''
|
||||||
,'pCredentialName => '''||nvl(pCredentialName, 'NULL')||''''
|
,'pCredentialName => '''||nvl(pCredentialName, 'NULL')||''''
|
||||||
));
|
));
|
||||||
ENV_MANAGER.LOG_PROCESS_EVENT('Start','INFO', vParameters);
|
ENV_MANAGER.LOG_PROCESS_EVENT('Start','INFO', vParameters);
|
||||||
@@ -1135,6 +1145,58 @@ AS
|
|||||||
pMaxFileSize => pMaxFileSize,
|
pMaxFileSize => pMaxFileSize,
|
||||||
pParameters => vParameters
|
pParameters => vParameters
|
||||||
);
|
);
|
||||||
|
|
||||||
|
-- Register export if requested
|
||||||
|
IF pRegisterExport THEN
|
||||||
|
-- Construct filename and URI for this partition
|
||||||
|
vFileName := NVL(vFileBaseName, UPPER(REPLACE(vTableName, vSchemaName || '.', ''))) || '_' || vPartitions(i).year || vPartitions(i).month || '.csv';
|
||||||
|
vFileUri := vBucketUri || CASE WHEN pFolderName IS NOT NULL THEN pFolderName || '/' ELSE '' END || sanitizeFilename(vFileName);
|
||||||
|
|
||||||
|
-- Create A_WORKFLOW_HISTORY record for this export (one per year/month)
|
||||||
|
vWorkflowHistoryKey := CT_MRDS.A_WORKFLOW_HISTORY_KEY_SEQ.NEXTVAL;
|
||||||
|
INSERT INTO CT_MRDS.A_WORKFLOW_HISTORY (
|
||||||
|
A_WORKFLOW_HISTORY_KEY,
|
||||||
|
SERVICE_NAME,
|
||||||
|
ORCHESTRATION_RUN_ID,
|
||||||
|
WORKFLOW_NAME,
|
||||||
|
WORKFLOW_START,
|
||||||
|
WORKFLOW_END,
|
||||||
|
WORKFLOW_SUCCESSFUL
|
||||||
|
) VALUES (
|
||||||
|
vWorkflowHistoryKey,
|
||||||
|
'DATA_EXPORTER',
|
||||||
|
'CSV_EXPORT_' || TO_CHAR(SYSTIMESTAMP, 'YYYYMMDDHH24MISSFF'),
|
||||||
|
'DATA_EXPORT_' || UPPER(REPLACE(vTableName, vSchemaName || '.', '')) || '_' || vPartitions(i).year || vPartitions(i).month,
|
||||||
|
SYSTIMESTAMP,
|
||||||
|
SYSTIMESTAMP,
|
||||||
|
'Y'
|
||||||
|
);
|
||||||
|
|
||||||
|
-- Create A_SOURCE_FILE_RECEIVED record for this export
|
||||||
|
vSourceFileReceivedKey := CT_MRDS.A_SOURCE_FILE_RECEIVED_KEY_SEQ.NEXTVAL;
|
||||||
|
INSERT INTO CT_MRDS.A_SOURCE_FILE_RECEIVED (
|
||||||
|
A_SOURCE_FILE_RECEIVED_KEY,
|
||||||
|
A_SOURCE_FILE_CONFIG_KEY,
|
||||||
|
SOURCE_FILE_NAME,
|
||||||
|
RECEPTION_DATE,
|
||||||
|
PROCESSING_STATUS,
|
||||||
|
PARTITION_YEAR,
|
||||||
|
PARTITION_MONTH,
|
||||||
|
ARCH_FILE_NAME
|
||||||
|
) VALUES (
|
||||||
|
vSourceFileReceivedKey,
|
||||||
|
-1, -- Special marker for exported files (no config)
|
||||||
|
vFileUri,
|
||||||
|
SYSDATE,
|
||||||
|
'ARCHIVED',
|
||||||
|
vPartitions(i).year,
|
||||||
|
vPartitions(i).month,
|
||||||
|
vFileName
|
||||||
|
);
|
||||||
|
|
||||||
|
COMMIT;
|
||||||
|
ENV_MANAGER.LOG_PROCESS_EVENT('Registered export: WorkflowKey=' || vWorkflowHistoryKey || ', FileReceivedKey=' || vSourceFileReceivedKey || ', File=' || vFileName, 'INFO', vParameters);
|
||||||
|
END IF;
|
||||||
END LOOP;
|
END LOOP;
|
||||||
|
|
||||||
-- Parallel processing (parallel degree > 1)
|
-- Parallel processing (parallel degree > 1)
|
||||||
@@ -1229,6 +1291,64 @@ AS
|
|||||||
-- Clean up task
|
-- Clean up task
|
||||||
DBMS_PARALLEL_EXECUTE.DROP_TASK(task_name => vTaskName);
|
DBMS_PARALLEL_EXECUTE.DROP_TASK(task_name => vTaskName);
|
||||||
|
|
||||||
|
-- Register exports if requested (after successful parallel processing)
|
||||||
|
IF pRegisterExport THEN
|
||||||
|
ENV_MANAGER.LOG_PROCESS_EVENT('Registering ' || vPartitions.COUNT || ' parallel exports to A_WORKFLOW_HISTORY and A_SOURCE_FILE_RECEIVED', 'INFO', vParameters);
|
||||||
|
|
||||||
|
FOR i IN 1 .. vPartitions.COUNT LOOP
|
||||||
|
-- Construct filename and URI for this partition
|
||||||
|
vFileName := NVL(vFileBaseName, UPPER(REPLACE(vTableName, vSchemaName || '.', ''))) || '_' || vPartitions(i).year || vPartitions(i).month || '.csv';
|
||||||
|
vFileUri := vBucketUri || CASE WHEN pFolderName IS NOT NULL THEN pFolderName || '/' ELSE '' END || sanitizeFilename(vFileName);
|
||||||
|
|
||||||
|
-- Create A_WORKFLOW_HISTORY record for this export (one per year/month)
|
||||||
|
vWorkflowHistoryKey := CT_MRDS.A_WORKFLOW_HISTORY_KEY_SEQ.NEXTVAL;
|
||||||
|
INSERT INTO CT_MRDS.A_WORKFLOW_HISTORY (
|
||||||
|
A_WORKFLOW_HISTORY_KEY,
|
||||||
|
SERVICE_NAME,
|
||||||
|
ORCHESTRATION_RUN_ID,
|
||||||
|
WORKFLOW_NAME,
|
||||||
|
WORKFLOW_START,
|
||||||
|
WORKFLOW_END,
|
||||||
|
WORKFLOW_SUCCESSFUL
|
||||||
|
) VALUES (
|
||||||
|
vWorkflowHistoryKey,
|
||||||
|
'DATA_EXPORTER',
|
||||||
|
'CSV_EXPORT_' || TO_CHAR(SYSTIMESTAMP, 'YYYYMMDDHH24MISSFF'),
|
||||||
|
'DATA_EXPORT_' || UPPER(REPLACE(vTableName, vSchemaName || '.', '')) || '_' || vPartitions(i).year || vPartitions(i).month,
|
||||||
|
SYSTIMESTAMP,
|
||||||
|
SYSTIMESTAMP,
|
||||||
|
'Y'
|
||||||
|
);
|
||||||
|
|
||||||
|
-- Create A_SOURCE_FILE_RECEIVED record for this export
|
||||||
|
vSourceFileReceivedKey := CT_MRDS.A_SOURCE_FILE_RECEIVED_KEY_SEQ.NEXTVAL;
|
||||||
|
INSERT INTO CT_MRDS.A_SOURCE_FILE_RECEIVED (
|
||||||
|
A_SOURCE_FILE_RECEIVED_KEY,
|
||||||
|
A_SOURCE_FILE_CONFIG_KEY,
|
||||||
|
SOURCE_FILE_NAME,
|
||||||
|
RECEPTION_DATE,
|
||||||
|
PROCESSING_STATUS,
|
||||||
|
PARTITION_YEAR,
|
||||||
|
PARTITION_MONTH,
|
||||||
|
ARCH_FILE_NAME
|
||||||
|
) VALUES (
|
||||||
|
vSourceFileReceivedKey,
|
||||||
|
-1, -- Special marker for exported files (no config)
|
||||||
|
vFileUri,
|
||||||
|
SYSDATE,
|
||||||
|
'ARCHIVED',
|
||||||
|
vPartitions(i).year,
|
||||||
|
vPartitions(i).month,
|
||||||
|
vFileName
|
||||||
|
);
|
||||||
|
|
||||||
|
ENV_MANAGER.LOG_PROCESS_EVENT('Registered parallel export: WorkflowKey=' || vWorkflowHistoryKey || ', FileReceivedKey=' || vSourceFileReceivedKey || ', File=' || vFileName, 'DEBUG', vParameters);
|
||||||
|
END LOOP;
|
||||||
|
|
||||||
|
COMMIT;
|
||||||
|
ENV_MANAGER.LOG_PROCESS_EVENT('Successfully registered all ' || vPartitions.COUNT || ' exports', 'INFO', vParameters);
|
||||||
|
END IF;
|
||||||
|
|
||||||
-- Clean up chunks for THIS specific task only (session-safe)
|
-- Clean up chunks for THIS specific task only (session-safe)
|
||||||
-- CRITICAL: Use TASK_NAME filter to avoid deleting chunks from other active CSV sessions
|
-- CRITICAL: Use TASK_NAME filter to avoid deleting chunks from other active CSV sessions
|
||||||
DELETE FROM CT_MRDS.A_PARALLEL_EXPORT_CHUNKS WHERE TASK_NAME = vTaskName;
|
DELETE FROM CT_MRDS.A_PARALLEL_EXPORT_CHUNKS WHERE TASK_NAME = vTaskName;
|
||||||
|
|||||||
@@ -8,26 +8,28 @@ AS
|
|||||||
* which returns documentation text for confluence page (to Copy-Paste it).
|
* which returns documentation text for confluence page (to Copy-Paste it).
|
||||||
**/
|
**/
|
||||||
|
|
||||||
-- Package Version Information
|
-- Package Version Information (Semantic Versioning: MAJOR.MINOR.PATCH)
|
||||||
PACKAGE_VERSION CONSTANT VARCHAR2(10) := '2.6.3';
|
PACKAGE_VERSION CONSTANT VARCHAR2(10) := '2.7.0';
|
||||||
PACKAGE_BUILD_DATE CONSTANT VARCHAR2(19) := '2026-01-28 19:30:00';
|
PACKAGE_BUILD_DATE CONSTANT VARCHAR2(20) := '2026-02-09 20:00:00';
|
||||||
PACKAGE_AUTHOR CONSTANT VARCHAR2(50) := 'MRDS Development Team';
|
PACKAGE_AUTHOR CONSTANT VARCHAR2(100) := 'Grzegorz Michalski';
|
||||||
|
|
||||||
-- Version History (last 3-5 changes)
|
|
||||||
VERSION_HISTORY CONSTANT VARCHAR2(4000) :=
|
|
||||||
'v2.6.3 (2026-01-28): COMPILATION FIX - Resolved ORA-00904 error in EXPORT_PARTITION_PARALLEL. SQLERRM and DBMS_UTILITY.FORMAT_ERROR_BACKTRACE cannot be used directly in SQL UPDATE statements. Now properly assigned to vgMsgTmp variable before UPDATE.' || CHR(10) ||
|
|
||||||
'v2.6.2 (2026-01-28): CRITICAL FIX - Race condition when multiple exports run simultaneously. Changed DELETE to filter by age (>24h) instead of deleting all COMPLETED chunks. Prevents concurrent sessions from deleting each other chunks. Session-safe cleanup with TASK_NAME filtering. Enables true parallel execution of multiple export jobs.' || CHR(10) ||
|
|
||||||
'v2.6.1 (2026-01-28): Added DELETE_FAILED_EXPORT_FILE procedure to clean up partial/corrupted files before retry. When partition fails mid-export, partial file is deleted before retry to prevent Oracle from creating _1 suffixed duplicates. Ensures clean retry without orphaned files in OCI bucket.' || CHR(10) ||
|
|
||||||
'v2.6.0 (2026-01-28): CRITICAL FIX - Added STATUS tracking to A_PARALLEL_EXPORT_CHUNKS table to prevent data duplication on retry. System now restarts ONLY failed partitions instead of re-exporting all data. Added ERROR_MESSAGE and EXPORT_TIMESTAMP columns for better error handling and monitoring. Prevents duplicate file creation when parallel tasks fail (e.g., 22 partitions with 16 threads, 3 failures no longer duplicates 19 successful exports).' || CHR(10) ||
|
|
||||||
'v2.5.0 (2026-01-26): Added recorddelimiter parameter with CRLF (CHR(13)||CHR(10)) for CSV exports to ensure Windows-compatible line endings. Improves cross-platform compatibility when CSV files are opened in Windows applications (Notepad, Excel).' || CHR(10) ||
|
|
||||||
'v2.4.0 (2026-01-11): Added pTemplateTableName parameter for per-column date format configuration. Implements dynamic query building with TO_CHAR for each date/timestamp column using FILE_MANAGER.GET_DATE_FORMAT. Supports 3-tier hierarchy: column-specific, template DEFAULT, global fallback. Eliminates single dateformat limitation of DBMS_CLOUD.EXPORT_DATA.' || CHR(10) ||
|
|
||||||
'v2.3.0 (2025-12-20): Added parallel partition processing using DBMS_PARALLEL_EXECUTE. New pParallelDegree parameter (1-16, default 1) for EXPORT_TABLE_DATA_BY_DATE and EXPORT_TABLE_DATA_TO_CSV_BY_DATE procedures. Each year/month partition processed in separate thread for improved performance.' || CHR(10) ||
|
|
||||||
'v2.2.0 (2025-12-19): DRY refactoring - extracted shared helper functions (sanitizeFilename, VALIDATE_TABLE_AND_COLUMNS, GET_PARTITIONS, EXPORT_SINGLE_PARTITION worker procedure). Reduced code duplication by ~400 lines. Prepared architecture for v2.3.0 parallel processing.' || CHR(10) ||
|
|
||||||
'v2.1.1 (2025-12-04): Fixed JOIN column reference A_WORKFLOW_HISTORY_KEY -> A_ETL_LOAD_SET_KEY, added consistent column mapping and dynamic column list to EXPORT_TABLE_DATA procedure, enhanced DEBUG logging for all export operations' || CHR(10) ||
|
|
||||||
'v2.1.0 (2025-10-22): Added version tracking and PARTITION_YEAR/PARTITION_MONTH support' || CHR(10) ||
|
|
||||||
'v2.0.0 (2025-10-01): Separated export functionality from FILE_MANAGER package' || CHR(10);
|
|
||||||
|
|
||||||
cgBL CONSTANT VARCHAR2(2) := CHR(13)||CHR(10);
|
cgBL CONSTANT VARCHAR2(2) := CHR(13)||CHR(10);
|
||||||
|
|
||||||
|
-- Version History (Latest changes first)
|
||||||
|
VERSION_HISTORY CONSTANT VARCHAR2(4000) :=
|
||||||
|
'v2.7.0 (2026-02-09): NEW FEATURE - Added pRegisterExport parameter to EXPORT_TABLE_DATA_TO_CSV_BY_DATE. When TRUE, successfully exported files are registered in A_WORKFLOW_HISTORY (one record per YEAR/MONTH) and A_SOURCE_FILE_RECEIVED tables for tracking and audit purposes.' || cgBL ||
|
||||||
|
'v2.6.3 (2026-01-28): COMPILATION FIX - Resolved ORA-00904 error in EXPORT_PARTITION_PARALLEL. SQLERRM and DBMS_UTILITY.FORMAT_ERROR_BACKTRACE cannot be used directly in SQL UPDATE statements. Now properly assigned to vgMsgTmp variable before UPDATE.' || cgBL ||
|
||||||
|
'v2.6.2 (2026-01-28): CRITICAL FIX - Race condition when multiple exports run simultaneously. Changed DELETE to filter by age (>24h) instead of deleting all COMPLETED chunks. Prevents concurrent sessions from deleting each other chunks. Session-safe cleanup with TASK_NAME filtering. Enables true parallel execution of multiple export jobs.' || cgBL ||
|
||||||
|
'v2.6.1 (2026-01-28): Added DELETE_FAILED_EXPORT_FILE procedure to clean up partial/corrupted files before retry. When partition fails mid-export, partial file is deleted before retry to prevent Oracle from creating _1 suffixed duplicates. Ensures clean retry without orphaned files in OCI bucket.' || cgBL ||
|
||||||
|
'v2.6.0 (2026-01-28): CRITICAL FIX - Added STATUS tracking to A_PARALLEL_EXPORT_CHUNKS table to prevent data duplication on retry. System now restarts ONLY failed partitions instead of re-exporting all data. Added ERROR_MESSAGE and EXPORT_TIMESTAMP columns for better error handling and monitoring. Prevents duplicate file creation when parallel tasks fail (e.g., 22 partitions with 16 threads, 3 failures no longer duplicates 19 successful exports).' || cgBL ||
|
||||||
|
'v2.5.0 (2026-01-26): Added recorddelimiter parameter with CRLF (CHR(13)||CHR(10)) for CSV exports to ensure Windows-compatible line endings. Improves cross-platform compatibility when CSV files are opened in Windows applications (Notepad, Excel).' || cgBL ||
|
||||||
|
'v2.4.0 (2026-01-11): Added pTemplateTableName parameter for per-column date format configuration. Implements dynamic query building with TO_CHAR for each date/timestamp column using FILE_MANAGER.GET_DATE_FORMAT. Supports 3-tier hierarchy: column-specific, template DEFAULT, global fallback. Eliminates single dateformat limitation of DBMS_CLOUD.EXPORT_DATA.' || cgBL ||
|
||||||
|
'v2.3.0 (2025-12-20): Added parallel partition processing using DBMS_PARALLEL_EXECUTE. New pParallelDegree parameter (1-16, default 1) for EXPORT_TABLE_DATA_BY_DATE and EXPORT_TABLE_DATA_TO_CSV_BY_DATE procedures. Each year/month partition processed in separate thread for improved performance.' || cgBL ||
|
||||||
|
'v2.2.0 (2025-12-19): DRY refactoring - extracted shared helper functions (sanitizeFilename, VALIDATE_TABLE_AND_COLUMNS, GET_PARTITIONS, EXPORT_SINGLE_PARTITION worker procedure). Reduced code duplication by ~400 lines. Prepared architecture for v2.3.0 parallel processing.' || cgBL ||
|
||||||
|
'v2.1.1 (2025-12-04): Fixed JOIN column reference A_WORKFLOW_HISTORY_KEY -> A_ETL_LOAD_SET_KEY, added consistent column mapping and dynamic column list to EXPORT_TABLE_DATA procedure, enhanced DEBUG logging for all export operations' || cgBL ||
|
||||||
|
'v2.1.0 (2025-10-22): Added version tracking and PARTITION_YEAR/PARTITION_MONTH support' || cgBL ||
|
||||||
|
'v2.0.0 (2025-10-01): Separated export functionality from FILE_MANAGER package';
|
||||||
|
|
||||||
vgMsgTmp VARCHAR2(32000);
|
vgMsgTmp VARCHAR2(32000);
|
||||||
|
|
||||||
---------------------------------------------------------------------------------------------------------------------------
|
---------------------------------------------------------------------------------------------------------------------------
|
||||||
@@ -146,6 +148,9 @@ AS
|
|||||||
* but exports to CSV format instead of Parquet.
|
* but exports to CSV format instead of Parquet.
|
||||||
* Supports parallel partition processing via pParallelDegree parameter (1-16).
|
* Supports parallel partition processing via pParallelDegree parameter (1-16).
|
||||||
* File naming pattern: {pFileName}_YYYYMM.csv or {TABLENAME}_YYYYMM.csv (if pFileName is NULL)
|
* File naming pattern: {pFileName}_YYYYMM.csv or {TABLENAME}_YYYYMM.csv (if pFileName is NULL)
|
||||||
|
* When pRegisterExport=TRUE, successfully exported files are registered in:
|
||||||
|
* - CT_MRDS.A_WORKFLOW_HISTORY (one record per YEAR/MONTH with export timestamp)
|
||||||
|
* - CT_MRDS.A_SOURCE_FILE_RECEIVED (tracks file location and partition info)
|
||||||
* @example
|
* @example
|
||||||
* begin
|
* begin
|
||||||
* -- With custom filename
|
* -- With custom filename
|
||||||
@@ -158,7 +163,8 @@ AS
|
|||||||
* pFileName => 'my_export.csv',
|
* pFileName => 'my_export.csv',
|
||||||
* pMinDate => DATE '2024-01-01',
|
* pMinDate => DATE '2024-01-01',
|
||||||
* pMaxDate => SYSDATE,
|
* pMaxDate => SYSDATE,
|
||||||
* pParallelDegree => 8 -- Optional, default 1, range 1-16
|
* pParallelDegree => 8, -- Optional, default 1, range 1-16
|
||||||
|
* pRegisterExport => TRUE -- Optional, default FALSE, registers to A_WORKFLOW_HISTORY and A_SOURCE_FILE_RECEIVED
|
||||||
* );
|
* );
|
||||||
*
|
*
|
||||||
* -- With auto-generated filename (based on table name only)
|
* -- With auto-generated filename (based on table name only)
|
||||||
@@ -169,7 +175,8 @@ AS
|
|||||||
* pBucketArea => 'ARCHIVE',
|
* pBucketArea => 'ARCHIVE',
|
||||||
* pFolderName => 'exports',
|
* pFolderName => 'exports',
|
||||||
* pMinDate => DATE '2025-09-01',
|
* pMinDate => DATE '2025-09-01',
|
||||||
* pMaxDate => DATE '2025-09-17'
|
* pMaxDate => DATE '2025-09-17',
|
||||||
|
* pRegisterExport => TRUE -- Registers each export to tracking tables
|
||||||
* );
|
* );
|
||||||
* -- This will create files like: AGGREGATED_ALLOTMENT_202509.csv, etc.
|
* -- This will create files like: AGGREGATED_ALLOTMENT_202509.csv, etc.
|
||||||
* pBucketArea parameter accepts: 'INBOX', 'ODS', 'DATA', 'ARCHIVE'
|
* pBucketArea parameter accepts: 'INBOX', 'ODS', 'DATA', 'ARCHIVE'
|
||||||
@@ -188,6 +195,7 @@ AS
|
|||||||
pParallelDegree IN NUMBER default 1,
|
pParallelDegree IN NUMBER default 1,
|
||||||
pTemplateTableName IN VARCHAR2 default NULL,
|
pTemplateTableName IN VARCHAR2 default NULL,
|
||||||
pMaxFileSize IN NUMBER default 104857600,
|
pMaxFileSize IN NUMBER default 104857600,
|
||||||
|
pRegisterExport IN BOOLEAN default FALSE,
|
||||||
pCredentialName IN VARCHAR2 default ENV_MANAGER.gvCredentialName
|
pCredentialName IN VARCHAR2 default ENV_MANAGER.gvCredentialName
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,218 @@
|
|||||||
|
create or replace PACKAGE CT_MRDS.DATA_EXPORTER
|
||||||
|
AUTHID CURRENT_USER
|
||||||
|
AS
|
||||||
|
/**
|
||||||
|
* Data Export Package: Provides comprehensive data export capabilities to various formats (CSV, Parquet)
|
||||||
|
* with support for cloud storage integration via Oracle Cloud Infrastructure (OCI).
|
||||||
|
* The structure of comment is used by GET_PACKAGE_DOCUMENTATION function
|
||||||
|
* which returns documentation text for confluence page (to Copy-Paste it).
|
||||||
|
**/
|
||||||
|
|
||||||
|
-- Package Version Information
|
||||||
|
PACKAGE_VERSION CONSTANT VARCHAR2(10) := '2.6.3';
|
||||||
|
PACKAGE_BUILD_DATE CONSTANT VARCHAR2(19) := '2026-01-28 19:30:00';
|
||||||
|
PACKAGE_AUTHOR CONSTANT VARCHAR2(50) := 'MRDS Development Team';
|
||||||
|
|
||||||
|
-- Version History (latest changes first)
|
||||||
|
VERSION_HISTORY CONSTANT VARCHAR2(4000) :=
|
||||||
|
'v2.6.3 (2026-01-28): COMPILATION FIX - Resolved ORA-00904 error in EXPORT_PARTITION_PARALLEL. SQLERRM and DBMS_UTILITY.FORMAT_ERROR_BACKTRACE cannot be used directly in SQL UPDATE statements. Now properly assigned to vgMsgTmp variable before UPDATE.' || CHR(10) ||
|
||||||
|
'v2.6.2 (2026-01-28): CRITICAL FIX - Race condition when multiple exports run simultaneously. Changed DELETE to filter by age (>24h) instead of deleting all COMPLETED chunks. Prevents concurrent sessions from deleting each other chunks. Session-safe cleanup with TASK_NAME filtering. Enables true parallel execution of multiple export jobs.' || CHR(10) ||
|
||||||
|
'v2.6.1 (2026-01-28): Added DELETE_FAILED_EXPORT_FILE procedure to clean up partial/corrupted files before retry. When partition fails mid-export, partial file is deleted before retry to prevent Oracle from creating _1 suffixed duplicates. Ensures clean retry without orphaned files in OCI bucket.' || CHR(10) ||
|
||||||
|
'v2.6.0 (2026-01-28): CRITICAL FIX - Added STATUS tracking to A_PARALLEL_EXPORT_CHUNKS table to prevent data duplication on retry. System now restarts ONLY failed partitions instead of re-exporting all data. Added ERROR_MESSAGE and EXPORT_TIMESTAMP columns for better error handling and monitoring. Prevents duplicate file creation when parallel tasks fail (e.g., 22 partitions with 16 threads, 3 failures no longer duplicates 19 successful exports).' || CHR(10) ||
|
||||||
|
'v2.5.0 (2026-01-26): Added recorddelimiter parameter with CRLF (CHR(13)||CHR(10)) for CSV exports to ensure Windows-compatible line endings. Improves cross-platform compatibility when CSV files are opened in Windows applications (Notepad, Excel).' || CHR(10) ||
|
||||||
|
'v2.4.0 (2026-01-11): Added pTemplateTableName parameter for per-column date format configuration. Implements dynamic query building with TO_CHAR for each date/timestamp column using FILE_MANAGER.GET_DATE_FORMAT. Supports 3-tier hierarchy: column-specific, template DEFAULT, global fallback. Eliminates single dateformat limitation of DBMS_CLOUD.EXPORT_DATA.' || CHR(10) ||
|
||||||
|
'v2.3.0 (2025-12-20): Added parallel partition processing using DBMS_PARALLEL_EXECUTE. New pParallelDegree parameter (1-16, default 1) for EXPORT_TABLE_DATA_BY_DATE and EXPORT_TABLE_DATA_TO_CSV_BY_DATE procedures. Each year/month partition processed in separate thread for improved performance.' || CHR(10) ||
|
||||||
|
'v2.2.0 (2025-12-19): DRY refactoring - extracted shared helper functions (sanitizeFilename, VALIDATE_TABLE_AND_COLUMNS, GET_PARTITIONS, EXPORT_SINGLE_PARTITION worker procedure). Reduced code duplication by ~400 lines. Prepared architecture for v2.3.0 parallel processing.' || CHR(10) ||
|
||||||
|
'v2.1.1 (2025-12-04): Fixed JOIN column reference A_WORKFLOW_HISTORY_KEY -> A_ETL_LOAD_SET_KEY, added consistent column mapping and dynamic column list to EXPORT_TABLE_DATA procedure, enhanced DEBUG logging for all export operations' || CHR(10) ||
|
||||||
|
'v2.1.0 (2025-10-22): Added version tracking and PARTITION_YEAR/PARTITION_MONTH support' || CHR(10) ||
|
||||||
|
'v2.0.0 (2025-10-01): Separated export functionality from FILE_MANAGER package' || CHR(10);
|
||||||
|
|
||||||
|
cgBL CONSTANT VARCHAR2(2) := CHR(13)||CHR(10);
|
||||||
|
vgMsgTmp VARCHAR2(32000);
|
||||||
|
|
||||||
|
---------------------------------------------------------------------------------------------------------------------------
|
||||||
|
-- TYPE DEFINITIONS FOR PARTITION HANDLING
|
||||||
|
---------------------------------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Record type for year/month partition information
|
||||||
|
**/
|
||||||
|
TYPE partition_rec IS RECORD (
|
||||||
|
year VARCHAR2(4),
|
||||||
|
month VARCHAR2(2)
|
||||||
|
);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Table type for collection of partition records
|
||||||
|
**/
|
||||||
|
TYPE partition_tab IS TABLE OF partition_rec;
|
||||||
|
|
||||||
|
---------------------------------------------------------------------------------------------------------------------------
|
||||||
|
-- INTERNAL PARALLEL PROCESSING CALLBACK
|
||||||
|
---------------------------------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @name EXPORT_PARTITION_PARALLEL
|
||||||
|
* @desc Internal callback procedure for DBMS_PARALLEL_EXECUTE.
|
||||||
|
* Processes single partition (year/month) chunk in parallel task.
|
||||||
|
* Called by DBMS_PARALLEL_EXECUTE framework for each chunk.
|
||||||
|
* This procedure is PUBLIC because DBMS_PARALLEL_EXECUTE requires it,
|
||||||
|
* but should NOT be called directly by external code.
|
||||||
|
* @param pStartId - Chunk start ID (CHUNK_ID from A_PARALLEL_EXPORT_CHUNKS table)
|
||||||
|
* @param pEndId - Chunk end ID (same as pStartId for single-row chunks)
|
||||||
|
**/
|
||||||
|
PROCEDURE EXPORT_PARTITION_PARALLEL (
|
||||||
|
pStartId IN NUMBER,
|
||||||
|
pEndId IN NUMBER
|
||||||
|
);
|
||||||
|
|
||||||
|
---------------------------------------------------------------------------------------------------------------------------
|
||||||
|
-- MAIN EXPORT PROCEDURES
|
||||||
|
---------------------------------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @name EXPORT_TABLE_DATA
|
||||||
|
* @desc Wrapper procedure for DBMS_CLOUD.EXPORT_DATA.
|
||||||
|
* Exports data into CSV file on OCI infrastructure.
|
||||||
|
* pBucketArea parameter accepts: 'INBOX', 'ODS', 'DATA', 'ARCHIVE'
|
||||||
|
* @example
|
||||||
|
* begin
|
||||||
|
* DATA_EXPORTER.EXPORT_TABLE_DATA(
|
||||||
|
* pSchemaName => 'CT_MRDS',
|
||||||
|
* pTableName => 'MY_TABLE',
|
||||||
|
* pKeyColumnName => 'A_ETL_LOAD_SET_KEY_FK',
|
||||||
|
* pBucketArea => 'DATA',
|
||||||
|
* pFolderName => 'csv_exports'
|
||||||
|
* );
|
||||||
|
* end;
|
||||||
|
**/
|
||||||
|
PROCEDURE EXPORT_TABLE_DATA (
|
||||||
|
pSchemaName IN VARCHAR2,
|
||||||
|
pTableName IN VARCHAR2,
|
||||||
|
pKeyColumnName IN VARCHAR2,
|
||||||
|
pBucketArea IN VARCHAR2,
|
||||||
|
pFolderName IN VARCHAR2,
|
||||||
|
pCredentialName IN VARCHAR2 default ENV_MANAGER.gvCredentialName
|
||||||
|
);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @name EXPORT_TABLE_DATA_BY_DATE
|
||||||
|
* @desc Wrapper procedure for DBMS_CLOUD.EXPORT_DATA.
|
||||||
|
* Exports data into PARQUET files on OCI infrastructure.
|
||||||
|
* Each YEAR_MONTH pair goes to a separate file (implicit partitioning).
|
||||||
|
* Allows specifying custom column list or uses T.* if pColumnList is NULL.
|
||||||
|
* Validates that all columns in pColumnList exist in the target table.
|
||||||
|
* Automatically adds 'T.' prefix to column names in pColumnList.
|
||||||
|
* Supports parallel partition processing via pParallelDegree parameter (default 1, range 1-16).
|
||||||
|
* pBucketArea parameter accepts: 'INBOX', 'ODS', 'DATA', 'ARCHIVE'
|
||||||
|
* @example
|
||||||
|
* begin
|
||||||
|
* DATA_EXPORTER.EXPORT_TABLE_DATA_BY_DATE(
|
||||||
|
* pSchemaName => 'CT_MRDS',
|
||||||
|
* pTableName => 'MY_TABLE',
|
||||||
|
* pKeyColumnName => 'A_ETL_LOAD_SET_KEY_FK',
|
||||||
|
* pBucketArea => 'DATA',
|
||||||
|
* pFolderName => 'parquet_exports',
|
||||||
|
* pColumnList => 'COLUMN1, COLUMN2, COLUMN3', -- Optional
|
||||||
|
* pMinDate => DATE '2024-01-01',
|
||||||
|
* pMaxDate => SYSDATE,
|
||||||
|
* pParallelDegree => 8 -- Optional, default 1, range 1-16
|
||||||
|
* );
|
||||||
|
* end;
|
||||||
|
**/
|
||||||
|
PROCEDURE EXPORT_TABLE_DATA_BY_DATE (
|
||||||
|
pSchemaName IN VARCHAR2,
|
||||||
|
pTableName IN VARCHAR2,
|
||||||
|
pKeyColumnName IN VARCHAR2,
|
||||||
|
pBucketArea IN VARCHAR2,
|
||||||
|
pFolderName IN VARCHAR2,
|
||||||
|
pColumnList IN VARCHAR2 default NULL,
|
||||||
|
pMinDate IN DATE default DATE '1900-01-01',
|
||||||
|
pMaxDate IN DATE default SYSDATE,
|
||||||
|
pParallelDegree IN NUMBER default 1,
|
||||||
|
pTemplateTableName IN VARCHAR2 default NULL,
|
||||||
|
pCredentialName IN VARCHAR2 default ENV_MANAGER.gvCredentialName
|
||||||
|
);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @name EXPORT_TABLE_DATA_TO_CSV_BY_DATE
|
||||||
|
* @desc Exports data to separate CSV files partitioned by year and month.
|
||||||
|
* Creates one CSV file for each year/month combination found in the data.
|
||||||
|
* Uses the same date filtering mechanism with CT_ODS.A_LOAD_HISTORY as EXPORT_TABLE_DATA_BY_DATE,
|
||||||
|
* but exports to CSV format instead of Parquet.
|
||||||
|
* Supports parallel partition processing via pParallelDegree parameter (1-16).
|
||||||
|
* File naming pattern: {pFileName}_YYYYMM.csv or {TABLENAME}_YYYYMM.csv (if pFileName is NULL)
|
||||||
|
* @example
|
||||||
|
* begin
|
||||||
|
* -- With custom filename
|
||||||
|
* DATA_EXPORTER.EXPORT_TABLE_DATA_TO_CSV_BY_DATE(
|
||||||
|
* pSchemaName => 'CT_MRDS',
|
||||||
|
* pTableName => 'MY_TABLE',
|
||||||
|
* pKeyColumnName => 'A_ETL_LOAD_SET_KEY_FK',
|
||||||
|
* pBucketArea => 'DATA',
|
||||||
|
* pFolderName => 'exports',
|
||||||
|
* pFileName => 'my_export.csv',
|
||||||
|
* pMinDate => DATE '2024-01-01',
|
||||||
|
* pMaxDate => SYSDATE,
|
||||||
|
* pParallelDegree => 8 -- Optional, default 1, range 1-16
|
||||||
|
* );
|
||||||
|
*
|
||||||
|
* -- With auto-generated filename (based on table name only)
|
||||||
|
* DATA_EXPORTER.EXPORT_TABLE_DATA_TO_CSV_BY_DATE(
|
||||||
|
* pSchemaName => 'OU_TOP',
|
||||||
|
* pTableName => 'AGGREGATED_ALLOTMENT',
|
||||||
|
* pKeyColumnName => 'A_ETL_LOAD_SET_KEY_FK',
|
||||||
|
* pBucketArea => 'ARCHIVE',
|
||||||
|
* pFolderName => 'exports',
|
||||||
|
* pMinDate => DATE '2025-09-01',
|
||||||
|
* pMaxDate => DATE '2025-09-17'
|
||||||
|
* );
|
||||||
|
* -- This will create files like: AGGREGATED_ALLOTMENT_202509.csv, etc.
|
||||||
|
* pBucketArea parameter accepts: 'INBOX', 'ODS', 'DATA', 'ARCHIVE'
|
||||||
|
* end;
|
||||||
|
**/
|
||||||
|
PROCEDURE EXPORT_TABLE_DATA_TO_CSV_BY_DATE (
|
||||||
|
pSchemaName IN VARCHAR2,
|
||||||
|
pTableName IN VARCHAR2,
|
||||||
|
pKeyColumnName IN VARCHAR2,
|
||||||
|
pBucketArea IN VARCHAR2,
|
||||||
|
pFolderName IN VARCHAR2,
|
||||||
|
pFileName IN VARCHAR2 DEFAULT NULL,
|
||||||
|
pColumnList IN VARCHAR2 default NULL,
|
||||||
|
pMinDate IN DATE default DATE '1900-01-01',
|
||||||
|
pMaxDate IN DATE default SYSDATE,
|
||||||
|
pParallelDegree IN NUMBER default 1,
|
||||||
|
pTemplateTableName IN VARCHAR2 default NULL,
|
||||||
|
pMaxFileSize IN NUMBER default 104857600,
|
||||||
|
pCredentialName IN VARCHAR2 default ENV_MANAGER.gvCredentialName
|
||||||
|
);
|
||||||
|
|
||||||
|
---------------------------------------------------------------------------------------------------------------------------
|
||||||
|
-- VERSION MANAGEMENT FUNCTIONS
|
||||||
|
---------------------------------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the current package version number
|
||||||
|
* return: Version string in format X.Y.Z (e.g., '2.1.0')
|
||||||
|
**/
|
||||||
|
FUNCTION GET_VERSION RETURN VARCHAR2;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns comprehensive build information including version, date, and author
|
||||||
|
* return: Formatted string with complete build details
|
||||||
|
**/
|
||||||
|
FUNCTION GET_BUILD_INFO RETURN VARCHAR2;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the version history with recent changes
|
||||||
|
* return: Multi-line string with version history
|
||||||
|
**/
|
||||||
|
FUNCTION GET_VERSION_HISTORY RETURN VARCHAR2;
|
||||||
|
|
||||||
|
END;
|
||||||
|
|
||||||
|
/
|
||||||
@@ -106,7 +106,8 @@ BEGIN
|
|||||||
pMaxDate => SYSDATE,
|
pMaxDate => SYSDATE,
|
||||||
pParallelDegree => 16,
|
pParallelDegree => 16,
|
||||||
pTemplateTableName => 'CT_ET_TEMPLATES.CSDB_DEBT',
|
pTemplateTableName => 'CT_ET_TEMPLATES.CSDB_DEBT',
|
||||||
pMaxFileSize => 104857600 -- 100MB in bytes (safe for parallel execution, avoids ORA-04036)
|
pMaxFileSize => 104857600, -- 100MB in bytes (safe for parallel execution, avoids ORA-04036)
|
||||||
|
pRegisterExport => TRUE -- Register exports in A_WORKFLOW_HISTORY and A_SOURCE_FILE_RECEIVED
|
||||||
);
|
);
|
||||||
|
|
||||||
DBMS_OUTPUT.PUT_LINE('SUCCESS: LEGACY_DEBT exported to DATA bucket with template column order');
|
DBMS_OUTPUT.PUT_LINE('SUCCESS: LEGACY_DEBT exported to DATA bucket with template column order');
|
||||||
@@ -225,7 +226,8 @@ BEGIN
|
|||||||
pMaxDate => SYSDATE,
|
pMaxDate => SYSDATE,
|
||||||
pParallelDegree => 16,
|
pParallelDegree => 16,
|
||||||
pTemplateTableName => 'CT_ET_TEMPLATES.CSDB_DEBT_DAILY',
|
pTemplateTableName => 'CT_ET_TEMPLATES.CSDB_DEBT_DAILY',
|
||||||
pMaxFileSize => 104857600 -- 100MB in bytes (safe for parallel execution, avoids ORA-04036)
|
pMaxFileSize => 104857600, -- 100MB in bytes (safe for parallel execution, avoids ORA-04036)
|
||||||
|
pRegisterExport => TRUE -- Register exports in A_WORKFLOW_HISTORY and A_SOURCE_FILE_RECEIVED
|
||||||
);
|
);
|
||||||
|
|
||||||
DBMS_OUTPUT.PUT_LINE('SUCCESS: LEGACY_DEBT_DAILY exported to DATA bucket with template column order');
|
DBMS_OUTPUT.PUT_LINE('SUCCESS: LEGACY_DEBT_DAILY exported to DATA bucket with template column order');
|
||||||
|
|||||||
Reference in New Issue
Block a user