Compare commits

...

6 Commits

Author SHA1 Message Date
Grzegorz Michalski
3171ff2ddf Update DATA_EXPORTER package to version 2.7.4, enhancing filename storage and metadata retrieval logic for exported files. 2026-02-11 18:27:44 +01:00
Grzegorz Michalski
d237c2d7aa pRegisterExport 2026-02-11 09:54:04 +01:00
Grzegorz Michalski
96e3e2f845 pRegisterExport 2026-02-11 09:53:53 +01:00
Grzegorz Michalski
3eb39091de dodanie: pRegisterExport => TRUE 2026-02-11 09:49:28 +01:00
Grzegorz Michalski
113b9f55e3 deprecated version 2.8.0 2026-02-11 09:45:11 +01:00
Grzegorz Michalski
6c8b22eac9 Add DATA_EXPORTER package and update installation scripts for export registration
- Created new package CT_MRDS.DATA_EXPORTER (v2.6.3) for comprehensive data export capabilities, including CSV and Parquet formats with OCI integration.
- Implemented version history tracking and enhanced error handling for export processes.
- Updated installation scripts to include pRegisterExport parameter for registering exports in A_WORKFLOW_HISTORY and A_SOURCE_FILE_RECEIVED.
2026-02-10 09:33:46 +01:00
7 changed files with 3408 additions and 8 deletions

View File

@@ -1032,6 +1032,7 @@ AS
pParallelDegree IN NUMBER default 1,
pTemplateTableName IN VARCHAR2 default NULL,
pMaxFileSize IN NUMBER default 104857600,
pRegisterExport IN BOOLEAN default FALSE,
pCredentialName IN VARCHAR2 default ENV_MANAGER.gvCredentialName
)
IS
@@ -1045,6 +1046,15 @@ AS
vBucketUri VARCHAR2(4000);
vCurrentCol VARCHAR2(128);
vPartitions partition_tab;
vSourceFileReceivedKey NUMBER;
vFileName VARCHAR2(1000);
vFileUri VARCHAR2(4000);
-- Variables for A_SOURCE_FILE_CONFIG lookup
vSourceKey VARCHAR2(100);
vTableId VARCHAR2(200);
vConfigKey NUMBER := -1;
vSlashPos1 NUMBER;
vSlashPos2 NUMBER;
BEGIN
vParameters := ENV_MANAGER.FORMAT_PARAMETERS(SYS.ODCIVARCHAR2LIST( 'pSchemaName => '''||nvl(pSchemaName, 'NULL')||''''
@@ -1059,6 +1069,7 @@ AS
,'pParallelDegree => '''||nvl(TO_CHAR(pParallelDegree), 'NULL')||''''
,'pTemplateTableName => '''||nvl(pTemplateTableName, 'NULL')||''''
,'pMaxFileSize => '''||nvl(TO_CHAR(pMaxFileSize), 'NULL')||''''
,'pRegisterExport => '''||CASE WHEN pRegisterExport THEN 'TRUE' ELSE 'FALSE' END||''''
,'pCredentialName => '''||nvl(pCredentialName, 'NULL')||''''
));
ENV_MANAGER.LOG_PROCESS_EVENT('Start','INFO', vParameters);
@@ -1250,6 +1261,175 @@ AS
END;
END IF;
END IF;
-- Note: File registration handled by EXPORT_SINGLE_PARTITION when pRegisterExport=TRUE
-- Each partition calls pRegisterExport logic independently during serial/parallel execution
-- Register exported files to A_SOURCE_FILE_RECEIVED if requested (after successful export)
IF pRegisterExport THEN
-- Lookup A_SOURCE_FILE_CONFIG_KEY based on pFolderName parsing
-- Format: {BUCKET_AREA}/{SOURCE_KEY}/{TABLE_ID}
-- Example: 'ODS/CSDB/CSDB_DEBT_DAILY' -> SOURCE_KEY='CSDB', TABLE_ID='CSDB_DEBT_DAILY'
-- Parse pFolderName to extract SOURCE_KEY and TABLE_ID
vSlashPos1 := INSTR(pFolderName, '/', 1, 1); -- First '/' position
vSlashPos2 := INSTR(pFolderName, '/', 1, 2); -- Second '/' position
IF vSlashPos1 > 0 AND vSlashPos2 > 0 THEN
-- Extract segment 2 (SOURCE_KEY) and segment 3 (TABLE_ID)
vSourceKey := SUBSTR(pFolderName, vSlashPos1 + 1, vSlashPos2 - vSlashPos1 - 1);
vTableId := SUBSTR(pFolderName, vSlashPos2 + 1);
-- Find configuration based on SOURCE_KEY and TABLE_ID
BEGIN
SELECT A_SOURCE_FILE_CONFIG_KEY
INTO vConfigKey
FROM CT_MRDS.A_SOURCE_FILE_CONFIG
WHERE A_SOURCE_KEY = vSourceKey
AND TABLE_ID = vTableId
AND SOURCE_FILE_TYPE = 'INPUT'
AND ROWNUM = 1;
ENV_MANAGER.LOG_PROCESS_EVENT('Found config key: ' || vConfigKey || ' for SOURCE=' || vSourceKey || ', TABLE=' || vTableId, 'DEBUG', vParameters);
EXCEPTION
WHEN NO_DATA_FOUND THEN
vConfigKey := -1;
ENV_MANAGER.LOG_PROCESS_EVENT('No config found for SOURCE=' || vSourceKey || ', TABLE=' || vTableId || ' - using default (-1)', 'INFO', vParameters);
END;
ELSE
-- Cannot parse folder name - use default
vConfigKey := -1;
ENV_MANAGER.LOG_PROCESS_EVENT('Cannot parse pFolderName: ' || pFolderName || ' - using default (-1)', 'WARNING', vParameters);
END IF;
ENV_MANAGER.LOG_PROCESS_EVENT('Registering ' || vPartitions.COUNT || ' exported files to A_SOURCE_FILE_RECEIVED with config key: ' || vConfigKey, 'INFO', vParameters);
FOR i IN 1 .. vPartitions.COUNT LOOP
-- Construct filename and URI for this partition
vFileName := NVL(vFileBaseName, UPPER(REPLACE(vTableName, vSchemaName || '.', ''))) || '_' || vPartitions(i).year || vPartitions(i).month || '.csv';
vFileUri := vBucketUri || CASE WHEN pFolderName IS NOT NULL THEN pFolderName || '/' ELSE '' END || sanitizeFilename(vFileName);
-- Get file metadata from OCI bucket (CHECKSUM, CREATED, BYTES) with retry logic
DECLARE
vChecksum VARCHAR2(128);
vCreated TIMESTAMP WITH TIME ZONE;
vBytes NUMBER;
vActualFileName VARCHAR2(1000); -- Actual filename with Oracle suffix
vSanitizedFileName VARCHAR2(1000);
vRetryCount NUMBER := 0;
vMaxRetries NUMBER := 1; -- One retry after initial attempt
vRetryDelay NUMBER := 2; -- 2 seconds delay
BEGIN
-- Sanitize filename first (PL/SQL function cannot be used directly in SQL)
vSanitizedFileName := sanitizeFilename(vFileName);
-- Remove .csv extension for LIKE pattern matching (Oracle adds suffixes BEFORE .csv)
-- Example: LEGACY_DEBT_202508.csv becomes LEGACY_DEBT_202508_1_20260211T102621591769Z.csv
vSanitizedFileName := REGEXP_REPLACE(vSanitizedFileName, '\.csv$', '', 1, 0, 'i');
-- Try to get file metadata with retry logic
<<metadata_retry_loop>>
LOOP
BEGIN
SELECT object_name, checksum, created, bytes
INTO vActualFileName, vChecksum, vCreated, vBytes
FROM TABLE(DBMS_CLOUD.LIST_OBJECTS(
credential_name => pCredentialName,
location_uri => vBucketUri
))
WHERE object_name LIKE CASE WHEN pFolderName IS NOT NULL THEN pFolderName || '/' ELSE '' END || vSanitizedFileName || '%'
ORDER BY created DESC, bytes DESC
FETCH FIRST 1 ROW ONLY;
-- Extract filename only from full path (remove bucket folder prefix)
-- vActualFileName contains: 'ODS/CSDB/CSDB_DEBT/LEGACY_DEBT_202508_1_20260211T111341375171Z.csv'
-- Extract only: 'LEGACY_DEBT_202508_1_20260211T111341375171Z.csv'
vActualFileName := SUBSTR(vActualFileName, INSTR(vActualFileName, '/', -1) + 1);
-- Success - exit retry loop
EXIT metadata_retry_loop;
EXCEPTION
WHEN NO_DATA_FOUND THEN
vRetryCount := vRetryCount + 1;
IF vRetryCount <= vMaxRetries THEN
-- Log retry attempt
ENV_MANAGER.LOG_PROCESS_EVENT('File not found in bucket (attempt ' || vRetryCount || '/' || (vMaxRetries + 1) || '), retrying after ' || vRetryDelay || ' seconds: ' || vFileName, 'DEBUG', vParameters);
-- Wait before retry using DBMS_SESSION.SLEEP (alternative to DBMS_LOCK)
DBMS_SESSION.SLEEP(vRetryDelay);
ELSE
-- Max retries exceeded - re-raise exception
RAISE;
END IF;
END;
END LOOP metadata_retry_loop;
-- Create A_SOURCE_FILE_RECEIVED record for this export with metadata
vSourceFileReceivedKey := CT_MRDS.A_SOURCE_FILE_RECEIVED_KEY_SEQ.NEXTVAL;
INSERT INTO CT_MRDS.A_SOURCE_FILE_RECEIVED (
A_SOURCE_FILE_RECEIVED_KEY,
A_SOURCE_FILE_CONFIG_KEY,
SOURCE_FILE_NAME,
CHECKSUM,
CREATED,
BYTES,
RECEPTION_DATE,
PROCESSING_STATUS,
PARTITION_YEAR,
PARTITION_MONTH,
ARCH_FILE_NAME
) VALUES (
vSourceFileReceivedKey,
vConfigKey, -- Config key from A_SOURCE_FILE_CONFIG lookup
vActualFileName, -- Use actual filename with Oracle suffix
vChecksum,
vCreated,
vBytes,
SYSDATE,
'INGESTED',
NULL, -- PARTITION_YEAR not used for CSV exports
NULL, -- PARTITION_MONTH not used for CSV exports
NULL -- ARCH_FILE_NAME not used for CSV exports
);
ENV_MANAGER.LOG_PROCESS_EVENT('Registered file: FileReceivedKey=' || vSourceFileReceivedKey || ', File=' || vActualFileName || ', Size=' || vBytes || ' bytes', 'DEBUG', vParameters);
EXCEPTION
WHEN NO_DATA_FOUND THEN
-- File not found after retries - log warning and continue without metadata
ENV_MANAGER.LOG_PROCESS_EVENT('WARNING: File not found in bucket after ' || (vMaxRetries + 1) || ' attempts: ' || vFileName, 'WARNING', vParameters);
-- Sanitize filename for fallback INSERT (function cannot be used in SQL)
vSanitizedFileName := sanitizeFilename(vFileName);
-- Insert without metadata
vSourceFileReceivedKey := CT_MRDS.A_SOURCE_FILE_RECEIVED_KEY_SEQ.NEXTVAL;
INSERT INTO CT_MRDS.A_SOURCE_FILE_RECEIVED (
A_SOURCE_FILE_RECEIVED_KEY,
A_SOURCE_FILE_CONFIG_KEY,
SOURCE_FILE_NAME,
RECEPTION_DATE,
PROCESSING_STATUS,
PARTITION_YEAR,
PARTITION_MONTH,
ARCH_FILE_NAME
) VALUES (
vSourceFileReceivedKey,
vConfigKey, -- Config key from A_SOURCE_FILE_CONFIG lookup
vSanitizedFileName, -- Fallback: use theoretical filename if actual not found
SYSDATE,
'INGESTED',
NULL, -- PARTITION_YEAR not used for CSV exports
NULL, -- PARTITION_MONTH not used for CSV exports
NULL -- ARCH_FILE_NAME not used for CSV exports
);
END;
END LOOP;
COMMIT;
ENV_MANAGER.LOG_PROCESS_EVENT('Successfully registered all ' || vPartitions.COUNT || ' files', 'INFO', vParameters);
END IF;
ENV_MANAGER.LOG_PROCESS_EVENT('Export completed successfully for ' || vPartitions.COUNT || ' files', 'INFO', vParameters);
ENV_MANAGER.LOG_PROCESS_EVENT('End','INFO',vParameters);

View File

@@ -9,15 +9,19 @@ AS
**/
-- Package Version Information
PACKAGE_VERSION CONSTANT VARCHAR2(10) := '2.6.3';
PACKAGE_BUILD_DATE CONSTANT VARCHAR2(19) := '2026-01-28 19:30:00';
PACKAGE_AUTHOR CONSTANT VARCHAR2(50) := 'MRDS Development Team';
PACKAGE_VERSION CONSTANT VARCHAR2(10) := '2.7.4';
PACKAGE_BUILD_DATE CONSTANT VARCHAR2(20) := '2026-02-11 12:10:00';
PACKAGE_AUTHOR CONSTANT VARCHAR2(100) := 'Grzegorz Michalski';
-- Version History (last 3-5 changes)
VERSION_HISTORY CONSTANT VARCHAR2(4000) :=
'v2.7.4 (2026-02-11): ACTUAL FILENAME STORAGE - Store real filename with Oracle suffix in SOURCE_FILE_NAME instead of theoretical filename. Changes LIST_OBJECTS query to SELECT object_name and stores actual filename like LEGACY_DEBT_202508_1_20260211T111341375171Z.csv instead of LEGACY_DEBT_202508.csv. Enables accurate file tracking.' || CHR(10) ||
'v2.7.3 (2026-02-11): FIX LIKE pattern for DBMS_CLOUD.LIST_OBJECTS - Removed .csv extension from filename before pattern matching. Oracle EXPORT_DATA creates files with suffixes BEFORE .csv so LIKE pattern should be filename% not filename.csv%. Enables proper metadata retrieval (CHECKSUM, CREATED, BYTES).' || CHR(10) ||
'v2.7.2 (2026-02-11): FIX pRegisterExport in EXPORT_TABLE_DATA_TO_CSV_BY_DATE - Added missing pRegisterExport parameter to EXPORT_SINGLE_PARTITION call. Previously files were not registered because parameter was not passed through.' || CHR(10) ||
'v2.7.1 (2026-02-11): AUTO-LOOKUP A_SOURCE_FILE_CONFIG_KEY - Parse pFolderName to automatically find config key from A_SOURCE_FILE_CONFIG. Example: ODS/CSDB/CSDB_DEBT_DAILY extracts SOURCE_KEY=CSDB, TABLE_ID=CSDB_DEBT_DAILY.' || CHR(10) ||
'v2.7.0 (2026-02-10): Added pRegisterExport parameter to EXPORT_TABLE_DATA_TO_CSV_BY_DATE. When TRUE, registers each exported CSV file in A_SOURCE_FILE_RECEIVED with metadata from DBMS_CLOUD.LIST_OBJECTS. Enables file tracking and integrity verification.' || CHR(10);
'v2.6.3 (2026-01-28): COMPILATION FIX - Resolved ORA-00904 error in EXPORT_PARTITION_PARALLEL. SQLERRM and DBMS_UTILITY.FORMAT_ERROR_BACKTRACE cannot be used directly in SQL UPDATE statements. Now properly assigned to vgMsgTmp variable before UPDATE.' || CHR(10) ||
'v2.6.2 (2026-01-28): CRITICAL FIX - Race condition when multiple exports run simultaneously. Changed DELETE to filter by age (>24h) instead of deleting all COMPLETED chunks. Prevents concurrent sessions from deleting each other chunks. Session-safe cleanup with TASK_NAME filtering. Enables true parallel execution of multiple export jobs.' || CHR(10) ||
'v2.6.1 (2026-01-28): Added DELETE_FAILED_EXPORT_FILE procedure to clean up partial/corrupted files before retry. When partition fails mid-export, partial file is deleted before retry to prevent Oracle from creating _1 suffixed duplicates. Ensures clean retry without orphaned files in OCI bucket.' || CHR(10) ||
'v2.6.0 (2026-01-28): CRITICAL FIX - Added STATUS tracking to A_PARALLEL_EXPORT_CHUNKS table to prevent data duplication on retry. System now restarts ONLY failed partitions instead of re-exporting all data. Added ERROR_MESSAGE and EXPORT_TIMESTAMP columns for better error handling and monitoring. Prevents duplicate file creation when parallel tasks fail (e.g., 22 partitions with 16 threads, 3 failures no longer duplicates 19 successful exports).' || CHR(10) ||
'v2.5.0 (2026-01-26): Added recorddelimiter parameter with CRLF (CHR(13)||CHR(10)) for CSV exports to ensure Windows-compatible line endings. Improves cross-platform compatibility when CSV files are opened in Windows applications (Notepad, Excel).' || CHR(10) ||
'v2.4.0 (2026-01-11): Added pTemplateTableName parameter for per-column date format configuration. Implements dynamic query building with TO_CHAR for each date/timestamp column using FILE_MANAGER.GET_DATE_FORMAT. Supports 3-tier hierarchy: column-specific, template DEFAULT, global fallback. Eliminates single dateformat limitation of DBMS_CLOUD.EXPORT_DATA.' || CHR(10) ||
@@ -146,6 +150,8 @@ AS
* but exports to CSV format instead of Parquet.
* Supports parallel partition processing via pParallelDegree parameter (1-16).
* File naming pattern: {pFileName}_YYYYMM.csv or {TABLENAME}_YYYYMM.csv (if pFileName is NULL)
* When pRegisterExport=TRUE, successfully exported files are registered in:
* - CT_MRDS.A_SOURCE_FILE_RECEIVED (tracks file location, size, checksum, and metadata)
* @example
* begin
* -- With custom filename
@@ -158,7 +164,8 @@ AS
* pFileName => 'my_export.csv',
* pMinDate => DATE '2024-01-01',
* pMaxDate => SYSDATE,
* pParallelDegree => 8 -- Optional, default 1, range 1-16
* pParallelDegree => 8, -- Optional, default 1, range 1-16
* pRegisterExport => TRUE -- Optional, default FALSE, registers to A_SOURCE_FILE_RECEIVED
* );
*
* -- With auto-generated filename (based on table name only)
@@ -169,7 +176,8 @@ AS
* pBucketArea => 'ARCHIVE',
* pFolderName => 'exports',
* pMinDate => DATE '2025-09-01',
* pMaxDate => DATE '2025-09-17'
* pMaxDate => DATE '2025-09-17',
* pRegisterExport => TRUE -- Registers each export to A_SOURCE_FILE_RECEIVED table
* );
* -- This will create files like: AGGREGATED_ALLOTMENT_202509.csv, etc.
* pBucketArea parameter accepts: 'INBOX', 'ODS', 'DATA', 'ARCHIVE'
@@ -188,6 +196,7 @@ AS
pParallelDegree IN NUMBER default 1,
pTemplateTableName IN VARCHAR2 default NULL,
pMaxFileSize IN NUMBER default 104857600,
pRegisterExport IN BOOLEAN default FALSE,
pCredentialName IN VARCHAR2 default ENV_MANAGER.gvCredentialName
);

View File

@@ -0,0 +1,218 @@
create or replace PACKAGE CT_MRDS.DATA_EXPORTER
AUTHID CURRENT_USER
AS
/**
* Data Export Package: Provides comprehensive data export capabilities to various formats (CSV, Parquet)
* with support for cloud storage integration via Oracle Cloud Infrastructure (OCI).
* The structure of this comment is used by the GET_PACKAGE_DOCUMENTATION function,
* which returns documentation text for the Confluence page (to copy-paste it).
**/
-- Package Version Information
PACKAGE_VERSION CONSTANT VARCHAR2(10) := '2.6.3';
PACKAGE_BUILD_DATE CONSTANT VARCHAR2(19) := '2026-01-28 19:30:00';
PACKAGE_AUTHOR CONSTANT VARCHAR2(50) := 'MRDS Development Team';
-- Version History (last 3-5 changes)
VERSION_HISTORY CONSTANT VARCHAR2(4000) :=
'v2.6.3 (2026-01-28): COMPILATION FIX - Resolved ORA-00904 error in EXPORT_PARTITION_PARALLEL. SQLERRM and DBMS_UTILITY.FORMAT_ERROR_BACKTRACE cannot be used directly in SQL UPDATE statements. Now properly assigned to vgMsgTmp variable before UPDATE.' || CHR(10) ||
'v2.6.2 (2026-01-28): CRITICAL FIX - Race condition when multiple exports run simultaneously. Changed DELETE to filter by age (>24h) instead of deleting all COMPLETED chunks. Prevents concurrent sessions from deleting each other chunks. Session-safe cleanup with TASK_NAME filtering. Enables true parallel execution of multiple export jobs.' || CHR(10) ||
'v2.6.1 (2026-01-28): Added DELETE_FAILED_EXPORT_FILE procedure to clean up partial/corrupted files before retry. When partition fails mid-export, partial file is deleted before retry to prevent Oracle from creating _1 suffixed duplicates. Ensures clean retry without orphaned files in OCI bucket.' || CHR(10) ||
'v2.6.0 (2026-01-28): CRITICAL FIX - Added STATUS tracking to A_PARALLEL_EXPORT_CHUNKS table to prevent data duplication on retry. System now restarts ONLY failed partitions instead of re-exporting all data. Added ERROR_MESSAGE and EXPORT_TIMESTAMP columns for better error handling and monitoring. Prevents duplicate file creation when parallel tasks fail (e.g., 22 partitions with 16 threads, 3 failures no longer duplicates 19 successful exports).' || CHR(10) ||
'v2.5.0 (2026-01-26): Added recorddelimiter parameter with CRLF (CHR(13)||CHR(10)) for CSV exports to ensure Windows-compatible line endings. Improves cross-platform compatibility when CSV files are opened in Windows applications (Notepad, Excel).' || CHR(10) ||
'v2.4.0 (2026-01-11): Added pTemplateTableName parameter for per-column date format configuration. Implements dynamic query building with TO_CHAR for each date/timestamp column using FILE_MANAGER.GET_DATE_FORMAT. Supports 3-tier hierarchy: column-specific, template DEFAULT, global fallback. Eliminates single dateformat limitation of DBMS_CLOUD.EXPORT_DATA.' || CHR(10) ||
'v2.3.0 (2025-12-20): Added parallel partition processing using DBMS_PARALLEL_EXECUTE. New pParallelDegree parameter (1-16, default 1) for EXPORT_TABLE_DATA_BY_DATE and EXPORT_TABLE_DATA_TO_CSV_BY_DATE procedures. Each year/month partition processed in separate thread for improved performance.' || CHR(10) ||
'v2.2.0 (2025-12-19): DRY refactoring - extracted shared helper functions (sanitizeFilename, VALIDATE_TABLE_AND_COLUMNS, GET_PARTITIONS, EXPORT_SINGLE_PARTITION worker procedure). Reduced code duplication by ~400 lines. Prepared architecture for v2.3.0 parallel processing.' || CHR(10) ||
'v2.1.1 (2025-12-04): Fixed JOIN column reference A_WORKFLOW_HISTORY_KEY -> A_ETL_LOAD_SET_KEY, added consistent column mapping and dynamic column list to EXPORT_TABLE_DATA procedure, enhanced DEBUG logging for all export operations' || CHR(10) ||
'v2.1.0 (2025-10-22): Added version tracking and PARTITION_YEAR/PARTITION_MONTH support' || CHR(10) ||
'v2.0.0 (2025-10-01): Separated export functionality from FILE_MANAGER package' || CHR(10);
-- Windows-style line break (CR+LF) for building multi-line messages
cgBL CONSTANT VARCHAR2(2) := CHR(13)||CHR(10);
-- Scratch buffer for error/log message assembly; SQLERRM/FORMAT_ERROR_BACKTRACE must be
-- assigned here before use in SQL UPDATE statements (see v2.6.3 in VERSION_HISTORY)
vgMsgTmp VARCHAR2(32000);
---------------------------------------------------------------------------------------------------------------------------
-- TYPE DEFINITIONS FOR PARTITION HANDLING
---------------------------------------------------------------------------------------------------------------------------
/**
* Record type for year/month partition information
* (year as 'YYYY', month as 'MM' string values)
**/
TYPE partition_rec IS RECORD (
year VARCHAR2(4),
month VARCHAR2(2)
);
/**
* Table type for collection of partition records
**/
TYPE partition_tab IS TABLE OF partition_rec;
---------------------------------------------------------------------------------------------------------------------------
-- INTERNAL PARALLEL PROCESSING CALLBACK
---------------------------------------------------------------------------------------------------------------------------
/**
* @name EXPORT_PARTITION_PARALLEL
* @desc Internal callback procedure for DBMS_PARALLEL_EXECUTE.
* Processes single partition (year/month) chunk in parallel task.
* Called by DBMS_PARALLEL_EXECUTE framework for each chunk.
* This procedure is PUBLIC because DBMS_PARALLEL_EXECUTE requires it,
* but should NOT be called directly by external code.
* @param pStartId - Chunk start ID (CHUNK_ID from A_PARALLEL_EXPORT_CHUNKS table)
* @param pEndId - Chunk end ID (same as pStartId for single-row chunks)
**/
PROCEDURE EXPORT_PARTITION_PARALLEL (
pStartId IN NUMBER,
pEndId IN NUMBER
);
---------------------------------------------------------------------------------------------------------------------------
-- MAIN EXPORT PROCEDURES
---------------------------------------------------------------------------------------------------------------------------
/**
* @name EXPORT_TABLE_DATA
* @desc Wrapper procedure for DBMS_CLOUD.EXPORT_DATA.
* Exports data into a CSV file on OCI infrastructure.
* pBucketArea parameter accepts: 'INBOX', 'ODS', 'DATA', 'ARCHIVE'
* pCredentialName defaults to the environment-wide OCI credential (ENV_MANAGER.gvCredentialName).
* @example
* begin
* DATA_EXPORTER.EXPORT_TABLE_DATA(
* pSchemaName => 'CT_MRDS',
* pTableName => 'MY_TABLE',
* pKeyColumnName => 'A_ETL_LOAD_SET_KEY_FK',
* pBucketArea => 'DATA',
* pFolderName => 'csv_exports'
* );
* end;
**/
PROCEDURE EXPORT_TABLE_DATA (
pSchemaName IN VARCHAR2,
pTableName IN VARCHAR2,
pKeyColumnName IN VARCHAR2,
pBucketArea IN VARCHAR2,
pFolderName IN VARCHAR2,
pCredentialName IN VARCHAR2 default ENV_MANAGER.gvCredentialName
);
/**
* @name EXPORT_TABLE_DATA_BY_DATE
* @desc Wrapper procedure for DBMS_CLOUD.EXPORT_DATA.
* Exports data into PARQUET files on OCI infrastructure.
* Each YEAR_MONTH pair goes to a separate file (implicit partitioning).
* Allows specifying custom column list or uses T.* if pColumnList is NULL.
* Validates that all columns in pColumnList exist in the target table.
* Automatically adds 'T.' prefix to column names in pColumnList.
* Supports parallel partition processing via pParallelDegree parameter (default 1, range 1-16).
* pBucketArea parameter accepts: 'INBOX', 'ODS', 'DATA', 'ARCHIVE'
* @example
* begin
* DATA_EXPORTER.EXPORT_TABLE_DATA_BY_DATE(
* pSchemaName => 'CT_MRDS',
* pTableName => 'MY_TABLE',
* pKeyColumnName => 'A_ETL_LOAD_SET_KEY_FK',
* pBucketArea => 'DATA',
* pFolderName => 'parquet_exports',
* pColumnList => 'COLUMN1, COLUMN2, COLUMN3', -- Optional
* pMinDate => DATE '2024-01-01',
* pMaxDate => SYSDATE,
* pParallelDegree => 8 -- Optional, default 1, range 1-16
* );
* end;
**/
PROCEDURE EXPORT_TABLE_DATA_BY_DATE (
pSchemaName IN VARCHAR2,
pTableName IN VARCHAR2,
pKeyColumnName IN VARCHAR2,
pBucketArea IN VARCHAR2,
pFolderName IN VARCHAR2,
pColumnList IN VARCHAR2 default NULL,
pMinDate IN DATE default DATE '1900-01-01',
pMaxDate IN DATE default SYSDATE,
pParallelDegree IN NUMBER default 1,
pTemplateTableName IN VARCHAR2 default NULL,
pCredentialName IN VARCHAR2 default ENV_MANAGER.gvCredentialName
);
/**
* @name EXPORT_TABLE_DATA_TO_CSV_BY_DATE
* @desc Exports data to separate CSV files partitioned by year and month.
* Creates one CSV file for each year/month combination found in the data.
* Uses the same date filtering mechanism with CT_ODS.A_LOAD_HISTORY as EXPORT_TABLE_DATA_BY_DATE,
* but exports to CSV format instead of Parquet.
* Supports parallel partition processing via pParallelDegree parameter (1-16).
* File naming pattern: {pFileName}_YYYYMM.csv or {TABLENAME}_YYYYMM.csv (if pFileName is NULL)
* pMaxFileSize default is 104857600 bytes (100 MB).
* @example
* begin
* -- With custom filename
* DATA_EXPORTER.EXPORT_TABLE_DATA_TO_CSV_BY_DATE(
* pSchemaName => 'CT_MRDS',
* pTableName => 'MY_TABLE',
* pKeyColumnName => 'A_ETL_LOAD_SET_KEY_FK',
* pBucketArea => 'DATA',
* pFolderName => 'exports',
* pFileName => 'my_export.csv',
* pMinDate => DATE '2024-01-01',
* pMaxDate => SYSDATE,
* pParallelDegree => 8 -- Optional, default 1, range 1-16
* );
*
* -- With auto-generated filename (based on table name only)
* DATA_EXPORTER.EXPORT_TABLE_DATA_TO_CSV_BY_DATE(
* pSchemaName => 'OU_TOP',
* pTableName => 'AGGREGATED_ALLOTMENT',
* pKeyColumnName => 'A_ETL_LOAD_SET_KEY_FK',
* pBucketArea => 'ARCHIVE',
* pFolderName => 'exports',
* pMinDate => DATE '2025-09-01',
* pMaxDate => DATE '2025-09-17'
* );
* -- This will create files like: AGGREGATED_ALLOTMENT_202509.csv, etc.
* pBucketArea parameter accepts: 'INBOX', 'ODS', 'DATA', 'ARCHIVE'
* end;
**/
PROCEDURE EXPORT_TABLE_DATA_TO_CSV_BY_DATE (
pSchemaName IN VARCHAR2,
pTableName IN VARCHAR2,
pKeyColumnName IN VARCHAR2,
pBucketArea IN VARCHAR2,
pFolderName IN VARCHAR2,
pFileName IN VARCHAR2 DEFAULT NULL,
pColumnList IN VARCHAR2 default NULL,
pMinDate IN DATE default DATE '1900-01-01',
pMaxDate IN DATE default SYSDATE,
pParallelDegree IN NUMBER default 1,
pTemplateTableName IN VARCHAR2 default NULL,
pMaxFileSize IN NUMBER default 104857600,
pCredentialName IN VARCHAR2 default ENV_MANAGER.gvCredentialName
);
---------------------------------------------------------------------------------------------------------------------------
-- VERSION MANAGEMENT FUNCTIONS
---------------------------------------------------------------------------------------------------------------------------
/**
* Returns the current package version number (PACKAGE_VERSION constant)
* return: Version string in format X.Y.Z (e.g., '2.1.0')
**/
FUNCTION GET_VERSION RETURN VARCHAR2;
/**
* Returns comprehensive build information including version, date, and author
* return: Formatted string with complete build details
**/
FUNCTION GET_BUILD_INFO RETURN VARCHAR2;
/**
* Returns the version history with recent changes (VERSION_HISTORY constant)
* return: Multi-line string with version history
**/
FUNCTION GET_VERSION_HISTORY RETURN VARCHAR2;
END;
/

View File

@@ -0,0 +1,227 @@
create or replace PACKAGE CT_MRDS.DATA_EXPORTER
AUTHID CURRENT_USER
AS
/**
* Data Export Package: Provides comprehensive data export capabilities to various formats (CSV, Parquet)
* with support for cloud storage integration via Oracle Cloud Infrastructure (OCI).
* The structure of this comment is used by the GET_PACKAGE_DOCUMENTATION function
* which returns documentation text for the Confluence page (to Copy-Paste it).
**/
-- Package Version Information (Semantic Versioning: MAJOR.MINOR.PATCH)
-- NOTE(review): constant says 2.8.0 but the latest commit message mentions
-- updating the package to 2.7.4 and deprecating 2.8.0 — confirm intended version.
PACKAGE_VERSION CONSTANT VARCHAR2(10) := '2.8.0';
PACKAGE_BUILD_DATE CONSTANT VARCHAR2(20) := '2026-02-10 11:00:00';
PACKAGE_AUTHOR CONSTANT VARCHAR2(100) := 'Grzegorz Michalski';
-- CRLF line break used to separate entries in VERSION_HISTORY
cgBL CONSTANT VARCHAR2(2) := CHR(13)||CHR(10);
-- Version History (Latest changes first)
-- NOTE(review): the concatenated history below is close to the declared 4000-byte
-- limit; consider widening to VARCHAR2(32767) before adding the next entry,
-- otherwise package initialization may raise ORA-06502.
VERSION_HISTORY CONSTANT VARCHAR2(4000) :=
'v2.8.0 (2026-02-10): CRITICAL FIX - Removed duplicate post-export registration code that conflicted with per-partition registration. Post-export registration (SERVICE_NAME=DATA_EXPORTER, WORKFLOW_START=SYSTIMESTAMP) removed. Per-partition registration (SERVICE_NAME=CSV_EXPORT, WORKFLOW_START=partition_date) now executes exclusively. Prevents duplicate workflow records and ensures CSV files contain constant workflow keys instead of A_ETL_LOAD_SET_FK aliases.' || cgBL ||
'v2.7.0 (2026-02-09): NEW FEATURE - Added pRegisterExport parameter to EXPORT_TABLE_DATA_TO_CSV_BY_DATE. When TRUE, successfully exported files are registered in A_WORKFLOW_HISTORY (one record per YEAR/MONTH) and A_SOURCE_FILE_RECEIVED tables for tracking and audit purposes.' || cgBL ||
'v2.6.3 (2026-01-28): COMPILATION FIX - Resolved ORA-00904 error in EXPORT_PARTITION_PARALLEL. SQLERRM and DBMS_UTILITY.FORMAT_ERROR_BACKTRACE cannot be used directly in SQL UPDATE statements. Now properly assigned to vgMsgTmp variable before UPDATE.' || cgBL ||
'v2.6.2 (2026-01-28): CRITICAL FIX - Race condition when multiple exports run simultaneously. Changed DELETE to filter by age (>24h) instead of deleting all COMPLETED chunks. Prevents concurrent sessions from deleting each other chunks. Session-safe cleanup with TASK_NAME filtering. Enables true parallel execution of multiple export jobs.' || cgBL ||
'v2.6.1 (2026-01-28): Added DELETE_FAILED_EXPORT_FILE procedure to clean up partial/corrupted files before retry. When partition fails mid-export, partial file is deleted before retry to prevent Oracle from creating _1 suffixed duplicates. Ensures clean retry without orphaned files in OCI bucket.' || cgBL ||
'v2.6.0 (2026-01-28): CRITICAL FIX - Added STATUS tracking to A_PARALLEL_EXPORT_CHUNKS table to prevent data duplication on retry. System now restarts ONLY failed partitions instead of re-exporting all data. Added ERROR_MESSAGE and EXPORT_TIMESTAMP columns for better error handling and monitoring. Prevents duplicate file creation when parallel tasks fail (e.g., 22 partitions with 16 threads, 3 failures no longer duplicates 19 successful exports).' || cgBL ||
'v2.5.0 (2026-01-26): Added recorddelimiter parameter with CRLF (CHR(13)||CHR(10)) for CSV exports to ensure Windows-compatible line endings. Improves cross-platform compatibility when CSV files are opened in Windows applications (Notepad, Excel).' || cgBL ||
'v2.4.0 (2026-01-11): Added pTemplateTableName parameter for per-column date format configuration. Implements dynamic query building with TO_CHAR for each date/timestamp column using FILE_MANAGER.GET_DATE_FORMAT. Supports 3-tier hierarchy: column-specific, template DEFAULT, global fallback. Eliminates single dateformat limitation of DBMS_CLOUD.EXPORT_DATA.' || cgBL ||
'v2.3.0 (2025-12-20): Added parallel partition processing using DBMS_PARALLEL_EXECUTE. New pParallelDegree parameter (1-16, default 1) for EXPORT_TABLE_DATA_BY_DATE and EXPORT_TABLE_DATA_TO_CSV_BY_DATE procedures. Each year/month partition processed in separate thread for improved performance.' || cgBL ||
'v2.2.0 (2025-12-19): DRY refactoring - extracted shared helper functions (sanitizeFilename, VALIDATE_TABLE_AND_COLUMNS, GET_PARTITIONS, EXPORT_SINGLE_PARTITION worker procedure). Reduced code duplication by ~400 lines. Prepared architecture for v2.3.0 parallel processing.' || cgBL ||
'v2.1.1 (2025-12-04): Fixed JOIN column reference A_WORKFLOW_HISTORY_KEY -> A_ETL_LOAD_SET_KEY, added consistent column mapping and dynamic column list to EXPORT_TABLE_DATA procedure, enhanced DEBUG logging for all export operations' || cgBL ||
'v2.1.0 (2025-10-22): Added version tracking and PARTITION_YEAR/PARTITION_MONTH support' || cgBL ||
'v2.0.0 (2025-10-01): Separated export functionality from FILE_MANAGER package';
-- Scratch buffer for error/status text; per the v2.6.3 history entry, SQLERRM /
-- FORMAT_ERROR_BACKTRACE are assigned here before being used in SQL UPDATEs.
vgMsgTmp VARCHAR2(32000);
---------------------------------------------------------------------------------------------------------------------------
-- TYPE DEFINITIONS FOR PARTITION HANDLING
---------------------------------------------------------------------------------------------------------------------------
/**
* Record type for year/month partition information (year as 'YYYY', month as 'MM').
**/
TYPE partition_rec IS RECORD (
year VARCHAR2(4),
month VARCHAR2(2)
);
/**
* Table type for a collection of partition records (one per year/month found in the data).
**/
TYPE partition_tab IS TABLE OF partition_rec;
---------------------------------------------------------------------------------------------------------------------------
-- INTERNAL PARALLEL PROCESSING CALLBACK
---------------------------------------------------------------------------------------------------------------------------
/**
* @name EXPORT_PARTITION_PARALLEL
* @desc Internal callback procedure for DBMS_PARALLEL_EXECUTE.
* Processes a single partition (year/month) chunk in a parallel task.
* Called by the DBMS_PARALLEL_EXECUTE framework for each chunk.
* This procedure is PUBLIC only because DBMS_PARALLEL_EXECUTE requires it;
* it must NOT be called directly by external code.
* @param pStartId - Chunk start ID (CHUNK_ID from A_PARALLEL_EXPORT_CHUNKS table)
* @param pEndId - Chunk end ID (same as pStartId for single-row chunks)
**/
PROCEDURE EXPORT_PARTITION_PARALLEL (
pStartId IN NUMBER,
pEndId IN NUMBER
);
---------------------------------------------------------------------------------------------------------------------------
-- MAIN EXPORT PROCEDURES
---------------------------------------------------------------------------------------------------------------------------
/**
* @name EXPORT_TABLE_DATA
* @desc Wrapper procedure for DBMS_CLOUD.EXPORT_DATA.
* Exports data into a CSV file on OCI infrastructure.
* pBucketArea parameter accepts: 'INBOX', 'ODS', 'DATA', 'ARCHIVE'
* @example
* begin
* DATA_EXPORTER.EXPORT_TABLE_DATA(
* pSchemaName => 'CT_MRDS',
* pTableName => 'MY_TABLE',
* pKeyColumnName => 'A_ETL_LOAD_SET_KEY_FK',
* pBucketArea => 'DATA',
* pFolderName => 'csv_exports'
* );
* end;
**/
PROCEDURE EXPORT_TABLE_DATA (
pSchemaName IN VARCHAR2,
pTableName IN VARCHAR2,
pKeyColumnName IN VARCHAR2,
pBucketArea IN VARCHAR2,
pFolderName IN VARCHAR2,
pCredentialName IN VARCHAR2 default ENV_MANAGER.gvCredentialName
);
/**
* @name EXPORT_TABLE_DATA_BY_DATE
* @desc Wrapper procedure for DBMS_CLOUD.EXPORT_DATA.
* Exports data into PARQUET files on OCI infrastructure.
* Each YEAR_MONTH pair goes to a separate file (implicit partitioning).
* Allows specifying a custom column list or uses T.* if pColumnList is NULL.
* Validates that all columns in pColumnList exist in the target table.
* Automatically adds 'T.' prefix to column names in pColumnList.
* Supports parallel partition processing via pParallelDegree parameter (default 1, range 1-16).
* pBucketArea parameter accepts: 'INBOX', 'ODS', 'DATA', 'ARCHIVE'
* @example
* begin
* DATA_EXPORTER.EXPORT_TABLE_DATA_BY_DATE(
* pSchemaName => 'CT_MRDS',
* pTableName => 'MY_TABLE',
* pKeyColumnName => 'A_ETL_LOAD_SET_KEY_FK',
* pBucketArea => 'DATA',
* pFolderName => 'parquet_exports',
* pColumnList => 'COLUMN1, COLUMN2, COLUMN3', -- Optional
* pMinDate => DATE '2024-01-01',
* pMaxDate => SYSDATE,
* pParallelDegree => 8 -- Optional, default 1, range 1-16
* );
* end;
**/
PROCEDURE EXPORT_TABLE_DATA_BY_DATE (
pSchemaName IN VARCHAR2,
pTableName IN VARCHAR2,
pKeyColumnName IN VARCHAR2,
pBucketArea IN VARCHAR2,
pFolderName IN VARCHAR2,
pColumnList IN VARCHAR2 default NULL,
pMinDate IN DATE default DATE '1900-01-01',
pMaxDate IN DATE default SYSDATE,
pParallelDegree IN NUMBER default 1,
pTemplateTableName IN VARCHAR2 default NULL,
pCredentialName IN VARCHAR2 default ENV_MANAGER.gvCredentialName
);
/**
* @name EXPORT_TABLE_DATA_TO_CSV_BY_DATE
* @desc Exports data to separate CSV files partitioned by year and month.
* Creates one CSV file for each year/month combination found in the data.
* Uses the same date filtering mechanism with CT_ODS.A_LOAD_HISTORY as EXPORT_TABLE_DATA_BY_DATE,
* but exports to CSV format instead of Parquet.
* Supports parallel partition processing via pParallelDegree parameter (1-16).
* File naming pattern: {pFileName}_YYYYMM.csv or {TABLENAME}_YYYYMM.csv (if pFileName is NULL)
* When pRegisterExport=TRUE, successfully exported files are registered in:
* - CT_MRDS.A_WORKFLOW_HISTORY (one record per YEAR/MONTH with export timestamp)
* - CT_MRDS.A_SOURCE_FILE_RECEIVED (tracks file location and partition info)
* @example
* begin
* -- With custom filename
* DATA_EXPORTER.EXPORT_TABLE_DATA_TO_CSV_BY_DATE(
* pSchemaName => 'CT_MRDS',
* pTableName => 'MY_TABLE',
* pKeyColumnName => 'A_ETL_LOAD_SET_KEY_FK',
* pBucketArea => 'DATA',
* pFolderName => 'exports',
* pFileName => 'my_export.csv',
* pMinDate => DATE '2024-01-01',
* pMaxDate => SYSDATE,
* pParallelDegree => 8, -- Optional, default 1, range 1-16
* pRegisterExport => TRUE -- Optional, default FALSE, registers to A_WORKFLOW_HISTORY and A_SOURCE_FILE_RECEIVED
* );
*
* -- With auto-generated filename (based on table name only)
* DATA_EXPORTER.EXPORT_TABLE_DATA_TO_CSV_BY_DATE(
* pSchemaName => 'OU_TOP',
* pTableName => 'AGGREGATED_ALLOTMENT',
* pKeyColumnName => 'A_ETL_LOAD_SET_KEY_FK',
* pBucketArea => 'ARCHIVE',
* pFolderName => 'exports',
* pMinDate => DATE '2025-09-01',
* pMaxDate => DATE '2025-09-17',
* pRegisterExport => TRUE -- Registers each export to tracking tables
* );
* -- This will create files like: AGGREGATED_ALLOTMENT_202509.csv, etc.
* pBucketArea parameter accepts: 'INBOX', 'ODS', 'DATA', 'ARCHIVE'
* end;
**/
PROCEDURE EXPORT_TABLE_DATA_TO_CSV_BY_DATE (
pSchemaName IN VARCHAR2,
pTableName IN VARCHAR2,
pKeyColumnName IN VARCHAR2,
pBucketArea IN VARCHAR2,
pFolderName IN VARCHAR2,
pFileName IN VARCHAR2 DEFAULT NULL,
pColumnList IN VARCHAR2 default NULL,
pMinDate IN DATE default DATE '1900-01-01',
pMaxDate IN DATE default SYSDATE,
pParallelDegree IN NUMBER default 1,
pTemplateTableName IN VARCHAR2 default NULL,
pMaxFileSize IN NUMBER default 104857600,
pRegisterExport IN BOOLEAN default FALSE,
pCredentialName IN VARCHAR2 default ENV_MANAGER.gvCredentialName
);
---------------------------------------------------------------------------------------------------------------------------
-- VERSION MANAGEMENT FUNCTIONS
---------------------------------------------------------------------------------------------------------------------------
/**
* Returns the current package version number.
* return: Version string in semantic-versioning format X.Y.Z (e.g., '2.1.0')
**/
FUNCTION GET_VERSION RETURN VARCHAR2;
/**
* Returns comprehensive build information including version, build date, and author.
* return: Formatted string with complete build details
**/
FUNCTION GET_BUILD_INFO RETURN VARCHAR2;
/**
* Returns the version history with recent changes (latest entry first).
* return: Multi-line string with version history
**/
FUNCTION GET_VERSION_HISTORY RETURN VARCHAR2;
END;
/

View File

@@ -106,7 +106,8 @@ BEGIN
pMaxDate => SYSDATE,
pParallelDegree => 16,
pTemplateTableName => 'CT_ET_TEMPLATES.CSDB_DEBT',
pMaxFileSize => 104857600 -- 100MB in bytes (safe for parallel execution, avoids ORA-04036)
pMaxFileSize => 104857600, -- 100MB in bytes (safe for parallel execution, avoids ORA-04036)
pRegisterExport => TRUE -- Register exported files in A_SOURCE_FILE_RECEIVED with metadata (CHECKSUM, CREATED, BYTES)
);
DBMS_OUTPUT.PUT_LINE('SUCCESS: LEGACY_DEBT exported to DATA bucket with template column order');
@@ -225,7 +226,8 @@ BEGIN
pMaxDate => SYSDATE,
pParallelDegree => 16,
pTemplateTableName => 'CT_ET_TEMPLATES.CSDB_DEBT_DAILY',
pMaxFileSize => 104857600 -- 100MB in bytes (safe for parallel execution, avoids ORA-04036)
pMaxFileSize => 104857600, -- 100MB in bytes (safe for parallel execution, avoids ORA-04036)
pRegisterExport => TRUE -- Register exported files in A_SOURCE_FILE_RECEIVED with metadata (CHECKSUM, CREATED, BYTES)
);
DBMS_OUTPUT.PUT_LINE('SUCCESS: LEGACY_DEBT_DAILY exported to DATA bucket with template column order');