Update DATA_EXPORTER package to version 2.7.4, enhancing filename storage and metadata retrieval logic for exported files.

This commit is contained in:
Grzegorz Michalski
2026-02-11 18:27:44 +01:00
parent d237c2d7aa
commit 3171ff2ddf
2 changed files with 101 additions and 54 deletions

View File

@@ -1055,7 +1055,6 @@ AS
vConfigKey NUMBER := -1; vConfigKey NUMBER := -1;
vSlashPos1 NUMBER; vSlashPos1 NUMBER;
vSlashPos2 NUMBER; vSlashPos2 NUMBER;
vFileUri VARCHAR2(4000);
BEGIN BEGIN
vParameters := ENV_MANAGER.FORMAT_PARAMETERS(SYS.ODCIVARCHAR2LIST( 'pSchemaName => '''||nvl(pSchemaName, 'NULL')||'''' vParameters := ENV_MANAGER.FORMAT_PARAMETERS(SYS.ODCIVARCHAR2LIST( 'pSchemaName => '''||nvl(pSchemaName, 'NULL')||''''
@@ -1262,6 +1261,9 @@ AS
END; END;
END IF; END IF;
END IF; END IF;
-- Note: File registration handled by EXPORT_SINGLE_PARTITION when pRegisterExport=TRUE
-- Each partition calls pRegisterExport logic independently during serial/parallel execution
-- Register exported files to A_SOURCE_FILE_RECEIVED if requested (after successful export) -- Register exported files to A_SOURCE_FILE_RECEIVED if requested (after successful export)
IF pRegisterExport THEN IF pRegisterExport THEN
@@ -1307,57 +1309,99 @@ AS
vFileName := NVL(vFileBaseName, UPPER(REPLACE(vTableName, vSchemaName || '.', ''))) || '_' || vPartitions(i).year || vPartitions(i).month || '.csv'; vFileName := NVL(vFileBaseName, UPPER(REPLACE(vTableName, vSchemaName || '.', ''))) || '_' || vPartitions(i).year || vPartitions(i).month || '.csv';
vFileUri := vBucketUri || CASE WHEN pFolderName IS NOT NULL THEN pFolderName || '/' ELSE '' END || sanitizeFilename(vFileName); vFileUri := vBucketUri || CASE WHEN pFolderName IS NOT NULL THEN pFolderName || '/' ELSE '' END || sanitizeFilename(vFileName);
-- Get file metadata from OCI bucket (CHECKSUM, CREATED, BYTES) -- Get file metadata from OCI bucket (CHECKSUM, CREATED, BYTES) with retry logic
DECLARE DECLARE
vChecksum VARCHAR2(128); vChecksum VARCHAR2(128);
vCreated TIMESTAMP WITH TIME ZONE; vCreated TIMESTAMP WITH TIME ZONE;
vBytes NUMBER; vBytes NUMBER;
vSanitizedFileName VARCHAR2(1000); vActualFileName VARCHAR2(1000); -- Actual filename with Oracle suffix
BEGIN vSanitizedFileName VARCHAR2(1000);
-- Sanitize filename first (PL/SQL function cannot be used directly in SQL) vRetryCount NUMBER := 0;
vSanitizedFileName := sanitizeFilename(vFileName); vMaxRetries NUMBER := 1; -- One retry after initial attempt
vRetryDelay NUMBER := 2; -- 2 seconds delay
SELECT checksum, created, bytes BEGIN
INTO vChecksum, vCreated, vBytes -- Sanitize filename first (PL/SQL function cannot be used directly in SQL)
FROM TABLE(DBMS_CLOUD.LIST_OBJECTS( vSanitizedFileName := sanitizeFilename(vFileName);
credential_name => pCredentialName,
location_uri => vBucketUri -- Remove .csv extension for LIKE pattern matching (Oracle adds suffixes BEFORE .csv)
)) -- Example: LEGACY_DEBT_202508.csv becomes LEGACY_DEBT_202508_1_20260211T102621591769Z.csv
WHERE object_name = CASE WHEN pFolderName IS NOT NULL THEN pFolderName || '/' ELSE '' END || vSanitizedFileName; vSanitizedFileName := REGEXP_REPLACE(vSanitizedFileName, '\.csv$', '', 1, 0, 'i');
-- Create A_SOURCE_FILE_RECEIVED record for this export with metadata -- Try to get file metadata with retry logic
vSourceFileReceivedKey := CT_MRDS.A_SOURCE_FILE_RECEIVED_KEY_SEQ.NEXTVAL; <<metadata_retry_loop>>
INSERT INTO CT_MRDS.A_SOURCE_FILE_RECEIVED ( LOOP
A_SOURCE_FILE_RECEIVED_KEY, BEGIN
A_SOURCE_FILE_CONFIG_KEY, SELECT object_name, checksum, created, bytes
SOURCE_FILE_NAME, INTO vActualFileName, vChecksum, vCreated, vBytes
CHECKSUM, FROM TABLE(DBMS_CLOUD.LIST_OBJECTS(
CREATED, credential_name => pCredentialName,
BYTES, location_uri => vBucketUri
RECEPTION_DATE, ))
PROCESSING_STATUS, WHERE object_name LIKE CASE WHEN pFolderName IS NOT NULL THEN pFolderName || '/' ELSE '' END || vSanitizedFileName || '%'
PARTITION_YEAR, ORDER BY created DESC, bytes DESC
PARTITION_MONTH, FETCH FIRST 1 ROW ONLY;
ARCH_FILE_NAME
) VALUES ( -- Extract filename only from full path (remove bucket folder prefix)
vSourceFileReceivedKey, -- vActualFileName contains: 'ODS/CSDB/CSDB_DEBT/LEGACY_DEBT_202508_1_20260211T111341375171Z.csv'
vConfigKey, -- Config key from A_SOURCE_FILE_CONFIG lookup -- Extract only: 'LEGACY_DEBT_202508_1_20260211T111341375171Z.csv'
vFileUri, vActualFileName := SUBSTR(vActualFileName, INSTR(vActualFileName, '/', -1) + 1);
vChecksum,
vCreated, -- Success - exit retry loop
vBytes, EXIT metadata_retry_loop;
SYSDATE,
'ARCHIVED', EXCEPTION
vPartitions(i).year, WHEN NO_DATA_FOUND THEN
vPartitions(i).month, vRetryCount := vRetryCount + 1;
vFileName
); IF vRetryCount <= vMaxRetries THEN
-- Log retry attempt
ENV_MANAGER.LOG_PROCESS_EVENT('File not found in bucket (attempt ' || vRetryCount || '/' || (vMaxRetries + 1) || '), retrying after ' || vRetryDelay || ' seconds: ' || vFileName, 'DEBUG', vParameters);
-- Wait before retry using DBMS_SESSION.SLEEP (alternative to DBMS_LOCK)
DBMS_SESSION.SLEEP(vRetryDelay);
ELSE
-- Max retries exceeded - re-raise exception
RAISE;
END IF;
END;
END LOOP metadata_retry_loop;
-- Create A_SOURCE_FILE_RECEIVED record for this export with metadata
vSourceFileReceivedKey := CT_MRDS.A_SOURCE_FILE_RECEIVED_KEY_SEQ.NEXTVAL;
INSERT INTO CT_MRDS.A_SOURCE_FILE_RECEIVED (
A_SOURCE_FILE_RECEIVED_KEY,
A_SOURCE_FILE_CONFIG_KEY,
SOURCE_FILE_NAME,
CHECKSUM,
CREATED,
BYTES,
RECEPTION_DATE,
PROCESSING_STATUS,
PARTITION_YEAR,
PARTITION_MONTH,
ARCH_FILE_NAME
) VALUES (
vSourceFileReceivedKey,
vConfigKey, -- Config key from A_SOURCE_FILE_CONFIG lookup
vActualFileName, -- Use actual filename with Oracle suffix
vChecksum,
vCreated,
vBytes,
SYSDATE,
'INGESTED',
NULL, -- PARTITION_YEAR not used for CSV exports
NULL, -- PARTITION_MONTH not used for CSV exports
NULL -- ARCH_FILE_NAME not used for CSV exports
);
ENV_MANAGER.LOG_PROCESS_EVENT('Registered file: FileReceivedKey=' || vSourceFileReceivedKey || ', File=' || vFileName || ', Size=' || vBytes || ' bytes', 'DEBUG', vParameters); ENV_MANAGER.LOG_PROCESS_EVENT('Registered file: FileReceivedKey=' || vSourceFileReceivedKey || ', File=' || vActualFileName || ', Size=' || vBytes || ' bytes', 'DEBUG', vParameters);
EXCEPTION EXCEPTION
WHEN NO_DATA_FOUND THEN WHEN NO_DATA_FOUND THEN
-- File not found in bucket - log warning but continue -- File not found after retries - log warning and continue without metadata
ENV_MANAGER.LOG_PROCESS_EVENT('WARNING: File not found in bucket for metadata lookup: ' || vFileName, 'WARNING', vParameters); ENV_MANAGER.LOG_PROCESS_EVENT('WARNING: File not found in bucket after ' || (vMaxRetries + 1) || ' attempts: ' || vFileName, 'WARNING', vParameters);
-- Sanitize filename for fallback INSERT (function cannot be used in SQL)
vSanitizedFileName := sanitizeFilename(vFileName);
-- Insert without metadata -- Insert without metadata
vSourceFileReceivedKey := CT_MRDS.A_SOURCE_FILE_RECEIVED_KEY_SEQ.NEXTVAL; vSourceFileReceivedKey := CT_MRDS.A_SOURCE_FILE_RECEIVED_KEY_SEQ.NEXTVAL;
@@ -1373,12 +1417,12 @@ AS
) VALUES ( ) VALUES (
vSourceFileReceivedKey, vSourceFileReceivedKey,
vConfigKey, -- Config key from A_SOURCE_FILE_CONFIG lookup vConfigKey, -- Config key from A_SOURCE_FILE_CONFIG lookup
vFileUri, vSanitizedFileName, -- Fallback: use theoretical filename if actual not found
SYSDATE, SYSDATE,
'ARCHIVED', 'INGESTED',
vPartitions(i).year, NULL, -- PARTITION_YEAR not used for CSV exports
vPartitions(i).month, NULL, -- PARTITION_MONTH not used for CSV exports
vFileName NULL -- ARCH_FILE_NAME not used for CSV exports
); );
END; END;
END LOOP; END LOOP;

View File

@@ -9,14 +9,17 @@ AS
**/ **/
-- Package Version Information -- Package Version Information
PACKAGE_VERSION CONSTANT VARCHAR2(10) := '2.7.1'; PACKAGE_VERSION CONSTANT VARCHAR2(10) := '2.7.4';
PACKAGE_BUILD_DATE CONSTANT VARCHAR2(20) := '2026-02-11 10:00:00'; PACKAGE_BUILD_DATE CONSTANT VARCHAR2(20) := '2026-02-11 12:10:00';
PACKAGE_AUTHOR CONSTANT VARCHAR2(100) := 'Grzegorz Michalski'; PACKAGE_AUTHOR CONSTANT VARCHAR2(100) := 'Grzegorz Michalski';
-- Version History (last 3-5 changes) -- Version History (last 3-5 changes)
VERSION_HISTORY CONSTANT VARCHAR2(4000) := VERSION_HISTORY CONSTANT VARCHAR2(4000) :=
'v2.7.1 (2026-02-11): AUTO-LOOKUP A_SOURCE_FILE_CONFIG_KEY - Parse pFolderName (format: BUCKET/SOURCE/TABLE_ID) to automatically find config key from A_SOURCE_FILE_CONFIG. Example: ODS/CSDB/CSDB_DEBT_DAILY extracts SOURCE_KEY=CSDB, TABLE_ID=CSDB_DEBT_DAILY and looks up config. No more hardcoded -1 in A_SOURCE_FILE_RECEIVED.' || CHR(10) || 'v2.7.4 (2026-02-11): ACTUAL FILENAME STORAGE - Store real filename with Oracle suffix in SOURCE_FILE_NAME instead of theoretical filename. Changes LIST_OBJECTS query to SELECT object_name and stores actual filename like LEGACY_DEBT_202508_1_20260211T111341375171Z.csv instead of LEGACY_DEBT_202508.csv. Enables accurate file tracking.' || CHR(10) ||
'v2.7.0 (2026-02-10): Added pRegisterExport parameter to EXPORT_TABLE_DATA_TO_CSV_BY_DATE. When TRUE, registers each exported CSV file in A_SOURCE_FILE_RECEIVED with metadata (CHECKSUM, CREATED, BYTES) from DBMS_CLOUD.LIST_OBJECTS. Enables file tracking and integrity verification.' || CHR(10) || 'v2.7.3 (2026-02-11): FIX LIKE pattern for DBMS_CLOUD.LIST_OBJECTS - Removed .csv extension from filename before pattern matching. Oracle EXPORT_DATA creates files with suffixes BEFORE .csv so LIKE pattern should be filename% not filename.csv%. Enables proper metadata retrieval (CHECKSUM, CREATED, BYTES).' || CHR(10) ||
'v2.7.2 (2026-02-11): FIX pRegisterExport in EXPORT_TABLE_DATA_TO_CSV_BY_DATE - Added missing pRegisterExport parameter to EXPORT_SINGLE_PARTITION call. Previously files were not registered because parameter was not passed through.' || CHR(10) ||
'v2.7.1 (2026-02-11): AUTO-LOOKUP A_SOURCE_FILE_CONFIG_KEY - Parse pFolderName to automatically find config key from A_SOURCE_FILE_CONFIG. Example: ODS/CSDB/CSDB_DEBT_DAILY extracts SOURCE_KEY=CSDB, TABLE_ID=CSDB_DEBT_DAILY.' || CHR(10) ||
'v2.7.0 (2026-02-10): Added pRegisterExport parameter to EXPORT_TABLE_DATA_TO_CSV_BY_DATE. When TRUE, registers each exported CSV file in A_SOURCE_FILE_RECEIVED with metadata from DBMS_CLOUD.LIST_OBJECTS. Enables file tracking and integrity verification.' || CHR(10) ||
'v2.6.3 (2026-01-28): COMPILATION FIX - Resolved ORA-00904 error in EXPORT_PARTITION_PARALLEL. SQLERRM and DBMS_UTILITY.FORMAT_ERROR_BACKTRACE cannot be used directly in SQL UPDATE statements. Now properly assigned to vgMsgTmp variable before UPDATE.' || CHR(10) || 'v2.6.3 (2026-01-28): COMPILATION FIX - Resolved ORA-00904 error in EXPORT_PARTITION_PARALLEL. SQLERRM and DBMS_UTILITY.FORMAT_ERROR_BACKTRACE cannot be used directly in SQL UPDATE statements. Now properly assigned to vgMsgTmp variable before UPDATE.' || CHR(10) ||
'v2.6.2 (2026-01-28): CRITICAL FIX - Race condition when multiple exports run simultaneously. Changed DELETE to filter by age (>24h) instead of deleting all COMPLETED chunks. Prevents concurrent sessions from deleting each other chunks. Session-safe cleanup with TASK_NAME filtering. Enables true parallel execution of multiple export jobs.' || CHR(10) || 'v2.6.2 (2026-01-28): CRITICAL FIX - Race condition when multiple exports run simultaneously. Changed DELETE to filter by age (>24h) instead of deleting all COMPLETED chunks. Prevents concurrent sessions from deleting each other chunks. Session-safe cleanup with TASK_NAME filtering. Enables true parallel execution of multiple export jobs.' || CHR(10) ||
'v2.6.0 (2026-01-28): CRITICAL FIX - Added STATUS tracking to A_PARALLEL_EXPORT_CHUNKS table to prevent data duplication on retry. System now restarts ONLY failed partitions instead of re-exporting all data. Added ERROR_MESSAGE and EXPORT_TIMESTAMP columns for better error handling and monitoring. Prevents duplicate file creation when parallel tasks fail (e.g., 22 partitions with 16 threads, 3 failures no longer duplicates 19 successful exports).' || CHR(10) || 'v2.6.0 (2026-01-28): CRITICAL FIX - Added STATUS tracking to A_PARALLEL_EXPORT_CHUNKS table to prevent data duplication on retry. System now restarts ONLY failed partitions instead of re-exporting all data. Added ERROR_MESSAGE and EXPORT_TIMESTAMP columns for better error handling and monitoring. Prevents duplicate file creation when parallel tasks fail (e.g., 22 partitions with 16 threads, 3 failures no longer duplicates 19 successful exports).' || CHR(10) ||