----------------------------------------------------------------------------------------------------
/**
 * Deletes ALL files matching a specific base-filename pattern before an export is retried.
 * Critical for preventing data duplication when DBMS_CLOUD.EXPORT_DATA fails mid-process:
 * a failed export may leave partial file(s) behind, and the retry would create new
 * _2/_3-suffixed files alongside them, duplicating data.
 *
 * Pattern matching strategy:
 *   - Parquet: folder/PARTITION_YEAR=2024/PARTITION_MONTH=11/*.parquet
 *     (each chunk has its own partition folder, so a folder-level match is safe)
 *   - CSV:     folder/TABLENAME_202411*.csv
 *     (multiple parallel chunks share one flat folder - the match MUST be
 *      file-specific to avoid deleting files that belong to other chunks)
 *
 * @param pFileUri        full OCI Object Storage URI of the export target file
 * @param pCredentialName DBMS_CLOUD credential used for LIST_OBJECTS / DELETE_OBJECT
 * @param pParameters     logging context passed through to ENV_MANAGER.LOG_PROCESS_EVENT
 *
 * Never raises: any failure is logged as WARNING and the export retry proceeds anyway.
 **/
PROCEDURE DELETE_FAILED_EXPORT_FILE(
    pFileUri        IN VARCHAR2,
    pCredentialName IN VARCHAR2,
    pParameters     IN VARCHAR2
)
IS
    vBucketUri       VARCHAR2(4000);
    vFolderPath      VARCHAR2(4000);
    vFileName        VARCHAR2(1000);
    vFileNamePattern VARCHAR2(1000);
    vLikePattern     VARCHAR2(4000);
    vSlashPos        NUMBER;
    vObjectPos       NUMBER;
    vDotPos          NUMBER;
    vFilesDeleted    NUMBER := 0;

    -- Escapes LIKE metacharacters so literal '_' and '%' in object names match only
    -- themselves. BUGFIX: without this, the pattern 'TABLENAME_202411%' also matches
    -- e.g. 'TABLENAMEX202411...' (since '_' = any single char in LIKE) and the cleanup
    -- could delete files belonging to OTHER parallel chunks - the exact data-loss
    -- hazard this procedure exists to prevent. CSV filenames always contain '_'.
    FUNCTION ESCAPE_LIKE(pText IN VARCHAR2) RETURN VARCHAR2
    IS
    BEGIN
        RETURN REPLACE(REPLACE(REPLACE(pText, '\', '\\'), '_', '\_'), '%', '\%');
    END ESCAPE_LIKE;
BEGIN
    -- Extract components from URI
    -- Example Parquet: https://.../bucket/o/folder/PARTITION_YEAR=2024/PARTITION_MONTH=11/202411.parquet
    -- Example CSV:     https://.../bucket/o/folder/TABLENAME_202411.csv
    vSlashPos  := INSTR(pFileUri, '/', -1);   -- last slash, immediately before the filename
    vObjectPos := INSTR(pFileUri, '/o/');     -- bucket URI ends after '/o/' in OCI Object Storage URLs

    -- BUGFIX: also require '/o/' to be present. Previously a URI without it produced a
    -- 2-character garbage bucket prefix, a wrong relative folder path, and a meaningless
    -- LIST_OBJECTS call; now such a URI falls into the "cannot parse" warning branch.
    IF vSlashPos > 0 AND vObjectPos > 0 THEN
        -- Extract filename from URI (after last slash)
        vFileName   := SUBSTR(pFileUri, vSlashPos + 1);

        -- Extract folder path (before last slash)
        vFolderPath := SUBSTR(pFileUri, 1, vSlashPos - 1);

        -- Bucket URI = protocol + namespace + bucket name, up to and including '/o/'
        vBucketUri  := SUBSTR(pFileUri, 1, vObjectPos + 2);

        -- Relative folder path (after the bucket)
        vFolderPath := SUBSTR(vFolderPath, LENGTH(vBucketUri) + 1);

        -- Strip the extension: Oracle adds suffixes BEFORE it
        -- (file.csv -> file_1_timestamp.csv), so 'file%' matches every retry artifact.
        vDotPos := INSTR(vFileName, '.', -1);
        IF vDotPos > 0 THEN
            vFileNamePattern := SUBSTR(vFileName, 1, vDotPos - 1);
        ELSE
            vFileNamePattern := vFileName;
        END IF;

        -- Build the escaped LIKE pattern; '%' wildcard is appended AFTER escaping.
        vLikePattern := ESCAPE_LIKE(vFolderPath) || '/' || ESCAPE_LIKE(vFileNamePattern) || '%';

        ENV_MANAGER.LOG_PROCESS_EVENT('Cleanup before retry - Pattern: ' || vFolderPath || '/' || vFileNamePattern || '%', 'DEBUG', pParameters);

        -- List and delete ALL files matching the pattern.
        -- CRITICAL: the file-specific pattern isolates CSV chunks sharing one folder.
        FOR rec IN (
            SELECT object_name
            FROM TABLE(DBMS_CLOUD.LIST_OBJECTS(
                     credential_name => pCredentialName,
                     location_uri    => vBucketUri
                 ))
            WHERE object_name LIKE vLikePattern ESCAPE '\'
        ) LOOP
            BEGIN
                DBMS_CLOUD.DELETE_OBJECT(
                    credential_name => pCredentialName,
                    object_uri      => vBucketUri || rec.object_name
                );

                vFilesDeleted := vFilesDeleted + 1;
                ENV_MANAGER.LOG_PROCESS_EVENT('Deleted partial file ' || vFilesDeleted || ': ' || rec.object_name, 'DEBUG', pParameters);
            EXCEPTION
                WHEN OTHERS THEN
                    -- Log but continue - don't fail entire cleanup
                    ENV_MANAGER.LOG_PROCESS_EVENT('Warning: Could not delete ' || rec.object_name || ': ' || SQLERRM, 'WARNING', pParameters);
            END;
        END LOOP;

        IF vFilesDeleted > 0 THEN
            ENV_MANAGER.LOG_PROCESS_EVENT('Cleanup completed: Deleted ' || vFilesDeleted || ' partial file(s) from previous failed export', 'INFO', pParameters);
        ELSE
            ENV_MANAGER.LOG_PROCESS_EVENT('No existing files to clean up (pattern match: ' || vFileNamePattern || '%)', 'DEBUG', pParameters);
        END IF;
    ELSE
        ENV_MANAGER.LOG_PROCESS_EVENT('Warning: Cannot parse file URI for cleanup: ' || pFileUri, 'WARNING', pParameters);
    END IF;
EXCEPTION
    WHEN OTHERS THEN
        -- Don't fail export if cleanup fails - log and continue
        ENV_MANAGER.LOG_PROCESS_EVENT('Warning: Cleanup failed (will retry export anyway): ' || SQLERRM, 'WARNING', pParameters);
END DELETE_FAILED_EXPORT_FILE;
----------------------------------------------------------------------------------------------------
|| CHR(10) || 'v2.10.0 (2026-02-13): CRITICAL FIX - Register ALL files created by DBMS_CLOUD.EXPORT_DATA (multi-file support due to Oracle parallel processing on large instances). Prevents orphaned files in rollback.' || CHR(10) || - 'v2.9.0 (2026-02-13): Added pProcessName parameter to EXPORT_TABLE_DATA and EXPORT_TABLE_DATA_TO_CSV_BY_DATE procedures for process tracking in A_SOURCE_FILE_RECEIVED table.' || CHR(10) || - 'v2.8.1 (2026-02-12): FIX query in EXPORT_TABLE_DATA - removed A_LOAD_HISTORY join to ensure single file output (simple SELECT).' || CHR(10); + 'v2.9.0 (2026-02-13): Added pProcessName parameter to EXPORT_TABLE_DATA and EXPORT_TABLE_DATA_TO_CSV_BY_DATE procedures for process tracking in A_SOURCE_FILE_RECEIVED table.' || CHR(10); cgBL CONSTANT VARCHAR2(2) := CHR(13)||CHR(10); vgMsgTmp VARCHAR2(32000);