feat(MARS-835): Enhance DELETE_FAILED_EXPORT_FILE procedure to delete all matching files before retrying export, preventing data duplication in parallel processing

This commit is contained in:
Grzegorz Michalski
2026-02-24 09:38:09 +01:00
parent 2605896469
commit ca5d8b320c
2 changed files with 93 additions and 23 deletions

View File

@@ -18,34 +18,104 @@ AS
----------------------------------------------------------------------------------------------------
/**
* Deletes export file from OCI bucket if it exists (used for cleanup before retry)
* Silently ignores if file doesn't exist (ORA-20404)
* Deletes ALL files matching specific file pattern before retry export
* Critical for preventing data duplication when DBMS_CLOUD.EXPORT_DATA fails mid-process
*
* Problem: Export fails after creating partial file(s), retry creates new _2, _3 suffixed files
* Solution: Delete ALL files matching the base filename pattern before retry
*
* Pattern matching strategy:
* - Parquet: folder/PARTITION_YEAR=2024/PARTITION_MONTH=11/*.parquet (folder-level safe - each chunk has own partition folder)
* - CSV: folder/TABLENAME_202411*.csv (file-level pattern - multiple chunks share same folder!)
*
* CRITICAL for parallel processing:
* - Parquet chunks are isolated by partition folder structure (safe to delete folder/*)
* - CSV chunks share flat folder structure - MUST use file-specific pattern (TABLENAME_YYYYMM*)
* to avoid deleting files from other parallel chunks in same folder
**/
PROCEDURE DELETE_FAILED_EXPORT_FILE(
pFileUri IN VARCHAR2,
pCredentialName IN VARCHAR2,
pParameters IN VARCHAR2
) IS
vBucketUri VARCHAR2(4000);
vFolderPath VARCHAR2(4000);
vFileName VARCHAR2(1000);
vFileNamePattern VARCHAR2(1000);
vSlashPos NUMBER;
vDotPos NUMBER;
vFilesDeleted NUMBER := 0;
BEGIN
BEGIN
ENV_MANAGER.LOG_PROCESS_EVENT('Attempting to delete potentially corrupted file: ' || pFileUri, 'DEBUG', pParameters);
-- Extract components from URI
-- Example Parquet: https://.../bucket/folder/PARTITION_YEAR=2024/PARTITION_MONTH=11/202411.parquet
-- Example CSV: https://.../bucket/folder/TABLENAME_202411.csv
-- Find last slash before filename
vSlashPos := INSTR(pFileUri, '/', -1);
IF vSlashPos > 0 THEN
-- Extract filename from URI (after last slash)
vFileName := SUBSTR(pFileUri, vSlashPos + 1);
DBMS_CLOUD.DELETE_OBJECT(
credential_name => pCredentialName,
object_uri => pFileUri
);
-- Extract folder path (before last slash)
vFolderPath := SUBSTR(pFileUri, 1, vSlashPos - 1);
ENV_MANAGER.LOG_PROCESS_EVENT('Deleted existing file (cleanup before retry): ' || pFileUri, 'INFO', pParameters);
EXCEPTION
WHEN OTHERS THEN
-- Object not found is OK (file doesn't exist)
IF SQLCODE = -20404 THEN
ENV_MANAGER.LOG_PROCESS_EVENT('File does not exist (OK): ' || pFileUri, 'DEBUG', pParameters);
ELSE
-- Log but don't fail - export will attempt anyway
ENV_MANAGER.LOG_PROCESS_EVENT('Warning: Could not delete file (will retry export anyway): ' || SQLERRM, 'WARNING', pParameters);
END IF;
END;
-- Find bucket URI (protocol + namespace + bucket name)
-- Bucket URI ends after /o/ in OCI Object Storage URLs
vBucketUri := SUBSTR(pFileUri, 1, INSTR(pFileUri, '/o/') + 2);
-- Extract relative folder path (after bucket)
vFolderPath := SUBSTR(vFolderPath, LENGTH(vBucketUri) + 1);
-- Create file pattern by removing extension
-- Oracle adds suffixes BEFORE extension: file.csv -> file_1_timestamp.csv
-- Pattern: file* matches file_1_timestamp.csv, file_2_timestamp.csv
vDotPos := INSTR(vFileName, '.', -1);
IF vDotPos > 0 THEN
vFileNamePattern := SUBSTR(vFileName, 1, vDotPos - 1) || '%';
ELSE
vFileNamePattern := vFileName || '%';
END IF;
ENV_MANAGER.LOG_PROCESS_EVENT('Cleanup before retry - Pattern: ' || vFolderPath || '/' || vFileNamePattern, 'DEBUG', pParameters);
-- List and delete ALL files matching pattern
-- CRITICAL: Uses file-specific pattern for CSV chunk isolation in shared folder
FOR rec IN (
SELECT object_name
FROM TABLE(DBMS_CLOUD.LIST_OBJECTS(
credential_name => pCredentialName,
location_uri => vBucketUri
))
WHERE object_name LIKE vFolderPath || '/' || vFileNamePattern
) LOOP
BEGIN
DBMS_CLOUD.DELETE_OBJECT(
credential_name => pCredentialName,
object_uri => vBucketUri || rec.object_name
);
vFilesDeleted := vFilesDeleted + 1;
ENV_MANAGER.LOG_PROCESS_EVENT('Deleted partial file ' || vFilesDeleted || ': ' || rec.object_name, 'DEBUG', pParameters);
EXCEPTION
WHEN OTHERS THEN
-- Log but continue - don't fail entire cleanup
ENV_MANAGER.LOG_PROCESS_EVENT('Warning: Could not delete ' || rec.object_name || ': ' || SQLERRM, 'WARNING', pParameters);
END;
END LOOP;
IF vFilesDeleted > 0 THEN
ENV_MANAGER.LOG_PROCESS_EVENT('Cleanup completed: Deleted ' || vFilesDeleted || ' partial file(s) from previous failed export', 'INFO', pParameters);
ELSE
ENV_MANAGER.LOG_PROCESS_EVENT('No existing files to clean up (pattern match: ' || vFileNamePattern || ')', 'DEBUG', pParameters);
END IF;
ELSE
ENV_MANAGER.LOG_PROCESS_EVENT('Warning: Cannot parse file URI for cleanup: ' || pFileUri, 'WARNING', pParameters);
END IF;
EXCEPTION
WHEN OTHERS THEN
-- Don't fail export if cleanup fails - log and continue
ENV_MANAGER.LOG_PROCESS_EVENT('Warning: Cleanup failed (will retry export anyway): ' || SQLERRM, 'WARNING', pParameters);
END DELETE_FAILED_EXPORT_FILE;
----------------------------------------------------------------------------------------------------