Compare commits

...

2 Commits

Author SHA1 Message Date
Grzegorz Michalski
d443c4f07d Update FILE_ARCHIVER package to version 3.2.0 with TRASH retention control and new statuses 2026-02-10 08:23:06 +01:00
Grzegorz Michalski
cdd9dff32d aktualizacja dokumentacji w związku z TRASH i nowymi statusami plików. 2026-02-10 08:21:51 +01:00
16 changed files with 361 additions and 82 deletions

View File

@@ -1,14 +1,17 @@
-- ===================================================================
-- MARS-828: Install FILE_ARCHIVER Package Specification v3.0.0
-- MARS-828: Install FILE_ARCHIVER Package Specification v3.2.0
-- ===================================================================
-- Purpose: Deploy updated package specification with version 3.0.0
-- Purpose: Deploy updated package specification with version 3.2.0
-- Author: Grzegorz Michalski
-- Date: 2026-01-27
-- Date: 2026-02-06
-- Changes:
-- - Added pKeepInTrash parameter (DEFAULT TRUE) to ARCHIVE_TABLE_DATA
-- - TRASH folder retention control for safety and compliance
-- ===================================================================
@@new_version/FILE_ARCHIVER.pkg
PROMPT ========================================
PROMPT FILE_ARCHIVER Specification v3.0.0 ready for installation
PROMPT FILE_ARCHIVER Specification v3.2.0 installed successfully
PROMPT ========================================

View File

@@ -1,13 +1,15 @@
-- ===================================================================
-- MARS-828: Install FILE_ARCHIVER Package Body v3.0.0
-- MARS-828: Install FILE_ARCHIVER Package Body v3.2.0
-- ===================================================================
-- Purpose: Deploy updated package body with GET_ARCHIVAL_WHERE_CLAUSE function
-- Purpose: Deploy updated package body with TRASH folder retention control
-- Author: Grzegorz Michalski
-- Date: 2026-01-27
-- Date: 2026-02-06
-- Changes:
-- - Added GET_ARCHIVAL_WHERE_CLAUSE private function
-- - Updated ARCHIVE_TABLE_DATA to use strategy-based filtering
-- - Updated GATHER_TABLE_STAT to use strategy-based statistics
-- - v3.0.0: Added GET_ARCHIVAL_WHERE_CLAUSE for flexible archival strategies
-- - v3.1.0: Added function overloads for Python integration
-- - v3.1.1: Fixed ORA-01422 for multiple parquet files (directory prefix storage)
-- - v3.1.2: Fixed PARTITION_YEAR/PARTITION_MONTH assignments and circular dependency
-- - v3.2.0: Added pKeepInTrash parameter for TRASH folder retention control
-- ===================================================================
@@new_version/FILE_ARCHIVER.pkb

View File

@@ -0,0 +1,45 @@
-- MARS-828: Add TRASH retention statuses to A_SOURCE_FILE_RECEIVED
-- Author: Grzegorz Michalski
-- Date: 2026-02-09
-- Description: Adds ARCHIVED_AND_TRASHED and ARCHIVED_AND_PURGED statuses to support TRASH retention feature
PROMPT ========================================
PROMPT MARS-828: Adding TRASH retention statuses
PROMPT ========================================

-- Replace the PROCESSING_STATUS check constraint: the existing constraint is
-- dropped and re-created under the same name with an extended value list that
-- additionally accepts ARCHIVED_AND_TRASHED and ARCHIVED_AND_PURGED.
ALTER TABLE CT_MRDS.A_SOURCE_FILE_RECEIVED
    DROP CONSTRAINT A_SOURCE_FILE_RECEIVED_CHK;

ALTER TABLE CT_MRDS.A_SOURCE_FILE_RECEIVED
    ADD CONSTRAINT A_SOURCE_FILE_RECEIVED_CHK
    CHECK (PROCESSING_STATUS IN (
        'RECEIVED',
        'VALIDATED',
        'READY_FOR_INGESTION',
        'INGESTED',
        'ARCHIVED',              -- Legacy status (backward compatibility)
        'ARCHIVED_AND_TRASHED',  -- Files archived to Parquet and kept in TRASH folder (DATA bucket subfolder)
        'ARCHIVED_AND_PURGED'    -- Files archived to Parquet and deleted from TRASH folder
    ));

-- Document the status flow on the column itself
COMMENT ON COLUMN CT_MRDS.A_SOURCE_FILE_RECEIVED.PROCESSING_STATUS IS
'File processing status: RECEIVED → VALIDATED → READY_FOR_INGESTION → INGESTED → ARCHIVED_AND_TRASHED → ARCHIVED_AND_PURGED (optional)';

-- Verify constraint.
-- ALL_CONSTRAINTS filtered by owner is used instead of USER_CONSTRAINTS so the
-- check also returns the row when the script is run from a schema other than
-- CT_MRDS (all objects in this script are addressed with the CT_MRDS prefix).
SELECT
    constraint_name,
    constraint_type,
    search_condition
FROM all_constraints
WHERE owner = 'CT_MRDS'
    AND table_name = 'A_SOURCE_FILE_RECEIVED'
    AND constraint_name = 'A_SOURCE_FILE_RECEIVED_CHK';

-- Show current status distribution (one row per status present in the table)
SELECT
    PROCESSING_STATUS,
    COUNT(*) AS FILE_COUNT
FROM CT_MRDS.A_SOURCE_FILE_RECEIVED
GROUP BY PROCESSING_STATUS
ORDER BY PROCESSING_STATUS;
-- Completion banner. ASCII arrows are used for the status flow so the three
-- statuses read as a sequence rather than as one run of words.
PROMPT ========================================
PROMPT TRASH retention statuses added successfully
PROMPT Status flow: INGESTED -> ARCHIVED_AND_TRASHED -> ARCHIVED_AND_PURGED (optional)
PROMPT Legacy ARCHIVED status maintained for backward compatibility
PROMPT ========================================

View File

@@ -0,0 +1,77 @@
-- MARS-828 ROLLBACK: Remove TRASH retention statuses from A_SOURCE_FILE_RECEIVED
-- Author: Grzegorz Michalski
-- Date: 2026-02-09
-- Description: Rollback TRASH retention statuses to original constraint
PROMPT ========================================
PROMPT MARS-828 ROLLBACK: Removing TRASH retention statuses
PROMPT ========================================

-- Pre-rollback data migration.
-- Counts the rows that carry one of the two TRASH retention statuses and, if
-- any exist, rewrites them to the legacy ARCHIVED status so that the original
-- (narrower) check constraint can be re-created afterwards.
DECLARE
    vTrashCount    NUMBER;
    vPurgedCount   NUMBER;
    vMigratedCount NUMBER;  -- rows changed by the UPDATE, captured before COMMIT
BEGIN
    SELECT COUNT(*) INTO vTrashCount
    FROM CT_MRDS.A_SOURCE_FILE_RECEIVED
    WHERE PROCESSING_STATUS = 'ARCHIVED_AND_TRASHED';

    SELECT COUNT(*) INTO vPurgedCount
    FROM CT_MRDS.A_SOURCE_FILE_RECEIVED
    WHERE PROCESSING_STATUS = 'ARCHIVED_AND_PURGED';

    DBMS_OUTPUT.PUT_LINE('Files with ARCHIVED_AND_TRASHED status: ' || vTrashCount);
    DBMS_OUTPUT.PUT_LINE('Files with ARCHIVED_AND_PURGED status: ' || vPurgedCount);

    IF vTrashCount > 0 OR vPurgedCount > 0 THEN
        DBMS_OUTPUT.PUT_LINE('');
        DBMS_OUTPUT.PUT_LINE('WARNING: Files exist with new statuses!');
        DBMS_OUTPUT.PUT_LINE('Migrating statuses to ARCHIVED before rollback...');

        -- Migrate new statuses back to ARCHIVED
        UPDATE CT_MRDS.A_SOURCE_FILE_RECEIVED
        SET PROCESSING_STATUS = 'ARCHIVED'
        WHERE PROCESSING_STATUS IN ('ARCHIVED_AND_TRASHED', 'ARCHIVED_AND_PURGED');

        -- SQL%ROWCOUNT must be read before COMMIT: after the commit it no
        -- longer reflects the UPDATE and the message would report 0 rows.
        vMigratedCount := SQL%ROWCOUNT;
        COMMIT;

        DBMS_OUTPUT.PUT_LINE('Migrated ' || vMigratedCount || ' records to ARCHIVED status');
    ELSE
        DBMS_OUTPUT.PUT_LINE('No files with new statuses - safe to rollback');
    END IF;
END;
/
-- Drop the constraint that allows the TRASH retention statuses
ALTER TABLE CT_MRDS.A_SOURCE_FILE_RECEIVED
    DROP CONSTRAINT A_SOURCE_FILE_RECEIVED_CHK;

-- Restore original constraint (five statuses, ARCHIVED being the final one)
ALTER TABLE CT_MRDS.A_SOURCE_FILE_RECEIVED
    ADD CONSTRAINT A_SOURCE_FILE_RECEIVED_CHK
    CHECK (PROCESSING_STATUS IN (
        'RECEIVED',
        'VALIDATED',
        'READY_FOR_INGESTION',
        'INGESTED',
        'ARCHIVED'
    ));

-- Restore the original column comment (the comment is replaced, not removed)
COMMENT ON COLUMN CT_MRDS.A_SOURCE_FILE_RECEIVED.PROCESSING_STATUS IS
'File processing status: RECEIVED → VALIDATED → READY_FOR_INGESTION → INGESTED → ARCHIVED';

-- Verify constraint restored.
-- ALL_CONSTRAINTS filtered by owner is used instead of USER_CONSTRAINTS so the
-- check also returns the row when the script is run from a schema other than
-- CT_MRDS (all objects in this script are addressed with the CT_MRDS prefix).
SELECT
    constraint_name,
    constraint_type,
    search_condition
FROM all_constraints
WHERE owner = 'CT_MRDS'
    AND table_name = 'A_SOURCE_FILE_RECEIVED'
    AND constraint_name = 'A_SOURCE_FILE_RECEIVED_CHK';

-- Show current status distribution (one row per status present in the table)
SELECT
    PROCESSING_STATUS,
    COUNT(*) AS FILE_COUNT
FROM CT_MRDS.A_SOURCE_FILE_RECEIVED
GROUP BY PROCESSING_STATUS
ORDER BY PROCESSING_STATUS;

PROMPT ========================================
PROMPT TRASH retention statuses rollback completed
PROMPT Original constraint restored
PROMPT All ARCHIVED_AND_TRASHED/ARCHIVED_AND_PURGED migrated to ARCHIVED
PROMPT ========================================

View File

@@ -35,9 +35,9 @@ PROMPT =========================================================================
PROMPT MARS-828 Installation Starting
PROMPT ============================================================================
PROMPT Package: CT_MRDS.FILE_ARCHIVER
PROMPT Change: Enhanced archival strategies (MINIMUM_AGE_MONTHS, HYBRID)
PROMPT Purpose: Flexible archival policies per data source
PROMPT Steps: 8 (DDL, Trigger, Package v3.1.2, Verify, Track, Configure)
PROMPT Change: Enhanced archival strategies (MINIMUM_AGE_MONTHS, HYBRID) + TRASH retention control
PROMPT Purpose: Flexible archival policies per data source with file retention management
PROMPT Steps: 9 (DDL, Trigger, Statuses, Package v3.2.0, Verify, Track, Configure)
PROMPT Timestamp:
SELECT TO_CHAR(SYSDATE, 'YYYY-MM-DD HH24:MI:SS') AS install_start FROM DUAL;
PROMPT ============================================================================
@@ -55,42 +55,47 @@ WHENEVER SQLERROR CONTINUE
-- Installation steps
PROMPT
PROMPT Step 1/8: Adding archival strategy columns to A_SOURCE_FILE_CONFIG
PROMPT Step 1/9: Adding archival strategy columns to A_SOURCE_FILE_CONFIG
PROMPT ===================================================================
@@01_MARS_828_install_add_archival_strategy_columns.sql
PROMPT
PROMPT Step 2/8: Creating validation trigger
PROMPT Step 2/9: Creating validation trigger
PROMPT ======================================
@@02_MARS_828_install_archival_strategy_trigger.sql
PROMPT
PROMPT Step 3/8: Deploying FILE_ARCHIVER Package Specification v3.1.2
PROMPT Step 3/9: Adding TRASH retention statuses to A_SOURCE_FILE_RECEIVED
PROMPT =====================================================================
@@07_MARS_828_install_add_trash_retention_statuses.sql
PROMPT
PROMPT Step 4/9: Deploying FILE_ARCHIVER Package Specification v3.2.0
PROMPT ================================================================
@@03_MARS_828_install_CT_MRDS_FILE_ARCHIVER_SPEC.sql
PROMPT
PROMPT Step 4/8: Deploying FILE_ARCHIVER Package Body v3.1.2
PROMPT Step 5/9: Deploying FILE_ARCHIVER Package Body v3.2.0
PROMPT ======================================================
@@04_MARS_828_install_CT_MRDS_FILE_ARCHIVER_BODY.sql
PROMPT
PROMPT Step 5/8: Verifying installation
PROMPT Step 6/9: Verifying installation
PROMPT =================================
@@05_MARS_828_verify_installation.sql
PROMPT
PROMPT Step 6/8: Tracking package versions
PROMPT Step 7/9: Tracking package versions
PROMPT ====================================
@@track_package_versions.sql
PROMPT
PROMPT Step 7/8: Verifying tracked packages
PROMPT Step 8/9: Verifying tracked packages
PROMPT =====================================
@@verify_packages_version.sql
PROMPT
PROMPT Step 8/8: Configuring Release 01 tables archival strategies
PROMPT Step 9/9: Configuring Release 01 tables archival strategies
PROMPT ============================================================
@@06_MARS_828_configure_release01_tables.sql
@@ -103,16 +108,21 @@ SELECT TO_CHAR(SYSDATE, 'YYYY-MM-DD HH24:MI:SS') AS install_end FROM DUAL;
PROMPT
PROMPT Installation Summary:
PROMPT - Package: CT_MRDS.FILE_ARCHIVER
PROMPT - Version: 3.1.2 (final - includes all fixes)
PROMPT - Version: 3.2.0 (includes TRASH folder retention control)
PROMPT - Strategies: THRESHOLD_BASED (default), MINIMUM_AGE_MONTHS (0=current month), HYBRID
PROMPT - Backward Compatible: Yes (default THRESHOLD_BASED preserved)
PROMPT - TRASH Retention: pKeepInTrash parameter (DEFAULT TRUE for safety)
PROMPT * TRASH is a subfolder in DATA bucket (e.g., TRASH/LM/TABLE_NAME)
PROMPT * Files kept in TRASH by default for compliance and rollback capability
PROMPT - New Statuses: ARCHIVED_AND_TRASHED, ARCHIVED_AND_PURGED
PROMPT - Backward Compatible: Yes (default THRESHOLD_BASED + TRASH retention preserved)
PROMPT - Configured Tables: 25 Release 01 tables (19 LM + 6 CSDB)
PROMPT - Includes Fixes:
PROMPT * v3.1.1: ORA-01422 for multiple parquet files
PROMPT * v3.1.2: PARTITION_YEAR/PARTITION_MONTH assignments
PROMPT * v3.1.2: Export query circular dependency
PROMPT * v3.2.0: TRASH retention control (pKeepInTrash parameter)
PROMPT
PROMPT Note: Incremental patches (v3.1.0->v3.1.1->v3.1.2) available in patches/
PROMPT Note: Incremental patches (v3.1.0->v3.1.1->v3.1.2->v3.2.0) available in patches/
PROMPT
PROMPT Log file: &_filename
PROMPT ============================================================================

View File

@@ -0,0 +1,29 @@
-- ====================================================================
-- CT_MRDS.A_SOURCE_FILE_RECEIVED
-- ====================================================================
-- One row per received source file, together with its processing status.
-- Each row references its configuration row in A_SOURCE_FILE_CONFIG.
-- ====================================================================
CREATE TABLE CT_MRDS.A_SOURCE_FILE_RECEIVED (
    -- Keys
    A_SOURCE_FILE_RECEIVED_KEY  NUMBER(38,0)    NOT NULL ENABLE,
    A_SOURCE_FILE_CONFIG_KEY    NUMBER(38,0)    NOT NULL ENABLE,
    -- File identification (CHECKSUM, CREATED, BYTES are covered by the unique index below)
    SOURCE_FILE_NAME            VARCHAR2(1000)  NOT NULL,
    CHECKSUM                    VARCHAR2(128),
    CREATED                     TIMESTAMP(6) WITH TIME ZONE,
    BYTES                       NUMBER,
    RECEPTION_DATE              DATE            NOT NULL,
    -- Processing state (allowed values are enforced by A_SOURCE_FILE_RECEIVED_CHK)
    PROCESSING_STATUS           VARCHAR2(200),
    EXTERNAL_TABLE_NAME         VARCHAR2(200),
    -- Archive location details
    PARTITION_YEAR              VARCHAR2(4),
    PARTITION_MONTH             VARCHAR2(2),
    ARCH_FILE_NAME              VARCHAR2(1000),
    CONSTRAINT A_SOURCE_FILE_RECEIVED_PK
        PRIMARY KEY (A_SOURCE_FILE_RECEIVED_KEY),
    CONSTRAINT ASFR_A_SOURCE_FILE_CONFIG_KEY_FK
        FOREIGN KEY (A_SOURCE_FILE_CONFIG_KEY)
        REFERENCES CT_MRDS.A_SOURCE_FILE_CONFIG (A_SOURCE_FILE_CONFIG_KEY),
    CONSTRAINT A_SOURCE_FILE_RECEIVED_CHK
        CHECK (PROCESSING_STATUS IN (
            'RECEIVED',
            'VALIDATED',
            'READY_FOR_INGESTION',
            'INGESTED',
            'ARCHIVED',
            'ARCHIVED_AND_TRASHED',
            'ARCHIVED_AND_PURGED'
        ))
) TABLESPACE "DATA";

-- File uniqueness is enforced with a unique index rather than a constraint
-- (workaround for the TIMESTAMP WITH TIME ZONE constraint limitation)
CREATE UNIQUE INDEX CT_MRDS.A_SOURCE_FILE_RECEIVED_UK1
    ON CT_MRDS.A_SOURCE_FILE_RECEIVED (CHECKSUM, CREATED, BYTES);

-- Loader role gets full DML access to the table
GRANT SELECT, INSERT, UPDATE, DELETE
    ON CT_MRDS.A_SOURCE_FILE_RECEIVED
    TO MRDS_LOADER_ROLE;

View File

@@ -87,7 +87,8 @@ AS
----------------------------------------------------------------------------------------------------
PROCEDURE ARCHIVE_TABLE_DATA (
pSourceFileConfigKey IN CT_MRDS.A_SOURCE_FILE_CONFIG.A_SOURCE_FILE_CONFIG_KEY%TYPE
pSourceFileConfigKey IN CT_MRDS.A_SOURCE_FILE_CONFIG.A_SOURCE_FILE_CONFIG_KEY%TYPE,
pKeepInTrash IN BOOLEAN DEFAULT TRUE
)
IS
vSourceFileConfig CT_MRDS.A_SOURCE_FILE_CONFIG%ROWTYPE;
@@ -104,7 +105,10 @@ AS
vProcessControlStatus VARCHAR2(60) := 'OK';
BEGIN
vParameters := CT_MRDS.ENV_MANAGER.FORMAT_PARAMETERS(SYS.ODCIVARCHAR2LIST('pSourceFileConfigKey => '||nvl(to_char(pSourceFileConfigKey), 'NULL')));
vParameters := CT_MRDS.ENV_MANAGER.FORMAT_PARAMETERS(SYS.ODCIVARCHAR2LIST(
'pSourceFileConfigKey => '||nvl(to_char(pSourceFileConfigKey), 'NULL'),
'pKeepInTrash => '||CASE WHEN pKeepInTrash THEN 'TRUE' ELSE 'FALSE' END
));
CT_MRDS.ENV_MANAGER.LOG_PROCESS_EVENT('Start','INFO', vParameters);
vSourceFileConfig := CT_MRDS.FILE_MANAGER.GET_SOURCE_FILE_CONFIG(pSourceFileConfigKey => pSourceFileConfigKey);
@@ -244,17 +248,17 @@ AS
END;
EXIT WHEN vProcessControlStatus = 'CHANGE_STATUS_TO_ARCHIVED_FAILURE';
-- move file to trash before dropping
-- move file to TRASH subfolder (DATA bucket: ODS/ → TRASH/) before dropping
BEGIN
DBMS_CLOUD.MOVE_OBJECT(source_credential_name => ENV_MANAGER.gvCredentialName,
source_object_uri => f.pathname||'/'||f.filename,
target_object_uri => replace(f.pathname,'ODS','TRASH')||'/'||f.filename,
target_credential_name => ENV_MANAGER.gvCredentialName
);
CT_MRDS.ENV_MANAGER.LOG_PROCESS_EVENT('File moved to TRASH.','DEBUG', f.pathname||'/'||f.filename);
CT_MRDS.ENV_MANAGER.LOG_PROCESS_EVENT('File moved to TRASH folder.','DEBUG', f.pathname||'/'||f.filename);
EXCEPTION
WHEN OTHERS THEN
CT_MRDS.ENV_MANAGER.LOG_PROCESS_EVENT('Failed to move file to TRASH.','ERROR', f.pathname||'/'||f.filename);
CT_MRDS.ENV_MANAGER.LOG_PROCESS_EVENT('Failed to move file to TRASH folder.','ERROR', f.pathname||'/'||f.filename);
CT_MRDS.ENV_MANAGER.LOG_PROCESS_EVENT(CT_MRDS.ENV_MANAGER.GET_ERROR_STACK(pFormat => 'TABLE', pCode=> SQLCODE), 'ERROR', vParameters);
rollback;
vProcessControlStatus := 'MOVE_FILE_TO_TRASH_FAILURE';
@@ -264,24 +268,38 @@ AS
END LOOP;
--------------------------------------------------------------------
-- IF All goes fine till this point, we drop files from TRASH (if not then ROLLBACK PART)
-- IF All goes fine till this point, we drop files from TRASH folder (if not then ROLLBACK PART)
-- TRASH is a subfolder in DATA bucket (e.g., TRASH/LM/TABLE_NAME instead of ODS/LM/TABLE_NAME)
IF vProcessControlStatus = 'OK' THEN
FOR f in (select filename, pathname from table(vfiles) where year = ym_loop.year and month = ym_loop.month) LOOP
--Drop file from TRASH
DBMS_CLOUD.DELETE_OBJECT(credential_name => CT_MRDS.ENV_MANAGER.gvCredentialName,
object_uri => replace(f.pathname,'ODS','TRASH')||'/'||f.filename);
CT_MRDS.ENV_MANAGER.LOG_PROCESS_EVENT('File dropped from TRASH.','DEBUG', f.pathname||'/'||f.filename);
END LOOP;
IF NOT pKeepInTrash THEN
-- Delete files from TRASH folder (cleanup) and update status to ARCHIVED_AND_PURGED
FOR f in (select filename, pathname from table(vfiles) where year = ym_loop.year and month = ym_loop.month) LOOP
DBMS_CLOUD.DELETE_OBJECT(credential_name => CT_MRDS.ENV_MANAGER.gvCredentialName,
object_uri => replace(f.pathname,'ODS','TRASH')||'/'||f.filename);
CT_MRDS.ENV_MANAGER.LOG_PROCESS_EVENT('File dropped from TRASH folder.','DEBUG', f.pathname||'/'||f.filename);
-- Update status to ARCHIVED_AND_PURGED
UPDATE CT_MRDS.A_SOURCE_FILE_RECEIVED r
SET PROCESSING_STATUS = 'ARCHIVED_AND_PURGED'
WHERE r.a_source_file_config_key = pSourceFileConfigKey
AND r.source_file_name = f.filename
AND r.PROCESSING_STATUS = 'ARCHIVED_AND_TRASHED';
END LOOP;
CT_MRDS.ENV_MANAGER.LOG_PROCESS_EVENT('All archived files removed from TRASH folder and marked as ARCHIVED_AND_PURGED.','INFO');
ELSE
-- Keep files in TRASH folder (status remains ARCHIVED_AND_TRASHED)
CT_MRDS.ENV_MANAGER.LOG_PROCESS_EVENT('Archived files kept in TRASH folder for retention (status: ARCHIVED_AND_TRASHED).','INFO');
END IF;
--ROLLBACK PART
--ROLLBACK PROCESS in case of FAILURE (restore files from TRASH)
--ROLLBACK PROCESS in case of FAILURE (restore files from TRASH subfolder in DATA bucket)
ELSIF vProcessControlStatus = 'MOVE_FILE_TO_TRASH_FAILURE' THEN
FOR f in ( SELECT vf.filename, vf.pathname
FROM TABLE(vfiles) vf
JOIN CT_MRDS.A_SOURCE_FILE_RECEIVED r
ON r.source_file_name = vf.filename
AND r.a_source_file_config_key = pSourceFileConfigKey
AND r.PROCESSING_STATUS = 'ARCHIVED'
AND r.PROCESSING_STATUS IN ('ARCHIVED_AND_TRASHED', 'ARCHIVED_AND_PURGED')
AND vf.year = ym_loop.year
AND vf.month = ym_loop.month
) LOOP
@@ -291,7 +309,7 @@ AS
target_object_uri => f.pathname||'/'||f.filename,
target_credential_name => ENV_MANAGER.gvCredentialName
);
CT_MRDS.ENV_MANAGER.LOG_PROCESS_EVENT('File restored from TRASH.','DEBUG', f.pathname||'/'||f.filename);
CT_MRDS.ENV_MANAGER.LOG_PROCESS_EVENT('File restored from TRASH folder.','DEBUG', f.pathname||'/'||f.filename);
UPDATE CT_MRDS.A_SOURCE_FILE_RECEIVED r
SET PROCESSING_STATUS = 'INGESTED'
@@ -302,7 +320,7 @@ AS
EXCEPTION
WHEN OTHERS THEN
CT_MRDS.ENV_MANAGER.LOG_PROCESS_EVENT('Failed to restore file from TRASH.','ERROR', replace(f.pathname,'ODS','TRASH')||'/'||f.filename);
CT_MRDS.ENV_MANAGER.LOG_PROCESS_EVENT('Failed to restore file from TRASH folder.','ERROR', replace(f.pathname,'ODS','TRASH')||'/'||f.filename);
CT_MRDS.ENV_MANAGER.LOG_PROCESS_EVENT(CT_MRDS.ENV_MANAGER.GET_ERROR_STACK(pFormat => 'TABLE', pCode=> SQLCODE), 'ERROR', vParameters);
vProcessControlStatus := 'RESTORE_FILE_FROM_TRASH_FAILURE';
END;
@@ -363,9 +381,7 @@ AS
CT_MRDS.ENV_MANAGER.LOG_PROCESS_EVENT(CT_MRDS.ENV_MANAGER.GET_ERROR_STACK(pFormat => 'TABLE', pCode=> SQLCODE), 'ERROR', vParameters);
RAISE_APPLICATION_ERROR(CT_MRDS.ENV_MANAGER.CODE_DROP_EXPORTED_FILES_FAILED, CT_MRDS.ENV_MANAGER.MSG_DROP_EXPORTED_FILES_FAILED);
END;
-- END of "Try to drop EXPORTED FILES"
CT_MRDS.ENV_MANAGER.LOG_PROCESS_EVENT('All archived files had been dropped.','INFO');
-- END of "Try to drop EXPORTED FILES"
CT_MRDS.ENV_MANAGER.LOG_PROCESS_EVENT('End Archiving for YEAR_MONTH: '||ym_loop.year||'_'||ym_loop.month ,'INFO');
END LOOP; --ym_loop end (YEAR_MONTH)
@@ -523,15 +539,19 @@ AS
----------------------------------------------------------------------------------------------------
FUNCTION ARCHIVE_TABLE_DATA (
pSourceFileConfigKey IN CT_MRDS.A_SOURCE_FILE_CONFIG.A_SOURCE_FILE_CONFIG_KEY%TYPE
pSourceFileConfigKey IN CT_MRDS.A_SOURCE_FILE_CONFIG.A_SOURCE_FILE_CONFIG_KEY%TYPE,
pKeepInTrash IN BOOLEAN DEFAULT TRUE
) RETURN PLS_INTEGER
IS
vParameters CT_MRDS.A_PROCESS_LOG.PROCEDURE_PARAMETERS%TYPE;
BEGIN
vParameters := CT_MRDS.ENV_MANAGER.FORMAT_PARAMETERS(SYS.ODCIVARCHAR2LIST('pSourceFileConfigKey => '||nvl(to_char(pSourceFileConfigKey), 'NULL')));
vParameters := CT_MRDS.ENV_MANAGER.FORMAT_PARAMETERS(SYS.ODCIVARCHAR2LIST(
'pSourceFileConfigKey => '||nvl(to_char(pSourceFileConfigKey), 'NULL'),
'pKeepInTrash => '||CASE WHEN pKeepInTrash THEN 'TRUE' ELSE 'FALSE' END
));
CT_MRDS.ENV_MANAGER.LOG_PROCESS_EVENT('Start','INFO', vParameters);
----
ARCHIVE_TABLE_DATA(pSourceFileConfigKey => pSourceFileConfigKey);
ARCHIVE_TABLE_DATA(pSourceFileConfigKey => pSourceFileConfigKey, pKeepInTrash => pKeepInTrash);
----
CT_MRDS.ENV_MANAGER.LOG_PROCESS_EVENT('End','INFO',vParameters);
RETURN SQLCODE;

View File

@@ -17,12 +17,13 @@ AS
**/
-- Package Version Information (Semantic Versioning: MAJOR.MINOR.PATCH)
PACKAGE_VERSION CONSTANT VARCHAR2(10) := '3.1.2';
PACKAGE_BUILD_DATE CONSTANT VARCHAR2(20) := '2026-02-06 11:30:00';
PACKAGE_VERSION CONSTANT VARCHAR2(10) := '3.2.0';
PACKAGE_BUILD_DATE CONSTANT VARCHAR2(20) := '2026-02-06 14:00:00';
PACKAGE_AUTHOR CONSTANT VARCHAR2(100) := 'Grzegorz Michalski';
-- Version History (Latest changes first)
VERSION_HISTORY CONSTANT VARCHAR2(4000) :=
'3.2.0 (2026-02-06): Added pKeepInTrash parameter (DEFAULT TRUE) to ARCHIVE_TABLE_DATA for TRASH folder retention control - files kept in TRASH subfolder (DATA bucket) by default for safety and compliance' || CHR(13)||CHR(10) ||
'3.1.2 (2026-02-06): Fixed missing PARTITION_YEAR/PARTITION_MONTH assignments in UPDATE statement and export query circular dependency (now filters by workflow_start instead of partition fields)' || CHR(13)||CHR(10) ||
'3.1.1 (2026-02-06): Fixed ORA-01422 error when DBMS_CLOUD.EXPORT_DATA creates multiple parquet files (parallel execution). Now stores archive directory prefix instead of individual filenames' || CHR(13)||CHR(10) ||
'3.1.0 (2026-01-29): Added function overloads for ARCHIVE_TABLE_DATA and GATHER_TABLE_STAT returning SQLCODE for Python library integration' || CHR(13)||CHR(10) ||
@@ -38,9 +39,11 @@ AS
* @desc Wrapper procedure for DBMS_CLOUD.EXPORT_DATA.
* Exports data from table specified by pSourceFileConfigKey(A_SOURCE_FILE_CONFIG.A_SOURCE_FILE_CONFIG_KEY) into PARQUET file on OCI infrastructure.
* Each YEAR_MONTH pair goes to a separate file (implicit partitioning).
* @param pKeepInTrash - When TRUE (default), files are kept in TRASH folder (DATA bucket subfolder) for safety. When FALSE, files are deleted from TRASH after successful archive.
**/
PROCEDURE ARCHIVE_TABLE_DATA (
pSourceFileConfigKey IN CT_MRDS.A_SOURCE_FILE_CONFIG.A_SOURCE_FILE_CONFIG_KEY%TYPE
pSourceFileConfigKey IN CT_MRDS.A_SOURCE_FILE_CONFIG.A_SOURCE_FILE_CONFIG_KEY%TYPE,
pKeepInTrash IN BOOLEAN DEFAULT TRUE
);
/**
@@ -48,11 +51,13 @@ AS
* @desc Function overload for ARCHIVE_TABLE_DATA procedure.
* Returns SQLCODE for Python library integration.
* Calls the main ARCHIVE_TABLE_DATA procedure and captures execution result.
* @param pKeepInTrash - When TRUE (default), files are kept in TRASH folder (DATA bucket subfolder) for safety. When FALSE, files are deleted from TRASH after successful archive.
* @example SELECT FILE_ARCHIVER.ARCHIVE_TABLE_DATA(pSourceFileConfigKey => 123) FROM DUAL;
* @ex_rslt 0 (success) or error code
**/
FUNCTION ARCHIVE_TABLE_DATA (
pSourceFileConfigKey IN CT_MRDS.A_SOURCE_FILE_CONFIG.A_SOURCE_FILE_CONFIG_KEY%TYPE
pSourceFileConfigKey IN CT_MRDS.A_SOURCE_FILE_CONFIG.A_SOURCE_FILE_CONFIG_KEY%TYPE,
pKeepInTrash IN BOOLEAN DEFAULT TRUE
) RETURN PLS_INTEGER;

View File

@@ -32,10 +32,11 @@ PROMPT =========================================================================
PROMPT This will restore FILE_ARCHIVER to v2.0.0
PROMPT
PROMPT Rollback steps:
PROMPT 1. Remove validation trigger
PROMPT 2. Drop ARCHIVAL_STRATEGY and MINIMUM_AGE_MONTHS columns
PROMPT 3. Restore FILE_ARCHIVER package to v2.0.0
PROMPT 4. Revert all archival strategies to THRESHOLD_BASED
PROMPT 1. Rollback TRASH retention statuses
PROMPT 2. Remove validation trigger
PROMPT 3. Drop ARCHIVAL_STRATEGY and MINIMUM_AGE_MONTHS columns
PROMPT 4. Restore FILE_ARCHIVER package to v2.0.0
PROMPT 5. Revert all archival strategies to THRESHOLD_BASED
PROMPT
PROMPT Timestamp:
SELECT TO_CHAR(SYSDATE, 'YYYY-MM-DD HH24:MI:SS') AS rollback_start FROM DUAL;
@@ -54,27 +55,32 @@ WHENEVER SQLERROR CONTINUE
-- Rollback steps (in reverse order)
PROMPT
PROMPT Step 1/5: Dropping validation trigger
PROMPT Step 1/6: Rolling back TRASH retention statuses
PROMPT ================================================
@@90_MARS_828_rollback_trash_retention_statuses.sql
PROMPT
PROMPT Step 2/6: Dropping validation trigger
PROMPT ======================================
@@93_MARS_828_rollback_trigger.sql
PROMPT
PROMPT Step 2/5: Dropping archival strategy columns
PROMPT Step 3/6: Dropping archival strategy columns
PROMPT =============================================
@@94_MARS_828_rollback_columns.sql
PROMPT
PROMPT Step 3/5: Restoring FILE_ARCHIVER Package Specification v2.0.0
PROMPT Step 4/6: Restoring FILE_ARCHIVER Package Specification v2.0.0
PROMPT ===============================================================
@@91_MARS_828_rollback_FILE_ARCHIVER_SPEC.sql
PROMPT
PROMPT Step 4/5: Restoring FILE_ARCHIVER Package Body v2.0.0
PROMPT Step 5/6: Restoring FILE_ARCHIVER Package Body v2.0.0
PROMPT ======================================================
@@92_MARS_828_rollback_FILE_ARCHIVER_BODY.sql
PROMPT
PROMPT Step 5/5: Verifying tracked packages
PROMPT Step 6/6: Verifying tracked packages
PROMPT =====================================
@@verify_packages_version.sql

View File

@@ -19,7 +19,7 @@ CREATE TABLE CT_MRDS.A_SOURCE_FILE_RECEIVED (
ARCH_FILE_NAME VARCHAR2(1000),
CONSTRAINT A_SOURCE_FILE_RECEIVED_PK PRIMARY KEY (A_SOURCE_FILE_RECEIVED_KEY),
CONSTRAINT ASFR_A_SOURCE_FILE_CONFIG_KEY_FK FOREIGN KEY(A_SOURCE_FILE_CONFIG_KEY) REFERENCES CT_MRDS.A_SOURCE_FILE_CONFIG(A_SOURCE_FILE_CONFIG_KEY),
CONSTRAINT A_SOURCE_FILE_RECEIVED_CHK CHECK (PROCESSING_STATUS IN ('RECEIVED', 'VALIDATED', 'READY_FOR_INGESTION', 'INGESTED', 'ARCHIVED'))
CONSTRAINT A_SOURCE_FILE_RECEIVED_CHK CHECK (PROCESSING_STATUS IN ('RECEIVED', 'VALIDATED', 'READY_FOR_INGESTION', 'INGESTED', 'ARCHIVED', 'ARCHIVED_AND_TRASHED', 'ARCHIVED_AND_PURGED'))
) TABLESPACE "DATA";
-- Unique index for file identification (workaround for TIMESTAMP WITH TIMEZONE constraint limitation)

View File

@@ -18,7 +18,7 @@ The FILE_ARCHIVER package provides flexible archival strategies that accommodate
- **Schema**: CT_MRDS
- **Package**: FILE_ARCHIVER
- **Current Version**: 3.1.0
- **Current Version**: 3.2.0
- **Dependencies**: ENV_MANAGER, FILE_MANAGER, cloud_wrapper, A_SOURCE_FILE_CONFIG, A_SOURCE_FILE_RECEIVED, A_WORKFLOW_HISTORY
### Critical Prerequisites
@@ -177,30 +177,46 @@ WHERE ...;
├─ Active data processing (Airflow + DBT)
├─ External tables read data from bucket
├─ Status: INGESTED
└─ FILE_ARCHIVER.ARCHIVE_TABLE_DATA archives based on strategy
├─ FILE_ARCHIVER.ARCHIVE_TABLE_DATA archives based on strategy
└─ CSV files moved to TRASH subfolder (ODS → TRASH/)
2.1 TRASH Subfolder (DATA Bucket - File Retention)
├─ Located in DATA bucket (e.g., TRASH/LM/TABLE_NAME)
├─ Stores CSV files after archival to Parquet
├─ Status: ARCHIVED_AND_TRASHED (default retention)
├─ Enables rollback if archival issues occur
└─ Optional cleanup: ARCHIVED_AND_PURGED (pKeepInTrash=FALSE)
3. ARCHIVE Bucket (Long-term Storage)
├─ Historical data in Parquet format
├─ Hive-style partitioning: PARTITION_YEAR=/PARTITION_MONTH=
├─ Status: ARCHIVED
├─ Status: ARCHIVED_AND_TRASHED or ARCHIVED_AND_PURGED
└─ Optimized for big data analytics (Spark, Hive)
```
### Archival Process
The FILE_ARCHIVER package automatically manages data movement from ODS to ARCHIVE:
**Key Procedures**:
- `ARCHIVE_TABLE_DATA` - Main archival procedure using strategy-specific WHERE clause
- `ARCHIVE_TABLE_DATA(pSourceFileConfigKey, pKeepInTrash)` - Main archival procedure using strategy-specific WHERE clause
- `pKeepInTrash` (BOOLEAN, DEFAULT TRUE) - Controls TRASH folder retention
- TRUE: Files kept in TRASH folder for safety and rollback capability (default)
- FALSE: Files deleted from TRASH folder after successful archival
- `GET_ARCHIVAL_WHERE_CLAUSE` - Returns WHERE clause based on configured strategy
- `GATHER_TABLE_STAT` - Calculates archival statistics using strategy logic
**Archival Execution**:
```sql
-- Triggered by FILE_MANAGER or scheduled job
-- Default behavior: Keep files in TRASH folder (ARCHIVED_AND_TRASHED status)
BEGIN
CT_MRDS.FILE_ARCHIVER.ARCHIVE_TABLE_DATA(
pSourceFileConfig => vSourceFileConfigRecord
pSourceFileConfigKey => vSourceFileConfigKey,
pKeepInTrash => TRUE -- DEFAULT value
);
END;
/
-- Optional: Delete files from TRASH after archival (ARCHIVED_AND_PURGED status)
BEGIN
CT_MRDS.FILE_ARCHIVER.ARCHIVE_TABLE_DATA(
pSourceFileConfigKey => vSourceFileConfigKey,
pKeepInTrash => FALSE -- Cleanup TRASH folder
);
END;
/
@@ -210,7 +226,9 @@ END;
- Package retrieves ARCHIVAL_STRATEGY from A_SOURCE_FILE_CONFIG
- GET_ARCHIVAL_WHERE_CLAUSE generates appropriate WHERE clause
- Data matching criteria moved from ODS to ARCHIVE bucket
- Parquet format with Hive-style partitioning applied
- CSV files moved to TRASH subfolder in DATA bucket (ODS/ → TRASH/)
- Parquet format with Hive-style partitioning applied to ARCHIVE bucket
- TRASH retention controlled by pKeepInTrash parameter
## Configuration Examples
@@ -527,8 +545,11 @@ WHERE object_name = 'FILE_ARCHIVER';
### OCI Buckets
- **INBOX**: Incoming file validation (`'INBOX/{SOURCE}/{SOURCE_FILE_ID}/{TABLE_NAME}/'`)
- **ODS/DATA**: Operational data processing (`'ODS/{SOURCE}/{TABLE_NAME}/'`)
- **TRASH**: File retention subfolder in DATA bucket (`'TRASH/{SOURCE}/{TABLE_NAME}/'`) - CSV files after archival
- **ARCHIVE**: Historical data storage (`'ARCHIVE/{SOURCE}/{TABLE_NAME}/PARTITION_YEAR=/PARTITION_MONTH=/'`)
**Note**: TRASH is NOT a separate bucket - it's a subfolder within the DATA bucket for file retention and rollback capability.
## Best Practices
### Strategy Selection Guidelines
@@ -609,10 +630,53 @@ WHERE object_name = 'FILE_ARCHIVER';
- Check for tables without archival configuration
- Optimize MINIMUM_AGE_MONTHS based on actual usage patterns
### TRASH Folder Retention Best Practices
1. **Default Behavior (pKeepInTrash = TRUE - Recommended)**:
- Keeps CSV files in TRASH folder after archival
- Provides safety net for rollback if archival issues occur
- Supports compliance and audit requirements
- Status: ARCHIVED_AND_TRASHED
- Use for: Production environments, regulatory compliance, critical data
2. **TRASH Cleanup (pKeepInTrash = FALSE)**:
- Deletes CSV files from TRASH folder after successful archival
- Reduces storage costs in DATA bucket
- Status: ARCHIVED_AND_PURGED
- Use for: Non-critical data, storage optimization, test environments
3. **Monitoring TRASH Folder**:
```sql
-- Check files in TRASH retention
SELECT
SOURCE_FILE_NAME,
PROCESSING_STATUS,
ARCH_FILE_NAME,
PARTITION_YEAR,
PARTITION_MONTH
FROM CT_MRDS.A_SOURCE_FILE_RECEIVED
WHERE PROCESSING_STATUS IN ('ARCHIVED_AND_TRASHED', 'ARCHIVED_AND_PURGED')
AND RECEPTION_DATE > SYSDATE - 30
ORDER BY PROCESSING_STATUS, RECEPTION_DATE DESC;
```
4. **TRASH Folder Structure**:
```
DATA Bucket:
├── ODS/LM/STANDING_FACILITIES/file.csv -- Active operational data
└── TRASH/LM/STANDING_FACILITIES/file.csv -- Retained after archival
ARCHIVE Bucket:
└── ARCHIVE/LM/STANDING_FACILITIES/
└── PARTITION_YEAR=2026/
└── PARTITION_MONTH=02/
└── *.parquet -- Archived data
```
## Author
Created by: Grzegorz Michalski
Date: 2026-02-04
Date: 2026-02-06
Schema: CT_MRDS
Package: FILE_ARCHIVER
Version: 3.1.0
Version: 3.2.0

View File

@@ -371,11 +371,14 @@ INBOX Bucket - Pattern: 'INBOX/{SOURCE}/{SOURCE_FILE_ID}/{TABLE_NAME}/'
└── {pTableId}/ -- e.g., "A_UC_DISSEM_METADATA_LOADS", "STANDING_FACILITIES"
└── files matching {pSourceFileNamePattern}
ODS Bucket - Pattern: 'ODS/{SOURCE}/{TABLE_NAME}/'
└── ODS/
DATA Bucket - Patterns: 'ODS/{SOURCE}/{TABLE_NAME}/' and 'TRASH/{SOURCE}/{TABLE_NAME}/'
├── ODS/
│ └── {pSourceKey}/ -- e.g., "C2D", "LM"
│ └── {pTableId}/ -- e.g., "A_UC_DISSEM_METADATA_LOADS", "STANDING_FACILITIES"
│ └── processed files
└── TRASH/ -- File retention subfolder (not a separate bucket)
└── {pSourceKey}/ -- e.g., "C2D", "LM"
└── {pTableId}/ -- e.g., "A_UC_DISSEM_METADATA_LOADS", "STANDING_FACILITIES"
└── processed files
└── {pTableId}/ -- CSV files after archival (ARCHIVED_AND_TRASHED status)
ARCHIVE Bucket - Pattern: 'ARCHIVE/{SOURCE}/{TABLE_NAME}/'
└── ARCHIVE/
@@ -389,9 +392,11 @@ ARCHIVE Bucket - Pattern: 'ARCHIVE/{SOURCE}/{TABLE_NAME}/'
**Critical Path Pattern Requirements:**
- **INBOX** requires full 3-level path: `INBOX/{SOURCE}/{SOURCE_FILE_ID}/{TABLE_NAME}/`
- **ODS** uses simplified 2-level path: `ODS/{SOURCE}/{TABLE_NAME}/` (no SOURCE_FILE_ID)
- **TRASH** uses simplified 2-level path: `TRASH/{SOURCE}/{TABLE_NAME}/` (subfolder in DATA bucket)
- **ARCHIVE** uses simplified 2-level path: `ARCHIVE/{SOURCE}/{TABLE_NAME}/` (no SOURCE_FILE_ID)
- **All patterns are mandatory** - no simplified versions allowed
- File names must match `pSourceFileNamePattern` for automatic processing
- **Note**: TRASH is NOT a separate bucket - it's a subfolder within the DATA bucket
## Configuration Management Best Practices
@@ -693,7 +698,10 @@ SELECT FILE_MANAGER.PROCESS_SOURCE_FILE(
1. **File Arrival**: File is uploaded to Oracle Cloud Storage bucket
2. **Registration**: FILE_MANAGER.REGISTER_SOURCE_FILE_RECEIVED() creates record
3. **Status**: RECEIVED → VALIDATED → READY_FOR_INGESTION → INGESTED → ARCHIVED
3. **Status**: RECEIVED → VALIDATED → READY_FOR_INGESTION → INGESTED → ARCHIVED_AND_TRASHED → ARCHIVED_AND_PURGED (optional)
- Legacy ARCHIVED status maintained for backward compatibility
- ARCHIVED_AND_TRASHED: Files archived to Parquet and kept in TRASH folder (default)
- ARCHIVED_AND_PURGED: Files archived to Parquet and deleted from TRASH folder
4. **External Table**: Created automatically based on template table
5. **Data Loading**: Data is loaded into target ODS schema
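6. **Archival**: Data is exported to Parquet in the ARCHIVE bucket after processing; the source CSV file is kept in the TRASH folder of the DATA bucket (ARCHIVED_AND_TRASHED) or deleted from TRASH when pKeepInTrash = FALSE (ARCHIVED_AND_PURGED)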

View File

@@ -164,7 +164,9 @@ ORDER BY RECEPTION_DATE DESC;
| `VALIDATED` | File validation completed successfully | After successful validation |
| `READY_FOR_INGESTION` | File validated and prepared for Airflow+DBT processing | After successful validation and preparation |
| `INGESTED` | Data has been consumed/ingested by target system | After data consumption |
| `ARCHIVED` | Data exported to PARQUET format and file moved to archival storage | Final archival state using FILE_ARCHIVER |
| `ARCHIVED` | (Legacy) Data exported to PARQUET format and file moved to archival storage | Legacy archival state (backward compatibility) |
| `ARCHIVED_AND_TRASHED` | Data archived to Parquet, CSV files kept in TRASH folder (default) | Archival with file retention using FILE_ARCHIVER |
| `ARCHIVED_AND_PURGED` | Data archived to Parquet, CSV files deleted from TRASH folder | Archival with TRASH cleanup (pKeepInTrash=FALSE) |
| `VALIDATION_FAILED` | File validation failed | After failed validation |

View File

@@ -68,7 +68,9 @@ ARCH_FILE_NAME VARCHAR2 -- Parquet archive file path
**Status Workflow**:
```
RECEIVED → VALIDATED → READY_FOR_INGESTION → INGESTED → ARCHIVED
RECEIVED → VALIDATED → READY_FOR_INGESTION → INGESTED → ARCHIVED_AND_TRASHED → ARCHIVED_AND_PURGED (optional)
Note: Legacy ARCHIVED status maintained for backward compatibility
```
**Usage Pattern**:

View File

@@ -394,6 +394,9 @@ DATA Bucket:
├── ODS/
│ └── {SOURCE}/
│ └── {TABLE_NAME}/
└── TRASH/ -- File retention subfolder (not a separate bucket)
└── {SOURCE}/
└── {TABLE_NAME}/ -- CSV files after archival (ARCHIVED_AND_TRASHED status)
ARCHIVE Bucket:
└── ARCHIVE/
@@ -402,6 +405,8 @@ ARCHIVE Bucket:
└── PARTITION_YEAR=*/
└── PARTITION_MONTH=*/
└── *.parquet
Note: TRASH is a subfolder within the DATA bucket for file retention and rollback capability.
```
### 4. Migration Checklist

View File

@@ -123,7 +123,8 @@ WHEN OTHERS THEN
```sql
-- Dodano 'VALIDATION_FAILED' do dozwolonych statusów (lista obejmuje również 'ARCHIVED_AND_TRASHED' i 'ARCHIVED_AND_PURGED')
PROCESSING_STATUS IN ('RECEIVED', 'VALIDATED', 'READY_FOR_INGESTION',
'INGESTED', 'ARCHIVED', 'VALIDATION_FAILED')
'INGESTED', 'ARCHIVED', 'ARCHIVED_AND_TRASHED',
'ARCHIVED_AND_PURGED', 'VALIDATION_FAILED')
```
## 📊 Testowanie