Files
mars/MARS_Packages/REL02_POST/MARS-1409/new_version/FILE_ARCHIVER.pkg

284 lines
18 KiB
Plaintext

create or replace PACKAGE CT_MRDS.FILE_ARCHIVER
AUTHID CURRENT_USER
AS
/**
* General comment for package: Please put comments for functions and procedures as shown in below example.
* It is a standard.
* The structure of comment is used by GET_PACKAGE_DOCUMENTATION function
* which returns documentation text for confluence page (to Copy-Paste it).
**/
-- Example comment:
/**
* @name EX_PROCEDURE_NAME
* @desc Procedure description
* @example select LOGGING_AND_ERROR_MANAGER.EX_PROCEDURE_NAME(pParameter => 129) from dual;
* @ex_rslt Example Result
**/
-- Package Version Information (Semantic Versioning: MAJOR.MINOR.PATCH)
PACKAGE_VERSION CONSTANT VARCHAR2(10) := '3.4.0';
PACKAGE_BUILD_DATE CONSTANT VARCHAR2(20) := '2026-03-17 11:00:00';
PACKAGE_AUTHOR CONSTANT VARCHAR2(100) := 'Grzegorz Michalski';
-- Version History (Latest changes first)
VERSION_HISTORY CONSTANT VARCHAR2(4000) :=
'3.4.0 (2026-03-17): MARS-1409 - Added IS_WORKFLOW_SUCCESS_REQUIRED flag to A_SOURCE_FILE_CONFIG (DEFAULT Y). ' ||
'Y=standard DBT flow (WORKFLOW_SUCCESSFUL=Y required), N=bypass for manual/non-DBT sources. ' ||
'Flag value stored in A_TABLE_STAT and A_TABLE_STAT_HIST for full audit of statistics basis.' || CHR(13)||CHR(10) ||
'3.3.1 (2026-03-13): Fixed ORA-29913 handling in ARCHIVE_TABLE_DATA (graceful RETURN when ODS bucket is empty) and GATHER_TABLE_STAT (saves zero statistics instead of raising error)' || CHR(13)||CHR(10) ||
'3.3.0 (2026-02-11): Added IS_ARCHIVE_ENABLED and IS_KEPT_IN_TRASH columns to A_SOURCE_FILE_CONFIG for selective archiving and config-based TRASH policy. Removed pKeepInTrash parameter (now from config). Added ARCHIVE_ALL batch procedure with 3-level granularity (config/source/all). Added GATHER_TABLE_STAT_ALL batch statistics procedure with 3-level granularity. Added RESTORE_FILE_FROM_TRASH and PURGE_TRASH_FOLDER with 3-level granularity' || CHR(13)||CHR(10) ||
'3.2.1 (2026-02-10): Fixed status update - ARCHIVED → ARCHIVED_AND_TRASHED when moving files to TRASH folder (critical bug fix)' || CHR(13)||CHR(10) ||
'3.2.0 (2026-02-06): Added pKeepInTrash parameter (DEFAULT TRUE) to ARCHIVE_TABLE_DATA for TRASH folder retention control - files kept in TRASH subfolder (DATA bucket) by default for safety and compliance' || CHR(13)||CHR(10) ||
'3.1.2 (2026-02-06): Fixed missing PARTITION_YEAR/PARTITION_MONTH assignments in UPDATE statement and export query circular dependency (now filters by workflow_start instead of partition fields)' || CHR(13)||CHR(10) ||
'3.1.1 (2026-02-06): Fixed ORA-01422 error when DBMS_CLOUD.EXPORT_DATA creates multiple parquet files (parallel execution). Now stores archive directory prefix instead of individual filenames' || CHR(13)||CHR(10) ||
'3.1.0 (2026-01-29): Added function overloads for ARCHIVE_TABLE_DATA and GATHER_TABLE_STAT returning SQLCODE for Python library integration' || CHR(13)||CHR(10) ||
'3.0.0 (2026-01-27): MARS-828 - Added flexible archival strategies (MINIMUM_AGE_MONTHS with 0=current month, HYBRID) via ARCHIVAL_STRATEGY configuration' || CHR(13)||CHR(10) ||
'2.0.0 (2025-10-22): Added package versioning system using centralized ENV_MANAGER functions' || CHR(13)||CHR(10) ||
'1.5.0 (2025-10-18): Enhanced ARCHIVE_TABLE_DATA with Hive-style partitioning support' || CHR(13)||CHR(10) ||
'1.0.0 (2025-09-15): Initial release with table archival and statistics gathering';
cgBL CONSTANT VARCHAR2(2) := ENV_MANAGER.cgBL;
/**
* @name GET_TABLE_STAT
* @desc Private function to retrieve table statistics for archival processing.
* Returns A_TABLE_STAT record with table metadata and row counts.
* @param pSourceFileConfigKey - Configuration key for source file
* @return CT_MRDS.A_TABLE_STAT%ROWTYPE - Table statistics record
* @private Internal function for archival operations
**/
FUNCTION GET_TABLE_STAT(pSourceFileConfigKey IN NUMBER) RETURN CT_MRDS.A_TABLE_STAT%ROWTYPE;
/**
* @name ARCHIVE_TABLE_DATA
* @desc Wrapper procedure for DBMS_CLOUD.EXPORT_DATA.
* Exports data from table specified by pSourceFileConfigKey(A_SOURCE_FILE_CONFIG.A_SOURCE_FILE_CONFIG_KEY) into PARQUET file on OCI infrustructure.
* Each YEAR_MONTH pair goes to seperate file (implicit partitioning).
* TRASH policy is controlled by A_SOURCE_FILE_CONFIG.IS_KEPT_IN_TRASH column ('Y'=keep in TRASH, 'N'=delete immediately).
**/
PROCEDURE ARCHIVE_TABLE_DATA (
pSourceFileConfigKey IN CT_MRDS.A_SOURCE_FILE_CONFIG.A_SOURCE_FILE_CONFIG_KEY%TYPE
);
/**
* @name FN_ARCHIVE_TABLE_DATA
* @desc Function wrapper for ARCHIVE_TABLE_DATA procedure.
* Returns SQLCODE for Python library integration.
* Calls the main ARCHIVE_TABLE_DATA procedure and captures execution result.
* TRASH policy is controlled by A_SOURCE_FILE_CONFIG.IS_KEPT_IN_TRASH column ('Y'=keep in TRASH, 'N'=delete immediately).
* @example SELECT FILE_ARCHIVER.FN_ARCHIVE_TABLE_DATA(pSourceFileConfigKey => 123) FROM DUAL;
* @ex_rslt 0 (success) or error code
**/
FUNCTION FN_ARCHIVE_TABLE_DATA (
pSourceFileConfigKey IN CT_MRDS.A_SOURCE_FILE_CONFIG.A_SOURCE_FILE_CONFIG_KEY%TYPE
) RETURN PLS_INTEGER;
/**
* @name GATHER_TABLE_STAT
* @desc Gather info about EXTERNAL TABLE specified by pSourceFileConfigKey parameter (A_SOURCE_FILE_CONFIG.A_SOURCE_FILE_CONFIG_KEY).
* Data is inserted into A_TABLE_STAT and A_TABLE_STAT_HIST.
**/
PROCEDURE GATHER_TABLE_STAT (
pSourceFileConfigKey IN CT_MRDS.A_SOURCE_FILE_CONFIG.A_SOURCE_FILE_CONFIG_KEY%TYPE
);
/**
* @name FN_GATHER_TABLE_STAT
* @desc Function wrapper for GATHER_TABLE_STAT procedure.
* Returns SQLCODE for Python library integration.
* Calls the main GATHER_TABLE_STAT procedure and captures execution result.
* @example SELECT FILE_ARCHIVER.FN_GATHER_TABLE_STAT(pSourceFileConfigKey => 123) FROM DUAL;
* @ex_rslt 0 (success) or error code
**/
FUNCTION FN_GATHER_TABLE_STAT (
pSourceFileConfigKey IN CT_MRDS.A_SOURCE_FILE_CONFIG.A_SOURCE_FILE_CONFIG_KEY%TYPE
) RETURN PLS_INTEGER;
/**
* @name GATHER_TABLE_STAT_ALL
* @desc Multi-level batch statistics gathering procedure with three granularity levels.
* Processes configurations based on IS_ARCHIVE_ENABLED setting (when pOnlyEnabled=TRUE).
* Gathers statistics for external tables and inserts data into A_TABLE_STAT and A_TABLE_STAT_HIST.
* @param pSourceFileConfigKey - (LEVEL 1) Gather stats for specific configuration key (highest priority)
* @param pSourceKey - (LEVEL 2) Gather stats for all tables in source system (e.g., 'LM', 'C2D') (medium priority)
* @param pGatherAll - (LEVEL 3) When TRUE, gather stats for ALL tables across all sources (lowest priority)
* @param pOnlyEnabled - When TRUE (default), only process tables with IS_ARCHIVE_ENABLED='Y'
* @example -- Level 1: CALL FILE_ARCHIVER.GATHER_TABLE_STAT_ALL(pSourceFileConfigKey => 123);
* @example -- Level 2: CALL FILE_ARCHIVER.GATHER_TABLE_STAT_ALL(pSourceKey => 'LM');
* @example -- Level 3: CALL FILE_ARCHIVER.GATHER_TABLE_STAT_ALL(pGatherAll => TRUE);
* @example -- All tables regardless of IS_ARCHIVE_ENABLED: CALL FILE_ARCHIVER.GATHER_TABLE_STAT_ALL(pGatherAll => TRUE, pOnlyEnabled => FALSE);
**/
PROCEDURE GATHER_TABLE_STAT_ALL (
pSourceFileConfigKey IN CT_MRDS.A_SOURCE_FILE_CONFIG.A_SOURCE_FILE_CONFIG_KEY%TYPE DEFAULT NULL,
pSourceKey IN CT_MRDS.A_SOURCE.A_SOURCE_KEY%TYPE DEFAULT NULL,
pGatherAll IN BOOLEAN DEFAULT FALSE,
pOnlyEnabled IN BOOLEAN DEFAULT TRUE
);
/**
* @name FN_GATHER_TABLE_STAT_ALL
* @desc Function wrapper for GATHER_TABLE_STAT_ALL procedure.
* Returns SQLCODE for Python library integration.
* Calls the main GATHER_TABLE_STAT_ALL procedure and captures execution result.
* @param pSourceFileConfigKey - (LEVEL 1) Gather stats for specific configuration key (highest priority)
* @param pSourceKey - (LEVEL 2) Gather stats for all tables in source system (medium priority)
* @param pGatherAll - (LEVEL 3) When TRUE, gather stats for ALL tables across all sources (lowest priority)
* @param pOnlyEnabled - When TRUE (default), only process tables with IS_ARCHIVE_ENABLED='Y'
* @example SELECT FILE_ARCHIVER.FN_GATHER_TABLE_STAT_ALL(pSourceKey => 'LM') FROM DUAL;
* @ex_rslt 0 (success) or error code
**/
FUNCTION FN_GATHER_TABLE_STAT_ALL (
pSourceFileConfigKey IN CT_MRDS.A_SOURCE_FILE_CONFIG.A_SOURCE_FILE_CONFIG_KEY%TYPE DEFAULT NULL,
pSourceKey IN CT_MRDS.A_SOURCE.A_SOURCE_KEY%TYPE DEFAULT NULL,
pGatherAll IN BOOLEAN DEFAULT FALSE,
pOnlyEnabled IN BOOLEAN DEFAULT TRUE
) RETURN PLS_INTEGER;
/**
* @name ARCHIVE_ALL
* @desc Multi-level batch archival procedure with three granularity levels.
* Only processes configurations where IS_ARCHIVE_ENABLED='Y'.
* TRASH policy for each table is controlled by individual IS_KEPT_IN_TRASH column.
* @param pSourceFileConfigKey - (LEVEL 1) Archive specific configuration key (highest priority)
* @param pSourceKey - (LEVEL 2) Archive all enabled tables for source system (e.g., 'LM', 'C2D') (medium priority)
* @param pArchiveAll - (LEVEL 3) When TRUE, archive ALL enabled tables across all sources (lowest priority)
* @example -- Level 1: CALL FILE_ARCHIVER.ARCHIVE_ALL(pSourceFileConfigKey => 123);
* @example -- Level 2: CALL FILE_ARCHIVER.ARCHIVE_ALL(pSourceKey => 'LM');
* @example -- Level 3: CALL FILE_ARCHIVER.ARCHIVE_ALL(pArchiveAll => TRUE);
**/
PROCEDURE ARCHIVE_ALL (
pSourceFileConfigKey IN CT_MRDS.A_SOURCE_FILE_CONFIG.A_SOURCE_FILE_CONFIG_KEY%TYPE DEFAULT NULL,
pSourceKey IN CT_MRDS.A_SOURCE.A_SOURCE_KEY%TYPE DEFAULT NULL,
pArchiveAll IN BOOLEAN DEFAULT FALSE
);
/**
* @name FN_ARCHIVE_ALL
* @desc Function wrapper for ARCHIVE_ALL procedure.
* Returns SQLCODE for Python library integration.
* Calls the main ARCHIVE_ALL procedure and captures execution result.
* @param pSourceFileConfigKey - (LEVEL 1) Archive specific configuration key (highest priority)
* @param pSourceKey - (LEVEL 2) Archive all enabled tables for source system (medium priority)
* @param pArchiveAll - (LEVEL 3) When TRUE, archive ALL enabled tables across all sources (lowest priority)
* @example SELECT FILE_ARCHIVER.FN_ARCHIVE_ALL(pSourceKey => 'LM') FROM DUAL;
* @ex_rslt 0 (success) or error code
**/
FUNCTION FN_ARCHIVE_ALL (
pSourceFileConfigKey IN CT_MRDS.A_SOURCE_FILE_CONFIG.A_SOURCE_FILE_CONFIG_KEY%TYPE DEFAULT NULL,
pSourceKey IN CT_MRDS.A_SOURCE.A_SOURCE_KEY%TYPE DEFAULT NULL,
pArchiveAll IN BOOLEAN DEFAULT FALSE
) RETURN PLS_INTEGER;
/**
* @name RESTORE_FILE_FROM_TRASH
* @desc Restores files from TRASH folder back to ODS at three different granularity levels.
* Moves files from TRASH subfolder back to ODS subfolder in DATA bucket.
* Updates status from ARCHIVED_AND_TRASHED to INGESTED and clears archival metadata.
* @param pSourceFileReceivedKey - (LEVEL 3) Specific file to restore by A_SOURCE_FILE_RECEIVED_KEY (highest priority)
* @param pSourceFileConfigKey - (LEVEL 2) Restore all files for specific configuration key (medium priority)
* @param pRestoreAll - (LEVEL 1) When TRUE, restore ALL files with ARCHIVED_AND_TRASHED status (lowest priority)
* @example -- Restore single file: CALL FILE_ARCHIVER.RESTORE_FILE_FROM_TRASH(pSourceFileReceivedKey => 12345);
* @example -- Restore all files for config: CALL FILE_ARCHIVER.RESTORE_FILE_FROM_TRASH(pSourceFileConfigKey => 341);
* @example -- Restore all TRASH globally: CALL FILE_ARCHIVER.RESTORE_FILE_FROM_TRASH(pRestoreAll => TRUE);
**/
PROCEDURE RESTORE_FILE_FROM_TRASH (
pSourceFileReceivedKey IN CT_MRDS.A_SOURCE_FILE_RECEIVED.A_SOURCE_FILE_RECEIVED_KEY%TYPE DEFAULT NULL,
pSourceFileConfigKey IN CT_MRDS.A_SOURCE_FILE_CONFIG.A_SOURCE_FILE_CONFIG_KEY%TYPE DEFAULT NULL,
pRestoreAll IN BOOLEAN DEFAULT FALSE
);
/**
* @name RESTORE_FILE_FROM_TRASH
* @desc Function overload for RESTORE_FILE_FROM_TRASH procedure.
* Returns SQLCODE for Python library integration.
* Calls the main RESTORE_FILE_FROM_TRASH procedure and captures execution result.
* @param pSourceFileReceivedKey - (LEVEL 3) Specific file to restore by A_SOURCE_FILE_RECEIVED_KEY (highest priority)
* @param pSourceFileConfigKey - (LEVEL 2) Restore all files for specific configuration key (medium priority)
* @param pRestoreAll - (LEVEL 1) When TRUE, restore ALL files with ARCHIVED_AND_TRASHED status (lowest priority)
* @example SELECT FILE_ARCHIVER.RESTORE_FILE_FROM_TRASH(pSourceFileReceivedKey => 12345) FROM DUAL;
* @ex_rslt 0 (success) or error code
**/
FUNCTION RESTORE_FILE_FROM_TRASH (
pSourceFileReceivedKey IN CT_MRDS.A_SOURCE_FILE_RECEIVED.A_SOURCE_FILE_RECEIVED_KEY%TYPE DEFAULT NULL,
pSourceFileConfigKey IN CT_MRDS.A_SOURCE_FILE_CONFIG.A_SOURCE_FILE_CONFIG_KEY%TYPE DEFAULT NULL,
pRestoreAll IN BOOLEAN DEFAULT FALSE
) RETURN PLS_INTEGER;
/**
* @name PURGE_TRASH_FOLDER
* @desc Deletes files from TRASH folder at three different granularity levels.
* Updates status from ARCHIVED_AND_TRASHED to ARCHIVED_AND_PURGED for all affected files.
* WARNING: This operation is irreversible - files are permanently deleted from TRASH.
* @param pSourceFileReceivedKey - (LEVEL 3) Specific file to delete by A_SOURCE_FILE_RECEIVED_KEY (highest priority)
* @param pSourceFileConfigKey - (LEVEL 2) Delete all files for specific configuration key (medium priority)
* @param pPurgeAll - (LEVEL 1) When TRUE, delete ALL files with ARCHIVED_AND_TRASHED status (lowest priority)
* @example -- Delete single file: CALL FILE_ARCHIVER.PURGE_TRASH_FOLDER(pSourceFileReceivedKey => 12345);
* @example -- Delete all files for config: CALL FILE_ARCHIVER.PURGE_TRASH_FOLDER(pSourceFileConfigKey => 341);
* @example -- Delete all TRASH globally: CALL FILE_ARCHIVER.PURGE_TRASH_FOLDER(pPurgeAll => TRUE);
**/
PROCEDURE PURGE_TRASH_FOLDER (
pSourceFileReceivedKey IN CT_MRDS.A_SOURCE_FILE_RECEIVED.A_SOURCE_FILE_RECEIVED_KEY%TYPE DEFAULT NULL,
pSourceFileConfigKey IN CT_MRDS.A_SOURCE_FILE_CONFIG.A_SOURCE_FILE_CONFIG_KEY%TYPE DEFAULT NULL,
pPurgeAll IN BOOLEAN DEFAULT FALSE
);
/**
* @name PURGE_TRASH_FOLDER
* @desc Function overload for PURGE_TRASH_FOLDER procedure.
* Returns SQLCODE for Python library integration.
* Calls the main PURGE_TRASH_FOLDER procedure and captures execution result.
* WARNING: This operation is irreversible - files are permanently deleted from TRASH.
* @param pSourceFileReceivedKey - (LEVEL 3) Specific file to delete by A_SOURCE_FILE_RECEIVED_KEY (highest priority)
* @param pSourceFileConfigKey - (LEVEL 2) Delete all files for specific configuration key (medium priority)
* @param pPurgeAll - (LEVEL 1) When TRUE, delete ALL files with ARCHIVED_AND_TRASHED status (lowest priority)
* @example SELECT FILE_ARCHIVER.PURGE_TRASH_FOLDER(pSourceFileReceivedKey => 12345) FROM DUAL;
* @ex_rslt 0 (success) or error code
**/
FUNCTION PURGE_TRASH_FOLDER (
pSourceFileReceivedKey IN CT_MRDS.A_SOURCE_FILE_RECEIVED.A_SOURCE_FILE_RECEIVED_KEY%TYPE DEFAULT NULL,
pSourceFileConfigKey IN CT_MRDS.A_SOURCE_FILE_CONFIG.A_SOURCE_FILE_CONFIG_KEY%TYPE DEFAULT NULL,
pPurgeAll IN BOOLEAN DEFAULT FALSE
) RETURN PLS_INTEGER;
---------------------------------------------------------------------------------------------------------------------------
-- PACKAGE VERSION MANAGEMENT FUNCTIONS
---------------------------------------------------------------------------------------------------------------------------
/**
* @name GET_VERSION
* @desc Returns the current version number of the FILE_ARCHIVER package.
* Uses semantic versioning format (MAJOR.MINOR.PATCH).
* @example SELECT FILE_ARCHIVER.GET_VERSION() FROM DUAL;
* @ex_rslt 2.0.0
**/
FUNCTION GET_VERSION RETURN VARCHAR2;
/**
* @name GET_BUILD_INFO
* @desc Returns comprehensive build information including version, build date, and author.
* Uses centralized ENV_MANAGER.GET_PACKAGE_VERSION_INFO function.
* @example SELECT FILE_ARCHIVER.GET_BUILD_INFO() FROM DUAL;
* @ex_rslt Package: FILE_ARCHIVER
* Version: 2.0.0
* Build Date: 2025-10-22 16:45:00
* Author: Grzegorz Michalski
**/
FUNCTION GET_BUILD_INFO RETURN VARCHAR2;
/**
* @name GET_VERSION_HISTORY
* @desc Returns complete version history with all releases and changes.
* Uses centralized ENV_MANAGER.FORMAT_VERSION_HISTORY function.
* @example SELECT FILE_ARCHIVER.GET_VERSION_HISTORY() FROM DUAL;
* @ex_rslt FILE_ARCHIVER Version History:
* 2.0.0 (2025-10-22): Added package versioning system...
**/
FUNCTION GET_VERSION_HISTORY RETURN VARCHAR2;
END;
/