-- FILE_ARCHIVER package specification (schema CT_MRDS).
CREATE OR REPLACE PACKAGE CT_MRDS.FILE_ARCHIVER
AUTHID CURRENT_USER
AS
    /**
    * General comment for package: Please document functions and procedures as shown in the example below.
    * It is a standard.
    * The comment structure is used by the GET_PACKAGE_DOCUMENTATION function,
    * which returns documentation text for a Confluence page (to copy-paste it).
    **/

    -- Example comment:
    /**
    * @name EX_PROCEDURE_NAME
    * @desc Procedure description
    * @example select LOGGING_AND_ERROR_MANAGER.EX_PROCEDURE_NAME(pParameter => 129) from dual;
    * @ex_rslt Example Result
    **/

    -- Package Version Information (Semantic Versioning: MAJOR.MINOR.PATCH)
    PACKAGE_VERSION    CONSTANT VARCHAR2(10)  := '3.3.0';
    PACKAGE_BUILD_DATE CONSTANT VARCHAR2(20)  := '2026-02-11 12:00:00';
    PACKAGE_AUTHOR     CONSTANT VARCHAR2(100) := 'Grzegorz Michalski';

    -- Version History (Latest changes first).
    -- Entries are separated by CHR(13)||CHR(10); the whole text must fit in VARCHAR2(4000).
    VERSION_HISTORY CONSTANT VARCHAR2(4000) :=
        '3.3.0 (2026-02-11): Added IS_ARCHIVE_ENABLED and IS_KEEP_IN_TRASH columns to A_SOURCE_FILE_CONFIG for selective archiving and config-based TRASH policy. Removed pKeepInTrash parameter (now from config). Added ARCHIVE_ALL batch procedure with 3-level granularity (config/source/all). Added GATHER_TABLE_STAT_ALL batch statistics procedure with 3-level granularity. Added RESTORE_FILE_FROM_TRASH and PURGE_TRASH_FOLDER with 3-level granularity' || CHR(13)||CHR(10) ||
        '3.2.1 (2026-02-10): Fixed status update - ARCHIVED → ARCHIVED_AND_TRASHED when moving files to TRASH folder (critical bug fix)' || CHR(13)||CHR(10) ||
        '3.2.0 (2026-02-06): Added pKeepInTrash parameter (DEFAULT TRUE) to ARCHIVE_TABLE_DATA for TRASH folder retention control - files kept in TRASH subfolder (DATA bucket) by default for safety and compliance' || CHR(13)||CHR(10) ||
        '3.1.2 (2026-02-06): Fixed missing PARTITION_YEAR/PARTITION_MONTH assignments in UPDATE statement and export query circular dependency (now filters by workflow_start instead of partition fields)' || CHR(13)||CHR(10) ||
        '3.1.1 (2026-02-06): Fixed ORA-01422 error when DBMS_CLOUD.EXPORT_DATA creates multiple parquet files (parallel execution). Now stores archive directory prefix instead of individual filenames' || CHR(13)||CHR(10) ||
        '3.1.0 (2026-01-29): Added function overloads for ARCHIVE_TABLE_DATA and GATHER_TABLE_STAT returning SQLCODE for Python library integration' || CHR(13)||CHR(10) ||
        '3.0.0 (2026-01-27): MARS-828 - Added flexible archival strategies (MINIMUM_AGE_MONTHS with 0=current month, HYBRID) via ARCHIVAL_STRATEGY configuration' || CHR(13)||CHR(10) ||
        '2.0.0 (2025-10-22): Added package versioning system using centralized ENV_MANAGER functions' || CHR(13)||CHR(10) ||
        '1.5.0 (2025-10-18): Enhanced ARCHIVE_TABLE_DATA with Hive-style partitioning support' || CHR(13)||CHR(10) ||
        '1.0.0 (2025-09-15): Initial release with table archival and statistics gathering';

    -- Local alias of ENV_MANAGER.cgBL (presumably the shared line-break constant - confirm in ENV_MANAGER).
    cgBL CONSTANT VARCHAR2(2) := ENV_MANAGER.cgBL;

    -- NOTE(review): GET_TABLE_STAT is tagged @private, but because it is declared in the
    -- package specification it is callable by every caller of the package.
    /**
    * @name GET_TABLE_STAT
    * @desc Private function to retrieve table statistics for archival processing.
    * Returns A_TABLE_STAT record with table metadata and row counts.
    * @param pSourceFileConfigKey - Configuration key for source file
    * @return CT_MRDS.A_TABLE_STAT%ROWTYPE - Table statistics record
    * @private Internal function for archival operations
    **/
    FUNCTION GET_TABLE_STAT(pSourceFileConfigKey IN NUMBER) RETURN CT_MRDS.A_TABLE_STAT%ROWTYPE;

    /**
    * @name ARCHIVE_TABLE_DATA
    * @desc Wrapper procedure for DBMS_CLOUD.EXPORT_DATA.
    * Exports data from table specified by pSourceFileConfigKey(A_SOURCE_FILE_CONFIG.A_SOURCE_FILE_CONFIG_KEY) into PARQUET file on OCI infrastructure.
    * Each YEAR_MONTH pair goes to separate file (implicit partitioning).
    * TRASH policy is controlled by A_SOURCE_FILE_CONFIG.IS_KEEP_IN_TRASH column ('Y'=keep in TRASH, 'N'=delete immediately).
    * @param pSourceFileConfigKey - Configuration key (A_SOURCE_FILE_CONFIG.A_SOURCE_FILE_CONFIG_KEY) of the table to archive
    **/
    PROCEDURE ARCHIVE_TABLE_DATA (
        pSourceFileConfigKey IN CT_MRDS.A_SOURCE_FILE_CONFIG.A_SOURCE_FILE_CONFIG_KEY%TYPE
    );

    /**
    * @name FN_ARCHIVE_TABLE_DATA
    * @desc Function wrapper for ARCHIVE_TABLE_DATA procedure.
    * Returns SQLCODE for Python library integration.
    * Calls the main ARCHIVE_TABLE_DATA procedure and captures execution result.
    * TRASH policy is controlled by A_SOURCE_FILE_CONFIG.IS_KEEP_IN_TRASH column ('Y'=keep in TRASH, 'N'=delete immediately).
    * @param pSourceFileConfigKey - Configuration key (A_SOURCE_FILE_CONFIG.A_SOURCE_FILE_CONFIG_KEY) of the table to archive
    * @return PLS_INTEGER - 0 (success) or error code
    * @example SELECT FILE_ARCHIVER.FN_ARCHIVE_TABLE_DATA(pSourceFileConfigKey => 123) FROM DUAL;
    * @ex_rslt 0 (success) or error code
    **/
    FUNCTION FN_ARCHIVE_TABLE_DATA (
        pSourceFileConfigKey IN CT_MRDS.A_SOURCE_FILE_CONFIG.A_SOURCE_FILE_CONFIG_KEY%TYPE
    ) RETURN PLS_INTEGER;

    /**
    * @name GATHER_TABLE_STAT
    * @desc Gather info about EXTERNAL TABLE specified by pSourceFileConfigKey parameter (A_SOURCE_FILE_CONFIG.A_SOURCE_FILE_CONFIG_KEY).
    * Data is inserted into A_TABLE_STAT and A_TABLE_STAT_HIST.
    * @param pSourceFileConfigKey - Configuration key (A_SOURCE_FILE_CONFIG.A_SOURCE_FILE_CONFIG_KEY) of the external table
    **/
    PROCEDURE GATHER_TABLE_STAT (
        pSourceFileConfigKey IN CT_MRDS.A_SOURCE_FILE_CONFIG.A_SOURCE_FILE_CONFIG_KEY%TYPE
    );

    /**
    * @name FN_GATHER_TABLE_STAT
    * @desc Function wrapper for GATHER_TABLE_STAT procedure.
    * Returns SQLCODE for Python library integration.
    * Calls the main GATHER_TABLE_STAT procedure and captures execution result.
    * @param pSourceFileConfigKey - Configuration key (A_SOURCE_FILE_CONFIG.A_SOURCE_FILE_CONFIG_KEY) of the external table
    * @return PLS_INTEGER - 0 (success) or error code
    * @example SELECT FILE_ARCHIVER.FN_GATHER_TABLE_STAT(pSourceFileConfigKey => 123) FROM DUAL;
    * @ex_rslt 0 (success) or error code
    **/
    FUNCTION FN_GATHER_TABLE_STAT (
        pSourceFileConfigKey IN CT_MRDS.A_SOURCE_FILE_CONFIG.A_SOURCE_FILE_CONFIG_KEY%TYPE
    ) RETURN PLS_INTEGER;

    -- NOTE(review): the CALL examples in this specification that pass TRUE/FALSE assume a
    -- database release whose SQL layer accepts BOOLEAN arguments; on older releases invoke
    -- the subprogram from a PL/SQL block instead - confirm against the target version.
    /**
    * @name GATHER_TABLE_STAT_ALL
    * @desc Multi-level batch statistics gathering procedure with three granularity levels.
    * Processes configurations based on IS_ARCHIVE_ENABLED setting (when pOnlyEnabled=TRUE).
    * Gathers statistics for external tables and inserts data into A_TABLE_STAT and A_TABLE_STAT_HIST.
    * @param pSourceFileConfigKey - (LEVEL 1) Gather stats for specific configuration key (highest priority)
    * @param pSourceKey - (LEVEL 2) Gather stats for all tables in source system (e.g., 'LM', 'C2D') (medium priority)
    * @param pGatherAll - (LEVEL 3) When TRUE, gather stats for ALL tables across all sources (lowest priority)
    * @param pOnlyEnabled - When TRUE (default), only process tables with IS_ARCHIVE_ENABLED='Y'
    * @example -- Level 1: CALL FILE_ARCHIVER.GATHER_TABLE_STAT_ALL(pSourceFileConfigKey => 123);
    * @example -- Level 2: CALL FILE_ARCHIVER.GATHER_TABLE_STAT_ALL(pSourceKey => 'LM');
    * @example -- Level 3: CALL FILE_ARCHIVER.GATHER_TABLE_STAT_ALL(pGatherAll => TRUE);
    * @example -- All tables regardless of IS_ARCHIVE_ENABLED: CALL FILE_ARCHIVER.GATHER_TABLE_STAT_ALL(pGatherAll => TRUE, pOnlyEnabled => FALSE);
    **/
    PROCEDURE GATHER_TABLE_STAT_ALL (
        pSourceFileConfigKey IN CT_MRDS.A_SOURCE_FILE_CONFIG.A_SOURCE_FILE_CONFIG_KEY%TYPE DEFAULT NULL,
        pSourceKey           IN CT_MRDS.A_SOURCE.A_SOURCE_KEY%TYPE DEFAULT NULL,
        pGatherAll           IN BOOLEAN DEFAULT FALSE,
        pOnlyEnabled         IN BOOLEAN DEFAULT TRUE
    );

    /**
    * @name FN_GATHER_TABLE_STAT_ALL
    * @desc Function wrapper for GATHER_TABLE_STAT_ALL procedure.
    * Returns SQLCODE for Python library integration.
    * Calls the main GATHER_TABLE_STAT_ALL procedure and captures execution result.
    * @param pSourceFileConfigKey - (LEVEL 1) Gather stats for specific configuration key (highest priority)
    * @param pSourceKey - (LEVEL 2) Gather stats for all tables in source system (medium priority)
    * @param pGatherAll - (LEVEL 3) When TRUE, gather stats for ALL tables across all sources (lowest priority)
    * @param pOnlyEnabled - When TRUE (default), only process tables with IS_ARCHIVE_ENABLED='Y'
    * @return PLS_INTEGER - 0 (success) or error code
    * @example SELECT FILE_ARCHIVER.FN_GATHER_TABLE_STAT_ALL(pSourceKey => 'LM') FROM DUAL;
    * @ex_rslt 0 (success) or error code
    **/
    FUNCTION FN_GATHER_TABLE_STAT_ALL (
        pSourceFileConfigKey IN CT_MRDS.A_SOURCE_FILE_CONFIG.A_SOURCE_FILE_CONFIG_KEY%TYPE DEFAULT NULL,
        pSourceKey           IN CT_MRDS.A_SOURCE.A_SOURCE_KEY%TYPE DEFAULT NULL,
        pGatherAll           IN BOOLEAN DEFAULT FALSE,
        pOnlyEnabled         IN BOOLEAN DEFAULT TRUE
    ) RETURN PLS_INTEGER;

    /**
    * @name ARCHIVE_ALL
    * @desc Multi-level batch archival procedure with three granularity levels.
    * Only processes configurations where IS_ARCHIVE_ENABLED='Y'.
    * TRASH policy for each table is controlled by individual IS_KEEP_IN_TRASH column.
    * @param pSourceFileConfigKey - (LEVEL 1) Archive specific configuration key (highest priority)
    * @param pSourceKey - (LEVEL 2) Archive all enabled tables for source system (e.g., 'LM', 'C2D') (medium priority)
    * @param pArchiveAll - (LEVEL 3) When TRUE, archive ALL enabled tables across all sources (lowest priority)
    * @example -- Level 1: CALL FILE_ARCHIVER.ARCHIVE_ALL(pSourceFileConfigKey => 123);
    * @example -- Level 2: CALL FILE_ARCHIVER.ARCHIVE_ALL(pSourceKey => 'LM');
    * @example -- Level 3: CALL FILE_ARCHIVER.ARCHIVE_ALL(pArchiveAll => TRUE);
    **/
    PROCEDURE ARCHIVE_ALL (
        pSourceFileConfigKey IN CT_MRDS.A_SOURCE_FILE_CONFIG.A_SOURCE_FILE_CONFIG_KEY%TYPE DEFAULT NULL,
        pSourceKey           IN CT_MRDS.A_SOURCE.A_SOURCE_KEY%TYPE DEFAULT NULL,
        pArchiveAll          IN BOOLEAN DEFAULT FALSE
    );

    /**
    * @name FN_ARCHIVE_ALL
    * @desc Function wrapper for ARCHIVE_ALL procedure.
    * Returns SQLCODE for Python library integration.
    * Calls the main ARCHIVE_ALL procedure and captures execution result.
    * @param pSourceFileConfigKey - (LEVEL 1) Archive specific configuration key (highest priority)
    * @param pSourceKey - (LEVEL 2) Archive all enabled tables for source system (medium priority)
    * @param pArchiveAll - (LEVEL 3) When TRUE, archive ALL enabled tables across all sources (lowest priority)
    * @return PLS_INTEGER - 0 (success) or error code
    * @example SELECT FILE_ARCHIVER.FN_ARCHIVE_ALL(pSourceKey => 'LM') FROM DUAL;
    * @ex_rslt 0 (success) or error code
    **/
    FUNCTION FN_ARCHIVE_ALL (
        pSourceFileConfigKey IN CT_MRDS.A_SOURCE_FILE_CONFIG.A_SOURCE_FILE_CONFIG_KEY%TYPE DEFAULT NULL,
        pSourceKey           IN CT_MRDS.A_SOURCE.A_SOURCE_KEY%TYPE DEFAULT NULL,
        pArchiveAll          IN BOOLEAN DEFAULT FALSE
    ) RETURN PLS_INTEGER;

    /**
    * @name RESTORE_FILE_FROM_TRASH
    * @desc Restores files from TRASH folder back to ODS at three different granularity levels.
    * Moves files from TRASH subfolder back to ODS subfolder in DATA bucket.
    * Updates status from ARCHIVED_AND_TRASHED to INGESTED and clears archival metadata.
    * @param pSourceFileReceivedKey - (LEVEL 3) Specific file to restore by A_SOURCE_FILE_RECEIVED_KEY (highest priority)
    * @param pSourceFileConfigKey - (LEVEL 2) Restore all files for specific configuration key (medium priority)
    * @param pRestoreAll - (LEVEL 1) When TRUE, restore ALL files with ARCHIVED_AND_TRASHED status (lowest priority)
    * @example -- Restore single file: CALL FILE_ARCHIVER.RESTORE_FILE_FROM_TRASH(pSourceFileReceivedKey => 12345);
    * @example -- Restore all files for config: CALL FILE_ARCHIVER.RESTORE_FILE_FROM_TRASH(pSourceFileConfigKey => 341);
    * @example -- Restore all TRASH globally: CALL FILE_ARCHIVER.RESTORE_FILE_FROM_TRASH(pRestoreAll => TRUE);
    **/
    PROCEDURE RESTORE_FILE_FROM_TRASH (
        pSourceFileReceivedKey IN CT_MRDS.A_SOURCE_FILE_RECEIVED.A_SOURCE_FILE_RECEIVED_KEY%TYPE DEFAULT NULL,
        pSourceFileConfigKey   IN CT_MRDS.A_SOURCE_FILE_CONFIG.A_SOURCE_FILE_CONFIG_KEY%TYPE DEFAULT NULL,
        pRestoreAll            IN BOOLEAN DEFAULT FALSE
    );

    /**
    * @name RESTORE_FILE_FROM_TRASH
    * @desc Function overload for RESTORE_FILE_FROM_TRASH procedure.
    * Returns SQLCODE for Python library integration.
    * Calls the main RESTORE_FILE_FROM_TRASH procedure and captures execution result.
    * @param pSourceFileReceivedKey - (LEVEL 3) Specific file to restore by A_SOURCE_FILE_RECEIVED_KEY (highest priority)
    * @param pSourceFileConfigKey - (LEVEL 2) Restore all files for specific configuration key (medium priority)
    * @param pRestoreAll - (LEVEL 1) When TRUE, restore ALL files with ARCHIVED_AND_TRASHED status (lowest priority)
    * @return PLS_INTEGER - 0 (success) or error code
    * @example SELECT FILE_ARCHIVER.RESTORE_FILE_FROM_TRASH(pSourceFileReceivedKey => 12345) FROM DUAL;
    * @ex_rslt 0 (success) or error code
    **/
    FUNCTION RESTORE_FILE_FROM_TRASH (
        pSourceFileReceivedKey IN CT_MRDS.A_SOURCE_FILE_RECEIVED.A_SOURCE_FILE_RECEIVED_KEY%TYPE DEFAULT NULL,
        pSourceFileConfigKey   IN CT_MRDS.A_SOURCE_FILE_CONFIG.A_SOURCE_FILE_CONFIG_KEY%TYPE DEFAULT NULL,
        pRestoreAll            IN BOOLEAN DEFAULT FALSE
    ) RETURN PLS_INTEGER;

    /**
    * @name PURGE_TRASH_FOLDER
    * @desc Deletes files from TRASH folder at three different granularity levels.
    * Updates status from ARCHIVED_AND_TRASHED to ARCHIVED_AND_PURGED for all affected files.
    * WARNING: This operation is irreversible - files are permanently deleted from TRASH.
    * @param pSourceFileReceivedKey - (LEVEL 3) Specific file to delete by A_SOURCE_FILE_RECEIVED_KEY (highest priority)
    * @param pSourceFileConfigKey - (LEVEL 2) Delete all files for specific configuration key (medium priority)
    * @param pPurgeAll - (LEVEL 1) When TRUE, delete ALL files with ARCHIVED_AND_TRASHED status (lowest priority)
    * @example -- Delete single file: CALL FILE_ARCHIVER.PURGE_TRASH_FOLDER(pSourceFileReceivedKey => 12345);
    * @example -- Delete all files for config: CALL FILE_ARCHIVER.PURGE_TRASH_FOLDER(pSourceFileConfigKey => 341);
    * @example -- Delete all TRASH globally: CALL FILE_ARCHIVER.PURGE_TRASH_FOLDER(pPurgeAll => TRUE);
    **/
    PROCEDURE PURGE_TRASH_FOLDER (
        pSourceFileReceivedKey IN CT_MRDS.A_SOURCE_FILE_RECEIVED.A_SOURCE_FILE_RECEIVED_KEY%TYPE DEFAULT NULL,
        pSourceFileConfigKey   IN CT_MRDS.A_SOURCE_FILE_CONFIG.A_SOURCE_FILE_CONFIG_KEY%TYPE DEFAULT NULL,
        pPurgeAll              IN BOOLEAN DEFAULT FALSE
    );

    /**
    * @name PURGE_TRASH_FOLDER
    * @desc Function overload for PURGE_TRASH_FOLDER procedure.
    * Returns SQLCODE for Python library integration.
    * Calls the main PURGE_TRASH_FOLDER procedure and captures execution result.
    * WARNING: This operation is irreversible - files are permanently deleted from TRASH.
    * @param pSourceFileReceivedKey - (LEVEL 3) Specific file to delete by A_SOURCE_FILE_RECEIVED_KEY (highest priority)
    * @param pSourceFileConfigKey - (LEVEL 2) Delete all files for specific configuration key (medium priority)
    * @param pPurgeAll - (LEVEL 1) When TRUE, delete ALL files with ARCHIVED_AND_TRASHED status (lowest priority)
    * @return PLS_INTEGER - 0 (success) or error code
    * @example SELECT FILE_ARCHIVER.PURGE_TRASH_FOLDER(pSourceFileReceivedKey => 12345) FROM DUAL;
    * @ex_rslt 0 (success) or error code
    **/
    FUNCTION PURGE_TRASH_FOLDER (
        pSourceFileReceivedKey IN CT_MRDS.A_SOURCE_FILE_RECEIVED.A_SOURCE_FILE_RECEIVED_KEY%TYPE DEFAULT NULL,
        pSourceFileConfigKey   IN CT_MRDS.A_SOURCE_FILE_CONFIG.A_SOURCE_FILE_CONFIG_KEY%TYPE DEFAULT NULL,
        pPurgeAll              IN BOOLEAN DEFAULT FALSE
    ) RETURN PLS_INTEGER;

    ---------------------------------------------------------------------------------------------------------------------------
    -- PACKAGE VERSION MANAGEMENT FUNCTIONS
    ---------------------------------------------------------------------------------------------------------------------------

    /**
    * @name GET_VERSION
    * @desc Returns the current version number of the FILE_ARCHIVER package.
    * Uses semantic versioning format (MAJOR.MINOR.PATCH).
    * @example SELECT FILE_ARCHIVER.GET_VERSION() FROM DUAL;
    * @ex_rslt 3.3.0
    **/
    FUNCTION GET_VERSION RETURN VARCHAR2;

    /**
    * @name GET_BUILD_INFO
    * @desc Returns comprehensive build information including version, build date, and author.
    * Uses centralized ENV_MANAGER.GET_PACKAGE_VERSION_INFO function.
    * @example SELECT FILE_ARCHIVER.GET_BUILD_INFO() FROM DUAL;
    * @ex_rslt Package: FILE_ARCHIVER
    * Version: 3.3.0
    * Build Date: 2026-02-11 12:00:00
    * Author: Grzegorz Michalski
    **/
    FUNCTION GET_BUILD_INFO RETURN VARCHAR2;

    /**
    * @name GET_VERSION_HISTORY
    * @desc Returns complete version history with all releases and changes.
    * Uses centralized ENV_MANAGER.FORMAT_VERSION_HISTORY function.
    * @example SELECT FILE_ARCHIVER.GET_VERSION_HISTORY() FROM DUAL;
    * @ex_rslt FILE_ARCHIVER Version History:
    * 3.3.0 (2026-02-11): Added IS_ARCHIVE_ENABLED and IS_KEEP_IN_TRASH columns...
    **/
    FUNCTION GET_VERSION_HISTORY RETURN VARCHAR2;

END FILE_ARCHIVER;
/