From 74b885709600e875b2f00ec581131d44e8c1e5f9 Mon Sep 17 00:00:00 2001 From: Grzegorz Michalski Date: Fri, 20 Mar 2026 13:26:37 +0100 Subject: [PATCH] feat(FILE_ARCHIVER): Rename IS_KEEP_IN_TRASH to IS_KEPT_IN_TRASH for consistency in configuration --- confluence/FILE_ARCHIVER_Guide.md | 69 ++++++++++++++++--------------- 1 file changed, 35 insertions(+), 34 deletions(-) diff --git a/confluence/FILE_ARCHIVER_Guide.md b/confluence/FILE_ARCHIVER_Guide.md index dca865a..612eb63 100644 --- a/confluence/FILE_ARCHIVER_Guide.md +++ b/confluence/FILE_ARCHIVER_Guide.md @@ -288,11 +288,11 @@ WHERE SOURCE_FILE_TYPE = 'INPUT' ORDER BY SOURCE_FILE_ID, TABLE_ID; ``` -### IS_KEEP_IN_TRASH Column +### IS_KEPT_IN_TRASH Column Controls TRASH folder retention policy for archived files. -**Column**: `A_SOURCE_FILE_CONFIG.IS_KEEP_IN_TRASH` (CHAR(1), DEFAULT 'N' NOT NULL) +**Column**: `A_SOURCE_FILE_CONFIG.IS_KEPT_IN_TRASH` (CHAR(1), DEFAULT 'N' NOT NULL) **Values**: - `'Y'` - CSV files kept in TRASH folder after archival (status: ARCHIVED_AND_TRASHED) @@ -312,7 +312,7 @@ Controls TRASH folder retention policy for archived files. ```sql -- Production: Keep files in TRASH (recommended) UPDATE CT_MRDS.A_SOURCE_FILE_CONFIG -SET IS_KEEP_IN_TRASH = 'Y' +SET IS_KEPT_IN_TRASH = 'Y' WHERE SOURCE_FILE_TYPE = 'INPUT' AND SOURCE_FILE_ID = 'LM' AND TABLE_ID LIKE 'LM_%'; @@ -320,14 +320,14 @@ COMMIT; -- Test environment: Cleanup TRASH to save storage UPDATE CT_MRDS.A_SOURCE_FILE_CONFIG -SET IS_KEEP_IN_TRASH = 'N' +SET IS_KEPT_IN_TRASH = 'N' WHERE SOURCE_FILE_TYPE = 'INPUT' AND SOURCE_FILE_ID = 'TEST_SOURCE'; COMMIT; -- Bulk configuration by source UPDATE CT_MRDS.A_SOURCE_FILE_CONFIG -SET IS_KEEP_IN_TRASH = 'Y' +SET IS_KEPT_IN_TRASH = 'Y' WHERE SOURCE_FILE_TYPE = 'INPUT' AND SOURCE_FILE_ID IN ('CSDB', 'C2D', 'LM'); COMMIT; @@ -393,7 +393,7 @@ INGESTED → ARCHIVED_AND_TRASHED → ARCHIVED_AND_PURGED (optional) **Status Descriptions**: - **INGESTED**: File successfully processed through Airflow+DBT, residing in ODS bucket - **ARCHIVED_AND_TRASHED**: File archived to Parquet in ARCHIVE bucket, CSV retained in TRASH folder (DATA bucket) -- **ARCHIVED_AND_PURGED**: File archived to Parquet, CSV deleted from TRASH folder (when IS_KEEP_IN_TRASH='N') +- **ARCHIVED_AND_PURGED**: File archived to Parquet, CSV deleted from TRASH folder (when IS_KEPT_IN_TRASH='N') **Associated Columns Updated During Archival**: ```sql @@ -435,9 +435,9 @@ https://objectstorage.eu-frankfurt-1.oraclecloud.com/n/namespace/b/archive/o/ARC 2.1 TRASH Subfolder (DATA Bucket - File Retention) ├─ Located in DATA bucket (e.g., TRASH/LM/TABLE_NAME) ├─ Stores CSV files after archival to Parquet - ├─ Status: ARCHIVED_AND_TRASHED (default, controlled by IS_KEEP_IN_TRASH config) + ├─ Status: ARCHIVED_AND_TRASHED (default, controlled by IS_KEPT_IN_TRASH config) ├─ Enables rollback if archival issues occur - └─ Optional cleanup: ARCHIVED_AND_PURGED (when IS_KEEP_IN_TRASH = 'N') + └─ Optional cleanup: ARCHIVED_AND_PURGED (when IS_KEPT_IN_TRASH = 'N') 3. ARCHIVE Bucket (Long-term Storage) ├─ Historical data in Parquet format @@ -447,14 +447,14 @@ https://objectstorage.eu-frankfurt-1.oraclecloud.com/n/namespace/b/archive/o/ARC **Key Procedures**: - `ARCHIVE_TABLE_DATA(pSourceFileConfigKey)` - Main archival procedure using strategy-specific WHERE clause - - TRASH folder retention controlled by `IS_KEEP_IN_TRASH` column in A_SOURCE_FILE_CONFIG + - TRASH folder retention controlled by `IS_KEPT_IN_TRASH` column in A_SOURCE_FILE_CONFIG - `ARCHIVE_ALL(pSourceFileConfigKey, pSourceKey, pArchiveAll)` - Batch archival with 3-level granularity and error handling - **Level 3 (Highest Priority)**: Single configuration via `pSourceFileConfigKey` - **Level 2 (Medium Priority)**: All configurations for source via `pSourceKey` - **Level 1 (Lowest Priority)**: All configurations system-wide via `pArchiveAll` - **Error Handling**: Continues processing other tables on individual failures - **Filtering**: Respects `IS_ARCHIVE_ENABLED='Y'` (skips disabled configurations) - - **Individual TRASH Policy**: Each table's `IS_KEEP_IN_TRASH` setting applied independently + - **Individual TRASH Policy**: Each table's `IS_KEPT_IN_TRASH` setting applied independently - **Summary Reporting**: Returns counts of Archived/Skipped/Failed tables - `GET_ARCHIVAL_WHERE_CLAUSE` - Returns WHERE clause based on configured strategy - `GATHER_TABLE_STAT` - Calculates archival statistics using strategy logic @@ -464,7 +464,7 @@ https://objectstorage.eu-frankfurt-1.oraclecloud.com/n/namespace/b/archive/o/ARC **Archival Execution**: ```sql --- Single table archival (TRASH retention controlled by IS_KEEP_IN_TRASH config) +-- Single table archival (TRASH retention controlled by IS_KEPT_IN_TRASH config) BEGIN CT_MRDS.FILE_ARCHIVER.ARCHIVE_TABLE_DATA( pSourceFileConfigKey => vSourceFileConfigKey @@ -500,7 +500,7 @@ END; - Data matching criteria moved from ODS to ARCHIVE bucket - CSV files moved to TRASH subfolder in DATA bucket (ODS/ → TRASH/) - Parquet format with Hive-style partitioning applied to ARCHIVE bucket -- TRASH retention controlled by IS_KEEP_IN_TRASH column in A_SOURCE_FILE_CONFIG +- TRASH retention controlled by IS_KEPT_IN_TRASH column in A_SOURCE_FILE_CONFIG ### Automatic Rollback Mechanism @@ -510,7 +510,7 @@ FILE_ARCHIVER implements **automatic rollback** to ensure data integrity if arch 1. **Export to ARCHIVE**: Data exported to Parquet format in ARCHIVE bucket 2. **Status Update**: A_SOURCE_FILE_RECEIVED records updated to 'ARCHIVED_AND_TRASHED' 3. **Move to TRASH**: CSV files moved from ODS to TRASH folder (DATA bucket) -4. **Optional Cleanup**: If IS_KEEP_IN_TRASH='N', files deleted from TRASH +4. **Optional Cleanup**: If IS_KEPT_IN_TRASH='N', files deleted from TRASH **Automatic Rollback Trigger**: If **any error occurs** during step 3 (Move to TRASH), the system: @@ -725,7 +725,7 @@ UPDATE CT_MRDS.A_SOURCE_FILE_CONFIG SET ARCHIVAL_STRATEGY = 'MINIMUM_AGE_MONTHS', MINIMUM_AGE_MONTHS = 6, IS_ARCHIVE_ENABLED = 'Y', -- Enable archival - IS_KEEP_IN_TRASH = 'Y' -- Keep files in TRASH for safety + IS_KEPT_IN_TRASH = 'Y' -- Keep files in TRASH for safety WHERE SOURCE_FILE_TYPE = 'INPUT' AND SOURCE_FILE_ID = 'CSDB' AND TABLE_ID = 'CSDB_DEBT'; @@ -739,7 +739,7 @@ COMMIT; -- Configure TRASH cleanup for test environment UPDATE CT_MRDS.A_SOURCE_FILE_CONFIG -SET IS_KEEP_IN_TRASH = 'N' -- Delete files from TRASH after archival +SET IS_KEPT_IN_TRASH = 'N' -- Delete files from TRASH after archival WHERE SOURCE_FILE_TYPE = 'INPUT' AND SOURCE_FILE_ID = 'TEST_SOURCE'; COMMIT; @@ -751,7 +751,7 @@ SELECT ARCHIVAL_STRATEGY, MINIMUM_AGE_MONTHS, IS_ARCHIVE_ENABLED, - IS_KEEP_IN_TRASH + IS_KEPT_IN_TRASH FROM CT_MRDS.A_SOURCE_FILE_CONFIG WHERE SOURCE_FILE_TYPE = 'INPUT' ORDER BY SOURCE_FILE_ID, TABLE_ID; @@ -759,12 +759,12 @@ ORDER BY SOURCE_FILE_ID, TABLE_ID; -- Summary by archival status SELECT IS_ARCHIVE_ENABLED, - IS_KEEP_IN_TRASH, + IS_KEPT_IN_TRASH, COUNT(*) AS TABLE_COUNT FROM CT_MRDS.A_SOURCE_FILE_CONFIG WHERE SOURCE_FILE_TYPE = 'INPUT' -GROUP BY IS_ARCHIVE_ENABLED, IS_KEEP_IN_TRASH -ORDER BY IS_ARCHIVE_ENABLED DESC, IS_KEEP_IN_TRASH DESC; +GROUP BY IS_ARCHIVE_ENABLED, IS_KEPT_IN_TRASH +ORDER BY IS_ARCHIVE_ENABLED DESC, IS_KEPT_IN_TRASH DESC; ``` ## Release 01 Configuration @@ -948,7 +948,7 @@ WHERE object_name LIKE 'ARCHIVE/LM/STANDING_FACILITIES/PARTITION_YEAR=2026/PARTI **Symptoms**: Files not deleted from TRASH after archival -**Cause**: Configuration has `IS_KEEP_IN_TRASH='Y'` (retain files in TRASH) +**Cause**: Configuration has `IS_KEPT_IN_TRASH='Y'` (retain files in TRASH) **Verification**: ```sql @@ -956,8 +956,8 @@ WHERE object_name LIKE 'ARCHIVE/LM/STANDING_FACILITIES/PARTITION_YEAR=2026/PARTI SELECT SOURCE_FILE_ID, TABLE_ID, - IS_KEEP_IN_TRASH, - CASE IS_KEEP_IN_TRASH + IS_KEPT_IN_TRASH, + CASE IS_KEPT_IN_TRASH WHEN 'Y' THEN 'Files RETAINED in TRASH (manual purge required)' WHEN 'N' THEN 'Files DELETED immediately after archival' END AS TRASH_BEHAVIOR @@ -969,7 +969,7 @@ WHERE TABLE_ID = 'YOUR_TABLE'; ```sql -- Option A: Change configuration to auto-delete (permanent change) UPDATE CT_MRDS.A_SOURCE_FILE_CONFIG -SET IS_KEEP_IN_TRASH = 'N' -- Auto-delete from TRASH after archival +SET IS_KEPT_IN_TRASH = 'N' -- Auto-delete from TRASH after archival WHERE TABLE_ID = 'YOUR_TABLE'; COMMIT; @@ -1132,7 +1132,7 @@ SELECT SFR.FILE_SIZE_BYTES, SFR.UPDATED_AT AS ARCHIVED_AT, TRUNC(SYSDATE - SFR.UPDATED_AT) AS DAYS_IN_TRASH, - SFC.IS_KEEP_IN_TRASH AS TRASH_POLICY + SFC.IS_KEPT_IN_TRASH AS TRASH_POLICY FROM CT_MRDS.A_SOURCE_FILE_RECEIVED SFR JOIN CT_MRDS.A_SOURCE_FILE_CONFIG SFC ON SFR.A_SOURCE_FILE_CONFIG_KEY = SFC.A_SOURCE_FILE_CONFIG_KEY WHERE SFR.PROCESSING_STATUS = 'ARCHIVED_AND_TRASHED' @@ -1148,7 +1148,7 @@ SELECT SFC.TABLE_ID, SFC.ARCHIVAL_STRATEGY, SFC.IS_ARCHIVE_ENABLED, - SFC.IS_KEEP_IN_TRASH, + SFC.IS_KEPT_IN_TRASH, COUNT(CASE WHEN SFR.PROCESSING_STATUS = 'INGESTED' THEN 1 END) AS PENDING_ARCHIVE, COUNT(CASE WHEN SFR.PROCESSING_STATUS = 'ARCHIVED_AND_TRASHED' THEN 1 END) AS IN_TRASH, COUNT(CASE WHEN SFR.PROCESSING_STATUS = 'ARCHIVED_AND_PURGED' THEN 1 END) AS PURGED, @@ -1158,7 +1158,7 @@ LEFT JOIN CT_MRDS.A_SOURCE_FILE_RECEIVED SFR ON SFC.A_SOURCE_FILE_CONFIG_KEY = S WHERE SFC.SOURCE_FILE_TYPE = 'INPUT' GROUP BY SFC.SOURCE_FILE_ID, SFC.TABLE_ID, SFC.ARCHIVAL_STRATEGY, - SFC.IS_ARCHIVE_ENABLED, SFC.IS_KEEP_IN_TRASH + SFC.IS_ARCHIVE_ENABLED, SFC.IS_KEPT_IN_TRASH ORDER BY SFC.SOURCE_FILE_ID, SFC.TABLE_ID; ``` @@ -1218,11 +1218,11 @@ SELECT ROUND(SUM(SFR.FILE_SIZE_BYTES) / 1024 / 1024 / 1024, 2) AS SIZE_GB, MIN(SFR.UPDATED_AT) AS OLDEST_IN_TRASH, MAX(SFR.UPDATED_AT) AS NEWEST_IN_TRASH, - SFC.IS_KEEP_IN_TRASH AS POLICY + SFC.IS_KEPT_IN_TRASH AS POLICY FROM CT_MRDS.A_SOURCE_FILE_RECEIVED SFR JOIN CT_MRDS.A_SOURCE_FILE_CONFIG SFC ON SFR.A_SOURCE_FILE_CONFIG_KEY = SFC.A_SOURCE_FILE_CONFIG_KEY WHERE SFR.PROCESSING_STATUS = 'ARCHIVED_AND_TRASHED' -GROUP BY SFC.SOURCE_FILE_ID, SFC.IS_KEEP_IN_TRASH +GROUP BY SFC.SOURCE_FILE_ID, SFC.IS_KEPT_IN_TRASH ORDER BY SIZE_GB DESC; ``` @@ -1241,7 +1241,7 @@ ORDER BY SIZE_GB DESC; ### v3.3.0 (2026-02-11) - **BREAKING CHANGE**: Removed `pKeepInTrash` parameter from ARCHIVE_TABLE_DATA - Added `IS_ARCHIVE_ENABLED` column to A_SOURCE_FILE_CONFIG for selective archiving control -- Added `IS_KEEP_IN_TRASH` column to A_SOURCE_FILE_CONFIG (replaces pKeepInTrash parameter) +- Added `IS_KEPT_IN_TRASH` column to A_SOURCE_FILE_CONFIG (replaces pKeepInTrash parameter) - Added batch procedures with 3-level granularity (config/source/all): - ARCHIVE_ALL - Batch archival procedure - GATHER_TABLE_STAT_ALL - Batch statistics procedure @@ -1392,7 +1392,7 @@ ORDER BY SIZE_GB DESC; ### TRASH Folder Retention Best Practices -1. **Default Behavior (IS_KEEP_IN_TRASH = 'Y' - Recommended)**: +1. **Default Behavior (IS_KEPT_IN_TRASH = 'Y' - Recommended)**: - Keeps CSV files in TRASH folder after archival - Provides safety net for rollback if archival issues occur - Supports compliance and audit requirements @@ -1401,11 +1401,11 @@ ORDER BY SIZE_GB DESC; - Configuration: ```sql UPDATE CT_MRDS.A_SOURCE_FILE_CONFIG - SET IS_KEEP_IN_TRASH = 'Y' + SET IS_KEPT_IN_TRASH = 'Y' WHERE SOURCE_FILE_TYPE = 'INPUT' AND TABLE_ID = 'YOUR_TABLE'; ``` -2. **TRASH Cleanup (IS_KEEP_IN_TRASH = 'N')**: +2. **TRASH Cleanup (IS_KEPT_IN_TRASH = 'N')**: - Deletes CSV files from TRASH folder after successful archival - Reduces storage costs in DATA bucket - Status: ARCHIVED_AND_PURGED @@ -1413,7 +1413,7 @@ ORDER BY SIZE_GB DESC; - Configuration: ```sql UPDATE CT_MRDS.A_SOURCE_FILE_CONFIG - SET IS_KEEP_IN_TRASH = 'N' + SET IS_KEPT_IN_TRASH = 'N' WHERE SOURCE_FILE_TYPE = 'INPUT' AND TABLE_ID = 'YOUR_TABLE'; ``` @@ -1423,7 +1423,7 @@ ORDER BY SIZE_GB DESC; SELECT SOURCE_FILE_NAME, PROCESSING_STATUS, - ARCH_FILE_NAME, + ARCH_PATH, PARTITION_YEAR, PARTITION_MONTH FROM CT_MRDS.A_SOURCE_FILE_RECEIVED @@ -1446,3 +1446,4 @@ ORDER BY SIZE_GB DESC; ``` +