Update DATA_EXPORTER package to v2.17.0: Fix RFC 4180 compliance and Parquet format corruption

This commit is contained in:
Grzegorz Michalski
2026-03-12 08:50:08 +01:00
parent 5ba6c30fda
commit 202b535f9f
10 changed files with 346 additions and 46 deletions

View File

@@ -129,7 +129,8 @@ AS
pTableName IN VARCHAR2,
pSchemaName IN VARCHAR2,
pKeyColumnName IN VARCHAR2,
pTemplateTableName IN VARCHAR2
pTemplateTableName IN VARCHAR2,
pFormat IN VARCHAR2 DEFAULT 'CSV'
) RETURN VARCHAR2 IS
vResult VARCHAR2(32767);
vColumns VARCHAR2(32767);
@@ -243,14 +244,15 @@ AS
vResult := vResult || CASE WHEN vResult IS NOT NULL THEN ', ' ELSE '' END ||
'TO_CHAR(T.' || vCurrentCol || ', ''' || vDateFormat || ''') AS ' || vCurrentCol;
-- Other columns: RFC 4180 quote-doubling for character types, as-is for others
-- Other columns: RFC 4180 quote-doubling for CSV character types, as-is for Parquet/others
-- Oracle DBMS_CLOUD.EXPORT_DATA has no native RFC 4180 "" doubling.
-- escape=true -> backslash-escaped (\") - incompatible with ORACLE_LOADER.
-- escape=false -> unescaped embedded quotes - also incompatible.
-- Solution: pre-double any " in VARCHAR2/CHAR/CLOB before Oracle wraps in quote.
-- Solution (CSV only): pre-double any " in VARCHAR2/CHAR/CLOB before Oracle wraps in quote.
-- Oracle then writes: "BIDDER-'""=:" which ORACLE_LOADER reads as BIDDER-'"=:.
-- Parquet is binary - no quoting needed; REPLACE would corrupt string data.
ELSE
IF vDataType IN ('VARCHAR2', 'NVARCHAR2', 'CHAR', 'NCHAR', 'CLOB', 'NCLOB') THEN
IF pFormat = 'CSV' AND vDataType IN ('VARCHAR2', 'NVARCHAR2', 'CHAR', 'NCHAR', 'CLOB', 'NCLOB') THEN
vResult := vResult || CASE WHEN vResult IS NOT NULL THEN ', ' ELSE '' END ||
'REPLACE(T.' || vCurrentCol || ', CHR(34), CHR(34)||CHR(34)) AS ' || vCurrentCol;
ELSE
@@ -1091,7 +1093,7 @@ AS
VALIDATE_TABLE_AND_COLUMNS(vSchemaName, vTableName, vKeyColumnName, pColumnList, vParameters);
-- Build query with TO_CHAR for date columns (per-column format support)
vProcessedColumnList := buildQueryWithDateFormats(pColumnList, vTableName, vSchemaName, vKeyColumnName, pTemplateTableName);
vProcessedColumnList := buildQueryWithDateFormats(pColumnList, vTableName, vSchemaName, vKeyColumnName, pTemplateTableName, 'PARQUET');
ENV_MANAGER.LOG_PROCESS_EVENT('Input column list: ' || NVL(pColumnList, 'NULL (building dynamic list from table metadata)'), 'DEBUG', vParameters);
ENV_MANAGER.LOG_PROCESS_EVENT('Processed column list with TO_CHAR for date columns: ' || vProcessedColumnList, 'DEBUG', vParameters);

View File

@@ -9,19 +9,20 @@ AS
**/
-- Package Version Information
PACKAGE_VERSION CONSTANT VARCHAR2(10) := '2.16.0';
PACKAGE_BUILD_DATE CONSTANT VARCHAR2(20) := '2026-03-11 08:00:00';
PACKAGE_VERSION CONSTANT VARCHAR2(10) := '2.17.0';
PACKAGE_BUILD_DATE CONSTANT VARCHAR2(20) := '2026-03-11 10:00:00';
PACKAGE_AUTHOR CONSTANT VARCHAR2(100) := 'Grzegorz Michalski';
-- Version History (last 3-5 changes)
VERSION_HISTORY CONSTANT VARCHAR2(4000) :=
'v2.17.0 (2026-03-11): PARQUET FIX - Added pFormat parameter to buildQueryWithDateFormats. REPLACE(col,CHR(34)) now applied only when pFormat=CSV. EXPORT_TABLE_DATA_BY_DATE passes PARQUET - string data was being corrupted (single " doubled to ""). Parquet is binary and needs no quote escaping.' || CHR(10) ||
'v2.16.0 (2026-03-11): RFC 4180 FIX - Added REPLACE(col,CHR(34),CHR(34)||CHR(34)) in buildQueryWithDateFormats for VARCHAR2/CHAR/CLOB. Oracle DBMS_CLOUD has no native RFC 4180 doubling: escape=true uses backslash, no escape leaves raw quotes. Pre-doubled values produce compliant CSV for ORACLE_LOADER OPTIONALLY ENCLOSED BY chr(34).' || CHR(10) ||
'v2.15.0 (2026-03-10): INCOMPLETE FIX - Removed escape=true only; embedded quotes still unescaped. Superseded by v2.16.0.' || CHR(10) ||
'v2.14.0 (2026-02-25): OPTIMIZATION - Added pTaskName parameter to EXPORT_PARTITION_PARALLEL for deterministic filtering. Replaced FETCH FIRST 1 ROW ONLY safeguard with precise WHERE CHUNK_ID AND TASK_NAME filter. Eliminates ORDER BY overhead and provides cleaner session isolation.' || CHR(10) ||
'v2.13.1 (2026-02-25): CRITICAL FIX - Added START_ID and END_ID aliasses in CREATE_CHUNKS_BY_SQL to avoid ORA-00960 ambiguous column naming error.' || CHR(10) ||
'v2.13.0 (2026-02-25): CRITICAL SESSION ISOLATION FIX - Changed CREATE_CHUNKS_BY_NUMBER_COL to CREATE_CHUNKS_BY_SQL with TASK_NAME filter (fixes ORA-01422 in concurrent sessions). Added ORDER BY CREATED_DATE DESC FETCH FIRST 1 ROW safeguard to EXPORT_PARTITION_PARALLEL SELECT. Composite PK (TASK_NAME, CHUNK_ID) now fully functional.' || CHR(10) ||
'v2.12.0 (2026-02-24): CRITICAL FIX - Rewritten DELETE_FAILED_EXPORT_FILE to use file-specific pattern matching (prevents deleting parallel CSV chunks in shared folder). Added vQuery logging before DBMS_CLOUD calls. Added CSV maxfilesize logging.' || CHR(10) ||
'v2.11.0 (2026-02-18): Added pJobClass parameter to EXPORT_TABLE_DATA_BY_DATE and EXPORT_TABLE_DATA_TO_CSV_BY_DATE for Oracle Scheduler job class support (resource/priority management).' || CHR(10);
'v2.14.0 (2026-02-25): OPTIMIZATION - Added pTaskName parameter to EXPORT_PARTITION_PARALLEL for deterministic filtering. Replaced FETCH FIRST 1 ROW ONLY safeguard with precise WHERE CHUNK_ID AND TASK_NAME filter. Eliminates ORDER BY overhead and provides cleaner session isolation.' || CHR(10) ||
'v2.13.1 (2026-02-25): CRITICAL FIX - Added START_ID and END_ID aliasses in CREATE_CHUNKS_BY_SQL to avoid ORA-00960 ambiguous column naming error.' || CHR(10) ||
'v2.13.0 (2026-02-25): CRITICAL SESSION ISOLATION FIX - Changed CREATE_CHUNKS_BY_NUMBER_COL to CREATE_CHUNKS_BY_SQL with TASK_NAME filter (fixes ORA-01422 in concurrent sessions). Added ORDER BY CREATED_DATE DESC FETCH FIRST 1 ROW safeguard to EXPORT_PARTITION_PARALLEL SELECT. Composite PK (TASK_NAME, CHUNK_ID) now fully functional.' || CHR(10) ||
'v2.12.0 (2026-02-24): CRITICAL FIX - Rewritten DELETE_FAILED_EXPORT_FILE to use file-specific pattern matching (prevents deleting parallel CSV chunks in shared folder). Added vQuery logging before DBMS_CLOUD calls. Added CSV maxfilesize logging.' || CHR(10) ||
'v2.11.0 (2026-02-18): Added pJobClass parameter to EXPORT_TABLE_DATA_BY_DATE and EXPORT_TABLE_DATA_TO_CSV_BY_DATE for Oracle Scheduler job class support (resource/priority management).' || CHR(10);
cgBL CONSTANT VARCHAR2(2) := CHR(13)||CHR(10);
vgMsgTmp VARCHAR2(32000);