This commit is contained in:
Grzegorz Michalski
2026-03-02 09:47:35 +01:00
commit 2c225d68ac
715 changed files with 130067 additions and 0 deletions

View File

@@ -0,0 +1,73 @@
from jinja2 import Environment, FileSystemLoader
import csv

# Generate per-table ELT artifacts from Jinja templates, one set per row of the
# SCHEMA,TABLE list in ods_mopdb_plain.txt:
#   source_qualifiers/<mapping>_SQ.sql  - dbt source-qualifier model
#   targets/<mapping>.sql               - dbt target model
#   dags/<workflow>.py                  - Airflow DAG
#   db/<schema>_<table>.sql             - per-table migration SQL (also appended
#                                         to the cumulative db/table_changes.sql)
environment = Environment(loader=FileSystemLoader("templates/"))
template_sq = environment.get_template("m_Template_sq.sql")
template_tgt = environment.get_template("m_Template_target.sql")
template_db = environment.get_template("db_Template.sql")
template_dag = environment.get_template("dag_Template.py")

# Use a context manager so the input handle is closed (the original leaked it).
with open("ods_mopdb_plain.txt", "r", newline="") as table_list:
    csvFile = csv.reader(table_list)
    header = next(csvFile)  # skip the SCHEMA,TABLE header row
    for row in csvFile:
        (schema, table) = row
        # e.g. TMS,ACTIVITYLOGDUE expands to
        # m_MOPDB_TMS_T_ACTIVITYLOGDUE_OU_TMS_ACTIVITYLOGDUE / w_MOPDB_TMS_T_ACTIVITYLOGDUE
        target_table = f"T_{table}"
        source_schema = f"OU_{schema}"
        source_schema_lower = source_schema.lower()
        source_table = table
        mapping_name = f"m_MOPDB_{schema}_{target_table}_{source_schema}_{source_table}"
        workflow_name = f"w_MOPDB_{schema}_{target_table}"

        # 1) dbt source-qualifier model
        content = template_sq.render(
            source_schema=source_schema_lower,
            source_table=table,
            target_schema=schema,
            target_table=target_table,
            mapping_name=mapping_name,
            workflow_name=workflow_name,
        )
        filename = f"source_qualifiers/{mapping_name}_SQ.sql"
        with open(filename, mode="w", encoding="utf-8") as message:
            message.write(content)
        # BUG FIX: the original f-strings contained a literal placeholder and
        # printed "... wrote (unknown)" instead of the generated file name.
        print(f"... wrote {filename}")

        # 2) dbt target model
        content = template_tgt.render(
            source_schema=source_schema_lower,
            source_table=table,
            target_schema=schema,
            target_table=target_table,
            mapping_name=mapping_name,
            workflow_name=workflow_name,
        )
        filename = f"targets/{mapping_name}.sql"
        with open(filename, mode="w", encoding="utf-8") as message:
            message.write(content)
        print(f"... wrote {filename}")

        # 3) Airflow DAG
        content = template_dag.render(table=table)
        filename = f"dags/{workflow_name}.py"
        with open(filename, mode="w", encoding="utf-8") as message:
            message.write(content)
        print(f"... wrote {filename}")

        # 4) per-table migration SQL ...
        content = template_db.render(table=table, schema=schema)
        filename = f"db/{source_schema}_{source_table}.sql"
        with open(filename, mode="w", encoding="utf-8") as message:
            message.write(content)
        print(f"... wrote {filename}")

        # ... plus the cumulative changes file (append mode)
        filename = "db/table_changes.sql"
        with open(filename, mode="a", encoding="utf-8") as message:
            message.write(f"{content}\n")
        print(f"... wrote {filename}")

View File

@@ -0,0 +1,30 @@
SCHEMA,TABLE
TMS,ACMENTRYSTATELEDGERGROUP
TMS,ACTIVITYLOGDUE
TMS,ACTIVITY_LOG
TMS,BALANCE
TMS,BLACKOUT_LOG
TMS,BRANCH
TMS,CALENDAR
TMS,CASHFLOW
TMS,CLIENT
TMS,CUSTODYBALANCE
TMS,ECBINSTRUMENTBONDCASHFLOW
TMS,EFFECTIVEROLEPROFILE
TMS,FINMESSAGELOG
TMS,HISTORY_LOG
TMS,INSTRUMENTBONDCASHFLOW
TMS,INSTRUMENT_REPORT
TMS,MARKETINFO
TMS,PARAMETER
TMS,PORTFOLIOTREE
TMS,PRICES
TMS,PROPERTY
TMS,RECONCILIATION
TMS,ROLEPORTFOLIOPROFILE
TMS,RULES
TMS,SDM_ENTITY_STATE
TMS,SECURITYPOSITION
TMS,SETTLEMENTCASHFLOW
TMS,SETTLEMENTLOG
TMS,USERINFORMATION

View File

@@ -0,0 +1,95 @@
from airflow.decorators import dag
from airflow.operators.bash import BashOperator
from airflow.operators.python import PythonOperator
from airflow.utils.dates import days_ago
from airflow.utils.trigger_rule import TriggerRule
from cosmos import DbtTaskGroup, ProfileConfig, ProjectConfig, RenderConfig
# Define paths to your dbt files
dbt_root_path = "/home/dbt/DBT/mrds"
dbt_profiles_dir = "/home/dbt/.dbt/profiles.yml"
# {{table}} is substituted by the generator script when this template is rendered.
ODS_TABLE = "{{table}}"
DATABASE_NAME = "MOPDB"
# Workflow (DAG) and mapping (dbt tag) names follow the project's w_/m_ convention.
DAG_NAME = f"w_{DATABASE_NAME}_TMS_T_{ODS_TABLE}_OU_TMS_{ODS_TABLE}"
MAPPING_NAME = f"m_{DATABASE_NAME}_TMS_T_{ODS_TABLE}_OU_TMS_{ODS_TABLE}"
def retrieve_run_id(**kwargs):
    """Read the current run_id from the Airflow context, publish it to XCom
    under the key 'run_id', and return it."""
    context_run_id = kwargs['run_id']
    kwargs['ti'].xcom_push(key='run_id', value=context_run_id)
    return context_run_id
def check_dag_status(**kwargs):
    """Raise if any other task instance in this DAG run has failed, so the
    final status task marks the whole run as failed."""
    own_task_id = kwargs['task_instance'].task_id
    for instance in kwargs['dag_run'].get_task_instances():
        if instance.task_id == own_task_id:
            continue
        if instance.state == 'failed':
            raise Exception("Task {} failed. Failing this DAG run".format(instance.task_id))
# Determine the overall workflow status from the monitored upstream tasks
def determine_workflow_status(**kwargs):
    """Return 'Y' when every monitored task reported 'success', else 'N'."""
    statuses = kwargs['ti'].xcom_pull(
        task_ids=['retrieve_run_id', 'control_external_run_start', 'mapping_mopdb'])
    if all(status == 'success' for status in statuses):
        return 'Y'
    return 'N'
@dag(
    dag_id=DAG_NAME,
    schedule_interval=None,  # triggered externally, never on a schedule
    start_date=days_ago(2),
    catchup=False
)
def run_dag():
    """Orchestrate one MOPDB table load: capture the run_id, open the external
    run in dbt, execute the tagged dbt task group, close the run, then gate the
    overall DAG status on the other tasks' outcomes."""
    # Retrieve run_id
    retrieve_run_id_task = PythonOperator(
        task_id='retrieve_run_id',
        python_callable=retrieve_run_id,
        provide_context=True,
        # pool='my_custom_pool', # Create pool in Airflow Web UI with one slot to ensure that only one dag can run it at a time.
    )
    # Run dbt macro control_external_run_start
    # The {% raw %} markers survive template rendering as literal Airflow-Jinja
    # braces, so the xcom_pull is evaluated at task runtime, not at generation time.
    control_external_run_start = BashOperator(
        task_id='control_external_run_start',
        bash_command=(
            'cd /home/dbt/DBT/mrds && '
            'dbt run-operation control_external_run_start --vars \'{"orchestration_run_id": "{% raw %}{{{% endraw %} task_instance.xcom_pull(task_ids="retrieve_run_id", key="run_id") {% raw %}}}{% endraw %}", "input_service_name": "' + DATABASE_NAME + '", "workflow_name": "' + DAG_NAME + '"}\' '
            '--profiles-dir /home/dbt/.dbt/ --target dev'
        )
    )
    # run dbt taskGroup with tag of the mapping name
    dbtTaskGroup = DbtTaskGroup(
        group_id=MAPPING_NAME,
        project_config=ProjectConfig(
            dbt_project_path = dbt_root_path),
        profile_config=ProfileConfig(
            profiles_yml_filepath = dbt_profiles_dir,
            profile_name="mrds",
            target_name="dev"),
        # Only dbt models tagged with this mapping name are selected.
        render_config=RenderConfig(select=[f"tag:{MAPPING_NAME}"],),
        operator_args={'vars': {'orchestration_run_id': '{% raw %}{{{% endraw %} task_instance.xcom_pull(task_ids="retrieve_run_id", key="run_id") {% raw %}}}{% endraw %}', "input_service_name": DATABASE_NAME, "workflow_name": DAG_NAME }}
    )
    # Close the external run record even when upstream failed.
    control_external_run_end = BashOperator(
        task_id='control_external_run_end',
        bash_command=(
            'cd /home/dbt/DBT/mrds && '
            'dbt run-operation control_external_run_end --vars \'{"orchestration_run_id": "{% raw %}{{{% endraw %} task_instance.xcom_pull(task_ids="retrieve_run_id", key="run_id") {% raw %}}}{% endraw %}", "input_service_name": "' + DATABASE_NAME + '", "workflow_name": "' + DAG_NAME + '"}\' '
            '--profiles-dir /home/dbt/.dbt/ --target dev'
        ),
        trigger_rule=TriggerRule.ALL_DONE # Run regardless of previous task outcomes
    )
    dag_status = PythonOperator(
        task_id='dag_status',
        provide_context=True,
        python_callable=check_dag_status,
        trigger_rule=TriggerRule.ALL_DONE, # Ensures this task runs even if upstream fails
    )
    # Set task dependencies
    retrieve_run_id_task >> control_external_run_start >> [dbtTaskGroup] >> control_external_run_end >> dag_status
# Register the DAG object under its rendered name so Airflow discovers it.
globals()[DAG_NAME] = run_dag()

View File

@@ -0,0 +1,2 @@
{#- Rendered once per table by the generator script: renames the legacy audit
    FK on the source (OU_) table and drops the old target table so the dbt
    models can recreate it. NOTE(review): DROP TABLE is destructive — confirm
    each rendered T_<table> is safe to drop before executing the output. -#}
ALTER TABLE OU_{{schema}}.{{table}} RENAME COLUMN A_ETL_LOAD_SET_FK TO A_WORKFLOW_HISTORY_KEY;
DROP TABLE {{schema}}.T_{{table}};

View File

@@ -0,0 +1,11 @@
{#- Jinja template (rendered by the generator) for a dbt source-qualifier (SQ)
    model. The {% raw %}/{% endraw %} markers emit literal dbt-Jinja braces
    into the generated model file; the other placeholders are filled at
    generation time. This comment is stripped during rendering. -#}
{% raw %}{{{% endraw %}
config(
materialized="table",
tags=["{{mapping_name}}", "{{source_schema}}", "{{target_schema}}"],
alias="{{target_table}}_SQ",
schema="{{target_schema}}"
)
{% raw %}}}{% endraw %}
{% raw %}{{{% endraw %}
create_table_from_source("{{source_schema}}","{{source_table}}","{{workflow_name}}",get_main_task_name(model.name),['A_KEY','A_WORKFLOW_HISTORY_KEY'])
{% raw %}}}{% endraw %}

View File

@@ -0,0 +1,13 @@
{#- Jinja template (rendered by the generator) for a dbt incremental target
    model. The first rendered line is a dbt "-- depends_on" hint that forces a
    dependency on the corresponding _SQ model; {% raw %} markers emit literal
    dbt-Jinja braces. This comment is stripped during rendering. -#}
{% raw %}-- depends_on: {{{% endraw %} ref('{{mapping_name}}_SQ') {% raw %}}} {% endraw %}
{% raw %}{{{% endraw %}
config(
tags=["{{mapping_name}}","MOPDB","{{target_schema}}"],
alias="{{target_table}}",
schema="{{target_schema}}",
materialized="incremental" ,
incremental_strategy="merge"
)
{% raw %}}}{% endraw %}
{% raw %}{{{% endraw %}
create_table_target('{{mapping_name}}_SQ')
{% raw %}}}{% endraw %}

View File

@@ -0,0 +1,201 @@
import requests
import io
import zipfile
import pandas as pd
import os
from datetime import datetime
import oci
from mrds.utils.secrets import get_secret
import mrds.utils.manage_runs as runManager
import mrds.utils.manage_files as fileManager
import mrds.utils.sql_statements as sqls
import sys
import yaml
TASK_HISTORY_MULTIPLIER = 1_000_000_000
def initialize_task(workflow_context, task_name):
    """Register this task with the run manager and return its task-history key."""
    run_id = workflow_context["run_id"]
    workflow_key = workflow_context["a_workflow_history_key"]
    return runManager.init_task(task_name, run_id, workflow_key)
def rqsd_parser(fileName, bucket_path, file, bucket_name):
    """Dispatch a downloaded workbook to the matching annex parser based on
    markers in its file name; returns annex_2's result for RQSDC files."""
    is_scop = ("SCOPA" in fileName) or ("SCOPF" in fileName)
    if is_scop:
        print("SCOP")
        annex_1_1(fileName, bucket_path, file, bucket_name)
        annex_1_2(fileName, bucket_path, file, bucket_name)
        return None
    if "RQSDC" in fileName:
        print("RQSDC")
        return annex_2(fileName, bucket_path, file, bucket_name)
    return None
def annex_1_1(fileName, bucket_path, file, bucket_name):
    """Parse the 'Counterparties in scope' sheet of a SCOP workbook and upload
    it as CSV to <bucket_path>1_1/ in the given bucket.

    The CSV is enriched with the source file name, an ingestion timestamp, and
    metadata parsed from the underscore-separated file name (version number,
    reference exercise, NCB).
    """
    fileData = fileName.split("_")
    # BUG FIX: the original sliced the last four characters (fileName[:-4]),
    # which yields "name..csv" for .xlsx inputs; splitext handles any extension.
    csv_file_path = os.path.splitext(fileName)[0] + ".csv"
    version_number = fileData[6]
    ref_exercise = fileData[2]
    ncb = fileData[4]
    df = pd.read_excel(file, sheet_name="Counterparties in scope", skiprows=3)
    df = df.dropna(axis=1, how='all').dropna(axis=0, how='all')
    df['file_name'] = os.path.basename(fileName)
    df['ingestion_timestamp'] = datetime.now().isoformat()
    df['version_number'] = version_number
    df['ref_exercise'] = ref_exercise
    df['ncb'] = ncb
    # Instance-principal auth: the empty dict is an intentionally empty config.
    # NOTE(review): the namespace is hard-coded here but configurable elsewhere
    # (BUCKET_NAMESPACE) — consider unifying.
    signer = oci.auth.signers.InstancePrincipalsSecurityTokenSigner()
    client = oci.object_storage.ObjectStorageClient({}, signer=signer)
    client.put_object("frcnomajoc7v", bucket_name, bucket_path + "1_1/" + csv_file_path,
                      bytes(df.to_csv(index=False), encoding='utf-8'))
    print("Finished uploading {}".format(csv_file_path))
    print(f"CSV saved to {csv_file_path}")
def annex_1_2(fileName, bucket_path, file, bucket_name):
    """Parse the 'Entities to which data relates' sheet of a SCOP workbook and
    upload it as CSV to <bucket_path>1_2/ in the given bucket.

    Enrichment columns match annex_1_1: file name, ingestion timestamp, and
    version/exercise/NCB parsed from the underscore-separated file name.
    """
    fileData = fileName.split("_")
    # BUG FIX: fileName[:-4] mangles names for .xlsx inputs; use splitext.
    csv_file_path = os.path.splitext(fileName)[0] + ".csv"
    version_number = fileData[6]
    ref_exercise = fileData[2]
    ncb = fileData[4]
    df = pd.read_excel(file, sheet_name="Entities to which data relates", skiprows=3)
    df = df.dropna(axis=1, how='all').dropna(axis=0, how='all')
    df['file_name'] = os.path.basename(fileName)
    df['ingestion_timestamp'] = datetime.now().isoformat()
    df['version_number'] = version_number
    df['ref_exercise'] = ref_exercise
    df['ncb'] = ncb
    # Instance-principal auth: the empty dict is an intentionally empty config.
    signer = oci.auth.signers.InstancePrincipalsSecurityTokenSigner()
    client = oci.object_storage.ObjectStorageClient({}, signer=signer)
    client.put_object("frcnomajoc7v", bucket_name, bucket_path + "1_2/" + csv_file_path,
                      bytes(df.to_csv(index=False), encoding='utf-8'))
    print("Finished uploading {}".format(csv_file_path))
    print(f"CSV saved to {csv_file_path}")
def annex_2(fileName, bucket_path, file, bucket_name):
    """Parse the 'Data collection template' sheet of an RQSDC workbook and
    upload it as CSV to <bucket_path>2/ in the given bucket.
    """
    fileData = fileName.split("_")
    # Parameters parsed from the underscore-separated file name
    version_number = fileData[6]
    ref_exercise = fileData[2]
    ncb = fileData[4]
    # Read the template sheet, skipping the metadata rows
    df = pd.read_excel(file.getvalue(), sheet_name="Data collection template", skiprows=6)
    # Clean empty rows/columns
    df = df.dropna(axis=1, how='all').dropna(axis=0, how='all')
    # Add metadata columns
    df['file_name'] = os.path.basename(fileName)
    df['ingestion_timestamp'] = datetime.now().isoformat()
    df['version_number'] = version_number
    df['ref_exercise'] = ref_exercise
    df['ncb'] = ncb
    # BUG FIX: the original built the name as fileName[:-4] + "csv" (missing
    # the dot), producing e.g. "reportcsv" for .xls inputs; use splitext.
    csvName = os.path.splitext(fileName)[0] + ".csv"
    # Upload via instance-principal auth (empty dict is an empty config).
    signer = oci.auth.signers.InstancePrincipalsSecurityTokenSigner()
    client = oci.object_storage.ObjectStorageClient({}, signer=signer)
    client.put_object("frcnomajoc7v", bucket_name, bucket_path + "2/" + csvName,
                      bytes(df.to_csv(index=False), encoding='utf-8'))
    print("Finished uploading {}".format(csvName))
    print(f"CSV saved to {csvName}")
def rqsd_preflow(secret_crt_id, secret_key_id, casper_api_url, collection_id):
    """Write the client certificate/key from the vault to disk and return the
    CASPER filevault metadata for completed, not-yet-processed files.

    Returns the parsed JSON list of downloadable file descriptors.
    Raises RuntimeError when the secrets or the metadata cannot be retrieved.
    """
    crt_path = os.getcwd() + "/rqsd_tst.crt"
    key_path = os.getcwd() + "/rqsd_tst.key.pem"
    try:
        with open(key_path, "w") as keyfile:
            keyfile.write(get_secret(secret_key_id))
        with open(crt_path, "w") as crtfile:
            crtfile.write(get_secret(secret_crt_id))
    except Exception as exc:
        print("Failed to retrieve certificates from secrets")
        # Best-effort cleanup: either file may not exist yet, and the original
        # unconditional os.remove could itself raise and mask the real error.
        for leftover in (crt_path, key_path):
            if os.path.exists(leftover):
                os.remove(leftover)
        raise RuntimeError("Failed to retrieve certificates from secrets") from exc
    protected_resource_url = casper_api_url + "/casper-api/filevault/"
    try:
        max_date = fileManager.execute_query("SELECT to_char(max(processing_end_time),'YYYY-MM-DD HH24:mi:ss') as MAX_PROCESSING_END_TIME FROM ct_ods.a_casper_filevault")
        # BUG FIX: the original tested `max_date is not []`, which is always
        # true (identity against a fresh list object); test for a non-empty
        # result instead so an empty table falls back to the unfiltered query.
        if max_date:
            filterString = ('isTest eq False and processingStatus eq "PS_COMPLETED" and processingEndTime gt '
                            + max_date[0].split(' ')[0])
        else:
            filterString = 'isTest eq False and processingStatus eq "PS_COMPLETED"'
        # NOTE(review): verify=False disables TLS verification — confirm this
        # is intentional for the internal CASPER endpoint.
        response = requests.get(protected_resource_url + "files/" + collection_id,
                                headers={"accept": "application/json"},
                                cert=(crt_path, key_path), verify=False,
                                params={"filter": filterString})
        print(response.text)
        files = response.json()
    except Exception as exc:
        print("Failed to retrieve ACC metadata, error during connection or request")
        raise RuntimeError("Failed to retrieve ACC metadata") from exc
    return files
def rqsd_process(files, casper_api_url, bucket_path, bucket_name):
    """Download each file listed in *files* from the CASPER filevault and feed
    it to rqsd_parser for parsing/upload.

    On any failure the client certificate/key written by rqsd_preflow are
    removed and a RuntimeError carrying the original cause is raised.
    """
    crt_path = os.getcwd() + "/rqsd_tst.crt"
    key_path = os.getcwd() + "/rqsd_tst.key.pem"
    for downloadable in files:
        try:
            print("\n\n")
            # NOTE(review): verify=False disables TLS verification — confirm intentional.
            response = requests.get(
                casper_api_url + "/casper-api/filevault/download/"
                + str(downloadable["dcId"]) + '/' + str(downloadable["fileID"]),
                headers={"accept": "application/json"},
                cert=(crt_path, key_path), verify=False)
            rqsd_parser(downloadable["fileName"], bucket_path, io.BytesIO(response.content), bucket_name)
        except Exception as exc:
            print(f"Failed to upload file into target bucket, files saved locally in {os.getcwd()}")
            os.remove(crt_path)
            os.remove(key_path)
            # Preserve the original cause instead of raising a bare Exception.
            raise RuntimeError("Failed to download or parse CASPER file") from exc
def add_a_key_column(headers, data_rows, task_history_key):
    """Prepend an A_KEY column: task_history_key * TASK_HISTORY_MULTIPLIER plus
    the 1-based row number, stored as a string on every row."""
    headers.insert(0, 'A_KEY')
    base = int(task_history_key) * TASK_HISTORY_MULTIPLIER
    row_number = 0
    for row in data_rows:
        row_number += 1
        row.insert(0, str(base + row_number))
def add_workflow_key_column(headers, data_rows, workflow_key):
    """Insert an A_WORKFLOW_HISTORY_KEY column directly after A_KEY.

    BUG FIX: the original inserted the header at index 1 but the row values at
    index 0, misaligning every data row by one column relative to the header
    (the DevoConnector class version inserts both at the same index).
    """
    headers.insert(1, 'A_WORKFLOW_HISTORY_KEY')
    for row in data_rows:
        row.insert(1, workflow_key)
def initialize_config(config_file_path):
    """Load a YAML configuration file and return its parsed content.

    Raises FileNotFoundError when the file does not exist.
    """
    if not os.path.exists(config_file_path):
        raise FileNotFoundError(f"Configuration file {config_file_path} not found.")
    with open(config_file_path, "r") as config_file:
        return yaml.safe_load(config_file)
def main(workflow_context, flow_config_path, env_config_path, env):
    """Run the RQSD ingestion flow end to end.

    Args:
        workflow_context: dict with 'run_id' and 'a_workflow_history_key'.
        flow_config_path: YAML file providing TASK_NAME, COLLECTION_ID, ODS_PREFIX.
        env_config_path: YAML file keyed by environment name.
        env: environment key (dev/tst/acc/prd).

    Raises RuntimeError when metadata retrieval or file processing fails.
    """
    # init setup (the unused current_time of the original was dropped)
    flow_info = initialize_config(flow_config_path)
    envs_info = initialize_config(env_config_path)
    environment_info = envs_info[env]
    a_task_history_key = initialize_task(workflow_context, flow_info['TASK_NAME'])
    try:
        # get list of files, then download/parse/upload each of them
        files = rqsd_preflow(environment_info["CERTIFICATE_FILE"],
                             environment_info["CERTIFICATE_KEY"],
                             environment_info["CASPER_URL"],
                             flow_info["COLLECTION_ID"])
        rqsd_process(files, environment_info["CASPER_URL"],
                     flow_info["ODS_PREFIX"], environment_info["BUCKET"])
    except Exception as exc:
        print("Failed to retrieve DEVO data, error during connection or request")
        # Record the failure in run metadata before propagating; the original
        # bare except dropped the cause and left the task unfinalised.
        runManager.finalise_task(a_task_history_key, 'N')
        raise RuntimeError("RQSD ingestion failed") from exc
    # Finalize task
    runManager.finalise_task(a_task_history_key, 'Y')

View File

@@ -0,0 +1,27 @@
# Environment Configuration
# Per-environment settings for the RQSD CASPER ingestion flow:
#   BUCKET             - OCI Object Storage bucket that receives the CSV output
#   CERTIFICATE_KEY    - OCID of the vault secret holding the client key (PEM)
#   CERTIFICATE_FILE   - OCID of the vault secret holding the client certificate
#   CASPER_URL         - base URL of the CASPER API
#   RQSD_COLLECTION_ID - CASPER filevault collection identifier
# NOTE(review): dev and tst share the same tst endpoint and secrets — confirm intended.
dev:
  BUCKET: "mrds_inbox_dev"
  CERTIFICATE_KEY: "ocid1.vaultsecret.oc1.eu-frankfurt-1.amaaaaaa2ky4jjya7r33ocatalf6jn6kg2xjhnya6kazlqd3e5gw6yghpd5q"
  CERTIFICATE_FILE: "ocid1.vaultsecret.oc1.eu-frankfurt-1.amaaaaaa2ky4jjyaeva4zvj6xdihljookamhse7jlyassfjb4p45xp46bwba"
  CASPER_URL: "https://internet.api.casper.tst.aws.tadnet.net"
  RQSD_COLLECTION_ID: "1537"
tst:
  BUCKET: "mrds_inbox_tst"
  CERTIFICATE_KEY: "ocid1.vaultsecret.oc1.eu-frankfurt-1.amaaaaaa2ky4jjya7r33ocatalf6jn6kg2xjhnya6kazlqd3e5gw6yghpd5q"
  CERTIFICATE_FILE: "ocid1.vaultsecret.oc1.eu-frankfurt-1.amaaaaaa2ky4jjyaeva4zvj6xdihljookamhse7jlyassfjb4p45xp46bwba"
  CASPER_URL: "https://internet.api.casper.tst.aws.tadnet.net"
  RQSD_COLLECTION_ID: "1537"
acc:
  BUCKET: "mrds_inbox_acc"
  CERTIFICATE_KEY: "ocid1.vaultsecret.oc1.eu-frankfurt-1.amaaaaaa2ky4jjya5snmftggydoszwchjra3ifa4pyiilgc26uqlhejnhcca"
  CERTIFICATE_FILE: "ocid1.vaultsecret.oc1.eu-frankfurt-1.amaaaaaa2ky4jjyaho5t4qgmlqctew6g6mcnwpz2p7z4nhxooyl6hc5sonfa"
  CASPER_URL: "https://internet.api.casper.stg.aws.ecb.de"
  RQSD_COLLECTION_ID: "1116"
prd:
  BUCKET: "mrds_inbox_prd"
  CERTIFICATE_KEY: "ocid1.vaultsecret.oc1.eu-frankfurt-1.amaaaaaa2ky4jjyahmv5sopfsv7nytxdyycehoyl5pd7sz5t2drn27qaneta"
  CERTIFICATE_FILE: "ocid1.vaultsecret.oc1.eu-frankfurt-1.amaaaaaa2ky4jjyame3chyqs6cdl2igeyrvzpj3s4vrndhbgeayt533uhgqa"
  CASPER_URL: "https://internet.api.casper.prd.aws.ecb.de"
  RQSD_COLLECTION_ID: "1030"

View File

@@ -0,0 +1,25 @@
# Environment Configuration
# Per-environment settings for the DEVO Impala exporter (Informatica accounts):
#   BUCKET_NAMESPACE - OCI Object Storage namespace
#   BUCKET           - target bucket for exported CSVs
#   DEVO_USERNAME    - Impala user
#   DEVO_HOSTNAME    - Impala proxy host
#   DEVO_SECRET      - OCID of the vault secret holding the Impala password
dev:
  BUCKET_NAMESPACE: "frcnomajoc7v"
  BUCKET: "mrds_inbox_dev"
  DEVO_USERNAME: "ap-informatica-ipcwt"
  DEVO_HOSTNAME: "impala-proxy-devo-lab21-impala01.dw-devo-lab21.om2y56.b0.cloudera.site"
  DEVO_SECRET: "ocid1.vaultsecret.oc1.eu-frankfurt-1.amaaaaaa2ky4jjyavrevwxke46wjgj5nz3cc5kwwsybmngbji4zepones55q"
tst:
  BUCKET_NAMESPACE: "frcnomajoc7v"
  BUCKET: "mrds_inbox_tst"
  DEVO_USERNAME: "ap-informatica-ipcwt"
  DEVO_HOSTNAME: "t-impala.devo.escb.eu"
  DEVO_SECRET: "ocid1.vaultsecret.oc1.eu-frankfurt-1.amaaaaaa2ky4jjyaxxx7yfifpgpdnxuj6dcowpoktwa6745kwwpezysd44oa"
acc:
  BUCKET_NAMESPACE: "frcnomajoc7v"
  BUCKET: "mrds_inbox_acc"
  DEVO_USERNAME: "ap-informatica-ipcwa"
  DEVO_HOSTNAME: "impala-proxy-devo-acc21-impala01.dw-devo-acc21.inym23.b0.cloudera.site"
  DEVO_SECRET: "ocid1.vaultsecret.oc1.eu-frankfurt-1.amaaaaaa2ky4jjya4uttfadlzreloouw2e5bifgl2dvihffym5xoq3b3jmva"
prd:
  BUCKET_NAMESPACE: "frcnomajoc7v"
  BUCKET: "mrds_inbox_prd"
  DEVO_USERNAME: "ap-informatica-ipcwp"
  DEVO_HOSTNAME: "impala-proxy-devo-prd21-impala01.dw-devo-prd21.inym23.b0.cloudera.site"
  DEVO_SECRET: "ocid1.vaultsecret.oc1.eu-frankfurt-1.amaaaaaa2ky4jjyanbahqlucid7qtzvoohsf4xrlul7cvhlsqttmbro4n66a"

View File

@@ -0,0 +1,25 @@
# Environment Configuration
# Per-environment settings for the DEVO Impala exporter (RQSD accounts):
#   BUCKET_NAMESPACE - OCI Object Storage namespace
#   BUCKET           - target bucket for exported CSVs
#   DEVO_USERNAME    - Impala user
#   DEVO_HOSTNAME    - Impala proxy host
#   DEVO_SECRET      - OCID of the vault secret holding the Impala password
# NOTE(review): dev reuses the tst user/host/secret — confirm intended.
dev:
  BUCKET_NAMESPACE: "frcnomajoc7v"
  BUCKET: "mrds_inbox_dev"
  DEVO_USERNAME: "ap-devo-rqsd-tst"
  DEVO_HOSTNAME: "t-impala.devo.escb.eu"
  DEVO_SECRET: "ocid1.vaultsecret.oc1.eu-frankfurt-1.amaaaaaa2ky4jjyap6wtzobzob7qizvk4nocszlcaxhwijgzejbvryt3uzbq"
tst:
  BUCKET_NAMESPACE: "frcnomajoc7v"
  BUCKET: "mrds_inbox_tst"
  DEVO_USERNAME: "ap-devo-rqsd-tst"
  DEVO_HOSTNAME: "t-impala.devo.escb.eu"
  DEVO_SECRET: "ocid1.vaultsecret.oc1.eu-frankfurt-1.amaaaaaa2ky4jjyap6wtzobzob7qizvk4nocszlcaxhwijgzejbvryt3uzbq"
acc:
  BUCKET_NAMESPACE: "frcnomajoc7v"
  BUCKET: "mrds_inbox_acc"
  DEVO_USERNAME: "ap-devo-rqsd-acc"
  DEVO_HOSTNAME: "impala-proxy-devo-acc21-impala01.dw-devo-acc21.inym23.b0.cloudera.site"
  DEVO_SECRET: "ocid1.vaultsecret.oc1.eu-frankfurt-1.amaaaaaa2ky4jjyamzhgatnso57mubvg3c6k4ens3orcx4dieo6efukuvm4a"
prd:
  BUCKET_NAMESPACE: "frcnomajoc7v"
  BUCKET: "mrds_inbox_prd"
  DEVO_USERNAME: "ap-devo-rqsd-prd"
  DEVO_HOSTNAME: "impala-proxy-devo-prd21-impala01.dw-devo-prd21.inym23.b0.cloudera.site"
  DEVO_SECRET: "ocid1.vaultsecret.oc1.eu-frankfurt-1.amaaaaaa2ky4jjyawpahgevgxv6csqnwil3p37vi6pthl466onnkg6k7undq"

View File

@@ -0,0 +1,259 @@
# devo_impala_exporter.py
import os
import io
import yaml
import datetime
import logging
from typing import Any, Dict, List, Optional, Tuple
import pandas as pd
from mrds.utils.secrets import get_secret
import mrds.utils.manage_runs as runManager
import mrds.utils.objectstore as objectstore
import oci
from impala.dbapi import (
connect,
ProgrammingError,
DatabaseError,
IntegrityError,
OperationalError,
)
from impala.error import HiveServer2Error
TASK_HISTORY_MULTIPLIER = 1_000_000_000
class DevoConnector:
    """
    Export the result of an Impala (DEVO) query to OCI Object Storage as CSV,
    while recording task run metadata via mrds runManager.

    Usage:
        exporter = DevoConnector(
            flow_config_path="/path/to/flow.yaml",
            env_config_path="/path/to/env.yaml",
            env="dev",
            logger=my_logger,              # optional
            oci_client=my_object_storage,  # optional ObjectStorageClient
            oci_signer=my_signer,          # optional signer (used if client not provided)
        )
        exporter.run({"run_id": 34, "a_workflow_history_key": 6})
    """

    def __init__(
        self,
        flow_config_path: str,
        env_config_path: str,
        env: str,
        logger: Optional[logging.Logger] = None,
        oci_client: Optional["oci.object_storage.ObjectStorageClient"] = None,
        oci_signer: Optional[Any] = None,
    ) -> None:
        """Load flow and environment configuration and prepare logging/OCI handles.

        Raises:
            FileNotFoundError: if either YAML file is missing.
            KeyError: if *env* is not a key of the environment config.
        """
        self.flow_info = self._initialize_config(flow_config_path)
        envs_info = self._initialize_config(env_config_path)
        # The namespace may be overridden through the process environment.
        BUCKET_NAMESPACE = os.getenv("BUCKET_NAMESPACE", "frcnomajoc7v")
        if env not in envs_info:
            raise KeyError(f"Environment '{env}' not found in {env_config_path}")
        self.environment_info = envs_info[env]
        self.environment_info["BUCKET_NAMESPACE"] = BUCKET_NAMESPACE
        self.env = env
        # logging
        self.logger = logger or self._default_logger(self.flow_info.get("TASK_NAME", "devo_task"))
        # OCI client/signer (kept for injection; upload goes via mrds objectstore)
        self.oci_client = oci_client
        self.oci_signer = oci_signer

    # -------------------------
    # Public API
    # -------------------------
    def run(self, workflow_context: Dict[str, Any]) -> int:
        """Execute the configured query, upload the CSV, and finalize the task.

        Returns the exported row count (0 when the query produced no rows).
        On failure the task is finalised with 'N' and the exception re-raised.
        """
        task_name = self.flow_info["TASK_NAME"]
        a_task_history_key = self._initialize_task(workflow_context, task_name)
        try:
            # credentials
            devo_secret_name = self.environment_info["DEVO_SECRET"]
            password = get_secret(devo_secret_name)
            self.logger.info("Retrieved secret for DEVO connection.")
            # query
            query = self.flow_info["DEVO_QUERY"]
            user = self.environment_info["DEVO_USERNAME"]
            host = self.environment_info["DEVO_HOSTNAME"]
            columns, data, rowcount = self._execute_query(query=query, user=user, hostname=host, password=password)
            df = self._tuple_to_dataframe((columns, data))
            self.logger.info("Query executed and DataFrame created with %d rows.", len(df))
            # upload only when there is something to upload
            if rowcount > 0:
                csv_name = f"{self.flow_info['OUTPUT_TABLE']}.csv"
                file_path = self._compose_object_path(self.flow_info["ODS_PREFIX"], csv_name)
                self._upload_dataframe_to_oci(df, csv_name, file_path)
                self.logger.info("Finished uploading %s to %s.", csv_name, file_path)
            else:
                # BUG FIX: the original returned here without finalising the
                # task, leaving it open in the run metadata.
                runManager.finalise_task(a_task_history_key, "Y")
                self.logger.info("No rows returned; task %s finalized with empty result.", task_name)
                return 0
            # success
            runManager.finalise_task(a_task_history_key, "Y")
            self.logger.info("Task %s finalized successfully.", task_name)
            return rowcount
        except Exception as e:
            # failure
            self.logger.exception("Run failed: %s", e)
            try:
                runManager.finalise_task(a_task_history_key, "N")
            finally:
                # re-raise for upstream handling if used as a library
                raise

    # -------------------------
    # Impala / DEVO
    # -------------------------
    @staticmethod
    def _get_impala_connection(hostname: str, user: str, secret: str):
        """Open a HiveServer2-over-HTTPS connection to the DEVO Impala proxy."""
        return connect(
            host=hostname,
            port=443,
            auth_mechanism="PLAIN",
            user=user,
            password=secret,
            use_http_transport=True,
            http_path="cliservice",
            use_ssl=True,
        )

    def _execute_query(self, query: str, user: str, hostname: str, password: str) -> Tuple[List[str], List[List[Any]], int]:
        """Run *query* and return (column_names, rows, rowcount).

        For non-SELECT statements the column list is empty and the single row
        carries the affected-row count. NOTE(review): impyla may report -1 as
        rowcount for SELECTs depending on driver version — confirm against the
        deployed release.
        """
        conn = self._get_impala_connection(hostname, user, password)
        cursor = None
        self.logger.info("Executing Impala query against host '%s' as user '%s'.", hostname, user)
        try:
            cursor = conn.cursor()
            cursor.execute(query)
            # The original tested startswith("select") OR substring containment;
            # the substring test subsumes the former, so only it is kept.
            if "select" in query.strip().lower():
                rows = cursor.fetchall()
                columns = [col[0] for col in cursor.description]
                return columns, rows, cursor.rowcount
            # BUG FIX: the non-SELECT branch returned a 2-tuple while every
            # caller unpacks three values; return a consistent 3-tuple.
            return [], [[cursor.rowcount]], cursor.rowcount
        except OperationalError as oe:
            raise RuntimeError("Failed to connect to Impala: " + str(oe)) from oe
        except ProgrammingError as pe:
            raise ValueError("Query syntax error: " + str(pe)) from pe
        except IntegrityError as ie:
            raise PermissionError("Insufficient permissions: " + str(ie)) from ie
        except DatabaseError as db_err:
            raise RuntimeError("Database error: " + str(db_err)) from db_err
        except HiveServer2Error as au_err:
            raise PermissionError("HiveServer2Error error: " + str(au_err)) from au_err
        except Exception as e:
            raise RuntimeError("An unexpected error occurred: " + str(e)) from e
        finally:
            try:
                if cursor:
                    cursor.close()
            finally:
                try:
                    conn.close()
                except Exception:
                    # log but don't mask the original exception
                    self.logger.warning("Failed to close Impala connection cleanly.", exc_info=True)

    # -------------------------
    # OCI Upload
    # -------------------------
    def _upload_dataframe_to_oci(self, df: pd.DataFrame, csv_name: str, object_path: str) -> None:
        """Serialize *df* as CSV (no index) and put it at *object_path* in the environment's bucket."""
        namespace = self.environment_info["BUCKET_NAMESPACE"]
        bucket = self.environment_info["BUCKET"]
        # convert DataFrame to CSV bytes without index
        csv_bytes = df.to_csv(index=False).encode("utf-8")
        client = objectstore.get_client()
        client.put_object(namespace, bucket, object_path, csv_bytes)
        self.logger.info("CSV '%s' uploaded to bucket '%s' (ns: '%s', key: '%s').", csv_name, bucket, namespace, object_path)

    # -------------------------
    # Utilities
    # -------------------------
    @staticmethod
    def _tuple_to_dataframe(data_tuple: Tuple[List[str], List[List[Any]]]) -> pd.DataFrame:
        """Build a DataFrame from (columns, rows); an empty column list means
        the rows carry a rowcount from a non-SELECT statement."""
        columns, data = data_tuple
        if not columns:
            return pd.DataFrame(data, columns=["rowcount"])
        return pd.DataFrame(data, columns=columns)

    @staticmethod
    def _initialize_config(config_file_path: str) -> Dict[str, Any]:
        """Load a YAML config file; raise FileNotFoundError when absent."""
        if not os.path.exists(config_file_path):
            raise FileNotFoundError(f"Configuration file {config_file_path} not found.")
        with open(config_file_path, "r") as f:
            return yaml.safe_load(f)

    @staticmethod
    def _initialize_task(workflow_context: Dict[str, Any], task_name: str) -> int:
        """Register the task with the run manager and return its history key."""
        return runManager.init_task(
            task_name,
            workflow_context["run_id"],
            workflow_context["a_workflow_history_key"],
        )

    @staticmethod
    def add_a_key_column(headers: List[str], data_rows: List[List[Any]], task_history_key: int) -> None:
        """Optionally add an A_KEY column (kept for parity with original script)."""
        headers.insert(0, "A_KEY")
        for i, row in enumerate(data_rows, start=1):
            a_key_value = int(task_history_key) * TASK_HISTORY_MULTIPLIER + i
            row.insert(0, str(a_key_value))

    @staticmethod
    def add_workflow_key_column(headers: List[str], data_rows: List[List[Any]], workflow_key: int) -> None:
        """Optionally add the workflow key column right after A_KEY if present, otherwise at position 0."""
        insert_idx = 1 if headers and headers[0] == "A_KEY" else 0
        headers.insert(insert_idx, "A_WORKFLOW_HISTORY_KEY")
        for row in data_rows:
            row.insert(insert_idx, workflow_key)

    @staticmethod
    def _compose_object_path(prefix: str, filename: str) -> str:
        """Join an object-storage prefix and file name with exactly one '/'.

        BUG FIX: the original interpolated a literal placeholder instead of
        *filename*, so the composed object path never contained the file name.
        """
        if prefix.endswith("/"):
            return f"{prefix}{filename}"
        return f"{prefix}/{filename}"

    @staticmethod
    def _default_logger(task_name: str) -> logging.Logger:
        """Create (at most once) a stream logger named after the task."""
        logger = logging.getLogger(f"{task_name}_logger")
        if not logger.handlers:
            logger.setLevel(logging.INFO)
            handler = logging.StreamHandler()
            fmt = logging.Formatter(f"%(asctime)s [{task_name}] %(levelname)s: %(message)s")
            handler.setFormatter(fmt)
            logger.addHandler(handler)
        return logger
# Optional: quick-run convenience if you ever want to execute this module directly.
if __name__ == "__main__":
    # Example only—adjust paths/env/context as needed or remove this block.
    # NOTE(review): hard-coded workstation paths and a fixed workflow context;
    # not suitable for scheduled/automated runs.
    exporter = DevoConnector(
        flow_config_path="/home/dbt/Marco/mrds_elt/airflow/ods/rqsd/rqsd_process/config/yaml/m_ODS_RQSD_OBSERVATIONS.yaml",
        env_config_path="/home/dbt/Marco/mrds_elt/python/connectors/devo/config/env_config_rqsd.yaml",
        env="dev",
    )
    exporter.run({"run_id": 34, "a_workflow_history_key": 6})

View File

@@ -0,0 +1,294 @@
import argparse
from TMSQuery import XMLQuery
import mrds.utils.objectstore
import tempfile
import re
import csv
from io import StringIO
import os.path
import os, psutil
import sys
namespace = os.getenv("BUCKET_NAMESPACE", "frcnomajoc7v")
def memory_usage():
    # Return the resident set size (RSS) of the current process in GiB.
    # (The original comment claimed a percentage "like top", but
    # rss / 1024^3 is an absolute size in gibibytes, not a percentage.)
    process = psutil.Process(os.getpid())
    mem = process.memory_info().rss/(1024*1024*1024)
    return mem
def protect_keyword(s):
    """Normalise a column name for Oracle DDL: lower-case, spaces to
    underscores, and a trailing underscore appended to names that collide
    with reserved words (comment, date, number)."""
    normalised = s.lower().replace(' ', '_')
    reserved = {'comment': 'comment_', 'date': 'date_', 'number': 'number_'}
    return reserved.get(normalised, normalised)
# --- CLI setup -------------------------------------------------------------
cModelsDir = sys.path[0] + '/../dbt/mrds/models/ods/'  # where generated dbt model files land
cDatasetMultiplier = 10000000

parser = argparse.ArgumentParser()
# BUG FIX: 'create-ods-model' is handled by a branch below but was missing from
# choices, so argparse rejected it before that branch could ever run.
parser.add_argument("command", choices=['create-model', 'create-ods-model', 'create-oracle-table', 'retrieve'], help="create-model retrieve")
parser.add_argument("-n", "--name", help="Name")
parser.add_argument("-u", "--url", required=True, help="URL of TMS service")
parser.add_argument("-U", "--user", required=True, help="TMS user")
parser.add_argument("-P", "--password", required=True, help="TMS password")
parser.add_argument("-x", "--xmlfile", help="XML file")
parser.add_argument("-l", "--layoutfile", help="layout file")
parser.add_argument("-f", "--format", help="output format")
parser.add_argument("-p", "--parameter", action="append", help="Parameter")
parser.add_argument("-c", "--column", action="append", help="Additional column")
parser.add_argument("-d", "--destination", help="destination")
parser.add_argument("-s", "--dataset", help="data set ID", type=int)
parser.add_argument("-v", "--version", help="data model version", type=int, default=1)
args = parser.parse_args()

# Build the TMS query from the optional XML/layout files and CLI parameters.
query = XMLQuery()
if args.xmlfile:
    with open(args.xmlfile) as f:
        xml = f.read()
    query.xml = xml
if args.layoutfile:
    with open(args.layoutfile) as f:
        layout = f.read()
    query.layout = layout
if args.format:
    query.format = args.format
if args.parameter:
    for p in args.parameter:
        [name, value] = p.split('=', 1)
        query.parameter[name] = value

# Additional columns are given as "name=value" where name may carry a type
# after '|', '/' or ':' (e.g. "col:number(12,0)"); default type varchar2(255).
additional_columns = []
if args.column:
    for p in args.column:
        [name, value] = p.split('=', 1)
        t = re.split(r'(?:\|)|(?:/)|(?::)', name, maxsplit=2)
        name = t[0]
        # Renamed from 'type' to avoid shadowing the builtin.
        col_type = None
        if len(t) == 2:
            col_type = t[1]
        if not col_type:
            col_type = 'varchar2(255)'
        additional_columns.append((name, col_type, value))

query.normalize_output()

from pathlib import Path
import pprint
# NOTE(review): leftover debug dump of the composed query — remove once stable.
p = Path('/tmp/kurt.xml')
p.write_text(str(query))
if args.command == 'create-oracle-table':
    # Describe the report's columns and emit a CREATE TABLE DDL for Oracle.
    d = query.describe(args.url, args.user, args. password)
    # Artificial key columns come first, then any -c extra columns.
    columns = [" a_key number(38, 0)", "a_workflow_history_key number(38, 0)"]
    for c in additional_columns:
        columns.append("%s %s"%(c[0], c[1]))
    for col in d:
        name = protect_keyword(col[0])
        # Map TMS report column types to Oracle column types.
        match col[1]:
            case 'text':
                columns.append(name + " varchar2(512 char)")
            case 'int':
                columns.append(name + " number(38,0)")
            case 'money':
                columns.append(name + " number(19,4)")
            case 'floating':
                columns.append(name + " binary_double")
            case 'datetime':
                columns.append(name + " date")
            case 'integer':
                columns.append(name + " number(12, 0)")
    sql = "create table ct_et_templates." + args.name + " (\n"
    sql = sql + ",\n ".join(columns)
    sql = sql + "\n)\n"
    # '-' or no destination means stdout.
    if not args.destination or args.destination == '-':
        print(sql)
    else:
        with open(args.destination, 'w') as f:
            f.write(sql)
elif args.command == 'create-ods-model':
    # Generate a dbt model (.yml schema + .sql model) for the report.
    # NOTE(review): 'create-ods-model' must be listed in the argparse
    # 'command' choices for this branch to be reachable.
    d = query.describe(args.url, args.user, args. password)
    file_name = cModelsDir + args.name + '.yml'
    f = open(file_name, 'w')  # mode 'w' truncates (old comment wrongly said append)
    f.write('version: %d\n' % args.version)
    f.write('models:' + '\n')
    f.write(' - name: ' + args.name + '_dbt\n')
    f.write(' description: "A starter dbt model"' + '\n')
    f.write(' columns:' + '\n')
    for col in d:
        f.write(' - name: ' + col[0] + '\n')
        f.write(' data_type: ' + col[1] + '\n')
    f.close()
    file_name = cModelsDir + args.name + '.sql'
    f = open(file_name, 'w')  # mode 'w' truncates (old comment wrongly said append)
    # Derive the external-table file prefix from the destination, if given.
    if args.destination and args.destination != '-':
        if ':' in args.destination:
            dest = args.destination.split(':', 2)
            path = dest[1]
        else:
            path = args.destination
        prefix = os.path.dirname(path)
    else:
        prefix = 'INBOX/TMS/' + args.name.upper() + '/'
    pars = "ptablename => '%s', ptemplatetablename => 'ou_tms.%s', pprefix => '%s'" % (args.name, args.name, prefix)
    print(f"creating table {args.name}")
    # The dbt post_hook creates the matching external table when the model builds.
    f.write('{{\n config(\n post_hook = "call ct_mrds.file_manager.create_external_table(%s)"\n )\n}}\n\n' % pars)
    f.write("{{ config(materialized='table') }}" + "\n")
    f.write('with source_data as (' + "\n")
    # Dummy casts give dbt the right column names and types.
    columns = []
    columns.append("cast (1 as number(38,0)) as a_key")
    columns.append("cast (1 as number(38,0)) as a_workflow_history_key")
    for col in d:
        name = protect_keyword(col[0])
        match col[1]:
            case 'text':
                columns.append("cast ('x' as varchar2(255 char)) as " + name)
            case 'int':
                columns.append("cast (1 as number(38, 0)) as " + name)
            case 'money':
                columns.append("cast (1.0 as number(19,4)) as " + name)
            case 'floating':
                columns.append("cast (1.0 as binary_double) as " + name)
            case 'datetime':
                columns.append("cast (sysdate as date) as " + name)
            case 'integer':
                columns.append("cast (1 as number(12, 0)) as " + name)
    f.write(' select\n ' + ',\n '.join(columns) + '\n')
    f.write(')\nselect * from source_data\n ')
    f.close()
elif args.command == 'retrieve':
    # Run the report and deliver the result to stdout, a file, or object storage.
    ret = query.execute(args.url, args.user, args. password)
    if query.format in ('scsv', 'standard_csv') and args.dataset:
        # Save result to temporary spooled file for further processing
        # We avoid doing this in memory to prevent issues with flow EffectivePermissions
        f = tempfile.SpooledTemporaryFile(mode = 'w+', max_size = 200*1024*1024)
        f.write(ret)
        del ret
        f.seek(0)
        # Replace embedded newlines for '<br/>'
        reader = csv.reader(f)
        sio = StringIO()
        writer = csv.writer(sio)
        for l in reader:
            l_tmp = [s.replace('\n', '<br/>') for s in l]
            writer.writerow(l_tmp)
        f.close()
        # Necessary to read the data into an array of lines for further processing
        sio.seek(0)
        lines_tmp = sio.readlines()
        del sio
        if not lines_tmp:
            ret = ""
        else:
            # Adding artificial columns A_KEY and A_WORKFLOW_HISTORY_KEY and added columns
            additional_headers = [t[0] for t in additional_columns]
            additional_values = [t[2] for t in additional_columns]
            headers = ['A_KEY','A_WORKFLOW_HISTORY_KEY'] + additional_headers + [protect_keyword(h) for h in lines_tmp[0].split(',')]
            lines = [','.join(headers) ]
            i = 0
            for l in lines_tmp[1:]:
                # A_KEY = dataset * cDatasetMultiplier + running row index.
                lines.append(str(args.dataset*cDatasetMultiplier + i) + ',' + str(args.dataset) + ',' + ','.join(additional_values + [l]) )
                i += 1
            del lines_tmp
            # Spooling again to temporary file to avoid duplication memory needs
            f = tempfile.SpooledTemporaryFile(mode = 'w+', max_size = 200*1024*1024)
            f.writelines(lines)
            del lines
            f.seek(0)
            ret = f.read()
            f.close()
    if not args.destination or args.destination == '-':
        print(ret, end='')
    elif ':' not in args.destination:
        with open(args.destination, 'w') as f:
            f.write(ret)
    else:
        # Destination of the form 'bucket:path' -> upload via the object store client.
        f = tempfile.NamedTemporaryFile(delete = False, mode = 'w', prefix = 'TMSDBT-', suffix = '.csv')
        f.write(ret)
        f.close()
        dest = args.destination.split(':', 2)
        bucket = dest[0]
        dirname = os.path.dirname(dest[1])
        filename = os.path.basename(dest[1])
        client = mrds.utils.objectstore.get_client()
        # NOTE(review): debug residue — echoes the whole uploaded file to stdout.
        with open(f.name, "r") as file:
            print(file.read())
        # NOTE(review): 'namespace' is not defined in this excerpt — presumably a
        # module-level value defined earlier in the file; verify.
        mrds.utils.objectstore.upload_file(client, f.name,namespace, bucket, dirname, filename)
        os.remove(f.name)
    # Exit 0 when data was returned, 1 when the report came back empty.
    if ret:
        sys.exit(0)
    else:
        sys.exit(1)

View File

@@ -0,0 +1,197 @@
import xml.etree.ElementTree as ET
import re
import base64
import sys
class XMLQuery:
    """Wrapper around a TMS report-generator XML request.

    Internal state is stored with leading underscores; the public attributes
    ``format``, ``layout``, ``parameter`` and ``xml`` are exposed through
    ``__getattr__``/``__setattr__`` (``format`` is validated, ``layout`` is
    newline-terminated, assigning ``xml`` re-parses the document).
    """

    def __init__(self, xml = None):
        self._format = 'xml'
        self._layout = ''
        self._parameter = {}
        if xml:
            self._parse_xml(xml)

    def _parse_xml(self, xml):
        # Populate format/layout/parameters from a serialized request document.
        self._tree = ET.fromstring(xml)
        layout_b64 = self._tree.find('layout').text
        self._layout = base64.b64decode(layout_b64).decode('utf-8')
        self._format = self._tree.find('format').get('type')
        self._parameter = {}
        for p in self._tree.findall('parameters/parameter'):
            self._parameter[p.get('name')] = p.text

    def execute(self, url, user, password):
        """POST the serialized request to the report service.

        Returns the response body on HTTP 200, None otherwise.
        """
        # curl -X POST --basic -u user:pwd --data @query.xml https://host:9443/report/
        import requests
        from requests.auth import HTTPBasicAuth
        data = str(self)
        basic = HTTPBasicAuth(user, password)
        response = requests.post(url, data=data, auth=basic, verify=False)
        if response.status_code == 200:
            response.encoding = "utf-8"
            return response.text
        return None

    @staticmethod
    def _columns_from_tree(tree):
        # Extract (name, type) pairs from the <PlainRow> column description;
        # 'unknown' is mapped to 'integer'.
        ret = []
        row = tree.find('PlainRow')
        for c in row.findall('Column'):
            name = c.text
            type = c.get('type')
            if type == 'unknown':
                type = 'integer'
            ret.append((name, type))
        return ret

    def describe(self, url, user, password):
        """Return [(column_name, column_type), ...] for this query.

        Temporarily switches the request format to 'xml' and restores the
        caller's format afterwards (the original implementation saved
        orig_format but never restored it, leaking format='xml').
        """
        orig_format = self.format
        self.format = 'xml'
        try:
            ret = self.execute(url, user, password)
        finally:
            self.format = orig_format
        # Truncate the (possibly huge) response after the first PlainRow and
        # close the root element so it parses as a standalone document.
        m = re.match(r'^.*?<PlainRow>.*?</PlainRow>', ret, re.DOTALL)
        s = m[0] + '\n</report-generator>'
        tree = ET.fromstring(s)
        return self._columns_from_tree(tree)

    @staticmethod
    def describe_simple(url, user, password, xml):
        """One-shot describe for a raw request document (no instance needed)."""
        query = XMLQuery(xml)
        query.format = 'xml'
        ret = query.execute(url = url, user = user, password = password)
        tree = ET.fromstring(ret)
        return XMLQuery._columns_from_tree(tree)

    def normalize_output(self, date_format = 'dd/MM/yyyy', time_format = 'HH:mm:ss'):
        """Force deterministic date/time formats and disable number formatting
        in the layout, so downstream CSV parsing is stable."""
        lines = self.layout.splitlines()
        lines = [re.sub(r'^date_format\s*=.*', 'date_format=' + date_format, l) for l in lines]
        lines = [re.sub(r'^time_format\s*=.*', 'time_format=' + time_format, l) for l in lines]
        lines = [re.sub(r'^NoNumberFormatting\s*=.*', 'NoNumberFormatting=1', l) for l in lines]
        self.layout = '\n'.join(lines)

    def __setattr__(self, name, value):
        # Validate the public 'format' attribute against supported values.
        if name == 'format' and value not in ('bin','xml','xml3','html','txt','csv','standard_csv', 'scsv', 'pdf'):
            raise Exception("Invalid report format '" + value + "'")
        if not name.startswith('_'):
            name = '_' + name
        # Layouts must be newline-terminated for the service.
        if name == '_layout' and not value.endswith('\n'):
            value = value + '\n'
        # Assigning 'xml' re-parses the whole document instead of storing it.
        if name == '_xml':
            self._parse_xml(value)
            return
        self.__dict__[name] = value

    def __getattr__(self, name):
        # Public names map onto their underscore-prefixed storage slots.
        if not name.startswith('_'):
            name = '_' + name
        try:
            return self.__dict__[name]
        except KeyError:
            raise AttributeError(name)

    def __str__(self):
        """Serialize the request as the report-generator XML document."""
        parameters = ''
        for k in self._parameter:
            parameters = parameters + "\n<parameter name='%s'>%s</parameter>" % (k, self._parameter[k])
        layout_b64 = base64.b64encode(self.layout.encode('utf-8')).decode('utf-8')
        return ('<?xml version="1.0" encoding="utf-8"?>\n' + \
                '<report-generator>\n' + \
                ' <format type="%s"/>\n' + \
                ' <layout>\n%s</layout>\n' + \
                ' <parameters>%s\n</parameters>' + \
                '</report-generator>') % (self._format, layout_b64, parameters)
if __name__ == "__main__":
    # Ad-hoc manual test driver: pass a request XML file on the command line,
    # print the layout before and after normalization, then describe columns.
    file = sys.argv[1]
    print(file)
    with open(file) as f:
        xml = f.read()
    query = XMLQuery(xml)
    print(query.layout)
    query.normalize_output()
    print(query.layout)
    #query.format='xml'
    #ret = query.execute(url = 'https://tmsxd104.ecbt1.tadnet.net:9443/report/', user = 'schilli', password = 'chili03')
    #print(ret)
    # NOTE(review): hard-coded test credentials below — consider removing.
    desc = XMLQuery.describe_simple(url = 'https://tmsxd104.ecbt1.tadnet.net:9443/report/', user = 'schilli', password = 'chili03', xml = xml)
    print(str(desc))

View File

@@ -0,0 +1,355 @@
"""
DAG: w_ODS_TMS_TRANSACTION (expanded example)
Purpose:
- Load layout+parameter metadata from TMS-layouts/w_ODS_TMS_TRANSACTION.yml
- Call connectors/tms/TMSDBT.py to retrieve data into CSV in object storage
- On first run, generate Oracle DDL and create an external table
- Process file and record status in MRDS workflow tables
Notes:
- This is an expanded, readable version of the factory-generated DAG.
- Replace paths/usernames/password references as appropriate.
"""
import copy
import itertools
import json
import logging
import os
import re
import subprocess
import sys
import traceback
from datetime import datetime, timedelta
from pathlib import Path
import yaml
from airflow import DAG
from airflow.operators.python import PythonOperator
from pytz import timezone
# --- Project-specific deps (must exist in your Airflow image) ---
from mrds.core import main # noqa: F401 # imported to mirror the factory env
import mrds.utils.manage_files as mf
import mrds.utils.manage_runs as mr
# ---------- Paths & constants ----------
# Resolve directories relative to this DAG file ('./_' fallback for exec-style loads).
gScriptDir = Path(globals().get("__file__", "./_")).absolute().parent
gDataDir = str(gScriptDir / "TMS-layouts") + "/"
gConfigDir = str(gScriptDir / "config")
gConnDir = "/opt/airflow/python/connectors/tms/"
gTableDir = str(gScriptDir / "TMS-tables") + "/"
DAG_NAME = "w_ODS_TMS_TRANSACTION"
ODS_TABLE = DAG_NAME
DATABASE_NAME = "ODS"
WF_NAME = DAG_NAME
default_args = {
    "owner": "ecb",
    "depends_on_past": False,
    "email_on_failure": False,
    "email_on_retry": False,
    "retries": 0,
    "execution_timeout": timedelta(minutes=60),
    "retry_delay": timedelta(minutes=5),
}
# ---------- Load YAML configs once on parse ----------
# Layout metadata for this report; missing/empty file yields {}.
with open(gDataDir + DAG_NAME + ".yml", "r") as f:
    report_desc = yaml.safe_load(f) or {}
with open(gConfigDir + "/TMS.yml", "r") as f:
    tms_config = yaml.safe_load(f)
# TMS + storage config
tms_url = tms_config["TMS-URL"]
tms_user = tms_config["TMS-user"]
tms_pwd = tms_config["TMS-password"]
prefix = tms_config["dest-prefix"] + DAG_NAME + "/" + DAG_NAME + "/"
data_prefix = tms_config["data-prefix"] + DAG_NAME + "/"
dest = tms_config["dest-bucket"] + ":" + prefix
# Visible vs hidden params (from layout YAML): visible ones are exposed as
# Airflow DAG params; hidden ones are merged back in inside the task.
params_visible = {}
params_hidden = {}
params_dict = report_desc.get("parameters") or {}
for p, meta in params_dict.items():
    val = meta.get("value", None)
    if not meta.get("hidden", False):
        params_visible[p] = val
    else:
        params_hidden[p] = val
# ---------- Helpers (parameter handling) ----------
def _enum_param_combinations_recursive(params, keys):
    """
    Build all combinations of params (cartesian product), supporting
    'column(<name>)' derived lists aligned by index.

    Returns a list of rows; each row is a list of (key, value) tuples.
    """
    k = None
    result = []
    keys = list(keys)  # safe copy
    # Advance to the first key with a usable value (truthy, or explicitly "").
    while keys:
        k = keys.pop(0)
        v = params[k]
        if v or v == "":
            break
    if not k:
        return []
    # NOTE(review): if the *last* key has a falsy value (None/False/0) the loop
    # exits without break and that falsy value is still expanded below — confirm
    # this is intended.
    v = v if isinstance(v, list) else [v]
    # derived columns aligned with v (same length)
    derived_columns = []
    # params_dict[k] holds the definition, not just the value
    pdef = params_dict.get(k, {})
    for c in list(pdef):
        if re.match(r"column\(.*\)$", c):
            vtmp = pdef[c]
            vtmp = vtmp if isinstance(vtmp, list) else [vtmp]
            derived_columns.append((c, vtmp))
    if not keys:
        # Base case: one row per value of this key (plus aligned derived columns).
        for i, value in enumerate(v):
            row = [(k, value)]
            for col_key, aligned_values in derived_columns:
                row.append((col_key, aligned_values[i]))
            result.append(row)
        return result
    # Recursive case: cross every deeper combination with every value of this key.
    combinations = _enum_param_combinations_recursive(params, keys)
    for row in combinations:
        for i, vtmp in enumerate(v):
            new_row = copy.deepcopy(row)
            new_row.append((k, vtmp))
            for col_key, aligned_values in derived_columns:
                new_row.append((col_key, aligned_values[i]))
            result.append(new_row)
    return result
def _enum_param_combinations(params, sequential=False):
    """Enumerate every parameter combination (cartesian product).

    The factory's 'sequential' mode was buggy and unused, so it is
    deliberately not implemented here either; the flag is kept only for
    signature compatibility.
    """
    all_keys = list(params)
    return _enum_param_combinations_recursive(params, all_keys)
def _allowed_select(table, expression, condition="1 = 1"):
    """
    Guarded select used by eval_params(select(...)).
    Only a small whitelist of tables may be read, to avoid arbitrary queries.
    """
    whitelist = {
        ODS_TABLE.upper(),
        "DUAL",
        "CT_MRDS.A_WORKFLOW_HISTORY",
    }
    if table.upper() not in whitelist:
        raise Exception(f"Not allowed to select from {table}")
    rows = mr.select_ods_tab(table, expression, condition)
    return rows[0]
def _eval_param(v):
"""
Evaluate special functional values:
- select(...) => guarded DB helper above
- eval(...) => strongly discouraged; keep disabled or restricted
"""
s = str(v) if v is not None else ""
if re.match(r"\s*select\(.*\)", s):
# Expose only 'select' symbol to eval
return eval(s, {"select": _allowed_select}, {})
if re.match(r"\s*eval\(.*\)\s*$", s):
# If you really must support eval, strictly sandbox or remove this path.
raise ValueError("eval(...) not allowed in this hardened DAG.")
return v
def _finalize_param_list(param_list):
    """
    Apply replacements and drop virtual params according to YAML definitions.

    Returns the finalized parameters as a list of (key, value) tuples.
    """
    resolved = dict(param_list)
    # Substitute parameter tokens inside other parameters' string values.
    for pname, meta in params_dict.items():
        target = meta.get("replace_parameter")
        if not target:
            continue
        if target in resolved and pname in resolved and isinstance(resolved[target], str):
            resolved[target] = resolved[target].replace(pname, str(resolved[pname]))
    # Keep only the parameters not marked 'virtual' in the YAML.
    return [
        (key, value)
        for key, value in resolved.items()
        if not params_dict.get(key, {}).get("virtual", False)
    ]
# ---------- Core work ----------
def execute_report(**context):
    """
    For each parameter combination:
      - create workflow key
      - call TMSDBT.py retrieve to land CSV
      - if first time, create Oracle table from generated DDL
      - process file, record status in MRDS workflow tables (Y/N)
    """
    logger = logging.getLogger("airflow.task")
    logger.setLevel(logging.DEBUG)
    run_id = context["dag_run"].run_id
    # Hidden params are merged back in here; only visible ones are DAG params.
    all_params = {**params_visible, **params_hidden}
    # 1) Compute combinations
    combos = _enum_param_combinations(all_params)
    # 2) Evaluate select(...) etc and finalize
    evaluated = []
    for combo in combos or [[]]:
        # first pass: special evaluations
        pair_list = []
        for k, v in combo:
            pair_list.append((k, _eval_param(v)))
        # second pass: replacements + pruning
        evaluated.append(_finalize_param_list(pair_list))
    # if no combos at all, ensure we run once
    if not evaluated:
        evaluated = [[]]
    # Timestamp (Berlin time) shared by all files of this run.
    ts = "{:%Y%m%d_%H%M%S}".format(datetime.now(timezone("Europe/Berlin")))
    for idx, param_list in enumerate(evaluated, start=1):
        wf_key = mr.init_workflow(DATABASE_NAME, WF_NAME, run_id)
        file_name = f"{WF_NAME}.{wf_key}.{ts}.csv"
        try:
            # Build connector command safely (no shell quoting games)
            cmd = [
                sys.executable,  # 'python'
                os.path.join(gConnDir, "TMSDBT.py"),
                "retrieve",
                "--name", WF_NAME,
                "--url", tms_url,
                "-U", tms_user,
                "--password", tms_pwd,
                "--layoutfile", gDataDir + DAG_NAME + ".fkr",
                "-f", "scsv",
                "--dataset", str(wf_key),
                "-d", dest + file_name,
            ]
            # Map params to -p (parameter) or -c (additional column) switches.
            for k, v in param_list:
                sval = "" if v is None else str(v).rstrip()
                m = re.match(r"column\((.*)\)$", k)
                if m:
                    cmd.extend(["-c", f'{m.group(1)}={sval}'])
                else:
                    cmd.extend(["-p", f"{k}={sval}"])
                mr.set_workflow_property(wf_key, DATABASE_NAME, k, sval)
            logger.debug("Running connector: %s", json.dumps(cmd))
            res = subprocess.run(cmd, capture_output=True, check=False)
            logger.debug("stdout: %s", res.stdout.decode(errors="ignore"))
            logger.debug("stderr: %s", res.stderr.decode(errors="ignore"))
            if res.returncode is None:
                raise RuntimeError("Connector returned no status")
            # rc=1 is the connector's "no data" signal, not an error.
            if res.returncode == 1:
                logger.info("No data returned for wf_key=%s (continuing)", wf_key)
                mr.finalise_workflow(wf_key, "Y")
                continue
            if res.returncode != 0:
                raise RuntimeError(f"Connector failed (rc={res.returncode})")
            # Data landed -> ensure source config exists, bootstrap table if needed
            cfg = mf.execute_query(
                "select * from CT_MRDS.A_SOURCE_FILE_CONFIG "
                f"where a_source_key = 'TMS' and table_id = '{ODS_TABLE}'"
            )
            if not cfg:
                # First run for this table: generate the DDL file via the connector.
                ddl_cmd = [
                    sys.executable,
                    os.path.join(gConnDir, "TMSDBT.py"),
                    "create-oracle-table",
                    "--name", WF_NAME,
                    "--url", tms_url,
                    "-U", tms_user,
                    "--password", tms_pwd,
                    "--layoutfile", gDataDir + DAG_NAME + ".fkr",
                    "-d", gTableDir + WF_NAME + ".sql",
                ]
                for k, v in param_list:
                    sval = "" if v is None else str(v).rstrip()
                    m = re.match(r"column\((.*)\)$", k)
                    if m:
                        ddl_cmd.extend(["-c", f'{m.group(1)}={sval}'])
                    else:
                        ddl_cmd.extend(["-p", f"{k}={sval}"])
                logger.debug("Generating DDL: %s", json.dumps(ddl_cmd))
                ddl_res = subprocess.run(ddl_cmd, capture_output=True, check=True)
                logger.debug("DDL stdout: %s", ddl_res.stdout.decode(errors="ignore"))
                logger.debug("DDL stderr: %s", ddl_res.stderr.decode(errors="ignore"))
                # Execute DDL and create external table + source file config.
                sql = Path(gTableDir + WF_NAME + ".sql").read_text()
                mf.execute(sql)
                mf.add_column_date_format(
                    f"CT_ET_TEMPLATES.{ODS_TABLE}", "DEFAULT", "DD/MM/YYYY HH24:MI:SS"
                )
                mf.create_external_table(ODS_TABLE, f"CT_ET_TEMPLATES.{ODS_TABLE}", data_prefix)
                mf.add_source_file_config(
                    "TMS",
                    "INPUT",
                    DAG_NAME,
                    DAG_NAME,
                    r".*\.csv",
                    ODS_TABLE,
                    f"CT_ET_TEMPLATES.{ODS_TABLE}",
                )
            # Process landed file (register, move, etc. as per your mf impl)
            mf.process_source_file(prefix, file_name)
            mr.finalise_workflow(wf_key, "Y")
        except BaseException as ex:
            # rich error logging, then mark workflow failed and re-raise
            ex_type, ex_value, ex_tb = sys.exc_info()
            tb = traceback.extract_tb(ex_tb)
            stack = [
                f"File: {t[0]}, Line: {t[1]}, Func: {t[2]}, Code: {t[3]}"
                for t in tb
            ]
            logging.error("Exception type: %s", ex_type.__name__)
            logging.error("Exception message: %s", ex_value)
            logging.error("Stack trace: %s", stack)
            mr.finalise_workflow(wf_key, "N")
            raise
# ---------- DAG definition ----------
# Single-task DAG: all the work happens inside execute_report.
with DAG(
    dag_id=DAG_NAME,
    default_args=default_args,
    description=DAG_NAME,
    schedule_interval=None,  # manual trigger
    params=params_visible,  # visible-only; hidden merged inside task
    start_date=datetime(2025, 1, 1),
    catchup=False,
    tags=[DAG_NAME],
) as dag:
    retrieve_report = PythonOperator(
        task_id="retrieve_report",
        python_callable=execute_report,
        execution_timeout=timedelta(minutes=30),
    )

View File

View File

@@ -0,0 +1,86 @@
dev:
DEVO_USERNAME: "ap-devo_lab-mrds"
IMPALA_HOSTNAME: "impala-proxy-devo-lab21-impala01.dw-devo-lab21.om2y56.b0.cloudera.site"
HIVE_HOSTNAME: 'hs2-devo-lab21-hive01.dw-devo-lab21.om2y56.b0.cloudera.site'
RANGER_HOSTNAME: "https://devo-lab21-dl-gateway.devo-lab.om2y56.b0.cloudera.site:443/devo-lab21-dl/cdp-proxy-api/ranger"
BUCKET_PREFIX: "s3a://devo-crp-ffppyd8q/"
DEVO_SECRET: "ocid1.vaultsecret.oc1.eu-frankfurt-1.amaaaaaa2ky4jjya3tsglrzfgiyfisxchref774l5y4nrler2vn54lr3li7q"
S3_LOCATION_URI: "https://devo-crp-ffppyd8q.bucket.vpce-040b28f5818b670c1-owicl3ow.s3.eu-central-1.vpce.amazonaws.com/{0}/db/"
DEVO_USERNAME_RQSD: "ap-devo-rqsd-lab"
DEVO_SECRET_RQSD: "ocid1.vaultsecret.oc1.eu-frankfurt-1.amaaaaaa2ky4jjyap6wtzobzob7qizvk4nocszlcaxhwijgzejbvryt3uzbq"
FULL_ACCESS_LIST_RAR: "DISC-DC-RAR-R"
FULL_ACCESS_LIST_MOPDB: ""
FULL_ACCESS_LIST_RQSD: ""
tst:
DEVO_USERNAME: "ap-devo_tst-mrds"
IMPALA_HOSTNAME: "t-impala.devo.escb.eu"
HIVE_HOSTNAME: "hs2-devo-tst21-hive01.dw-devo-tst21.om2y56.b0.cloudera.site"
RANGER_HOSTNAME: "https://devo-tst21-dl-gateway.devo-tst.om2y56.b0.cloudera.site:443/devo-tst21-dl/cdp-proxy-api/ranger"
BUCKET_PREFIX: "s3a://devo-crp-sbul3ju3/"
DEVO_SECRET: "ocid1.vaultsecret.oc1.eu-frankfurt-1.amaaaaaa2ky4jjyayqqotyowhpoml3v5szkwhmtu4rq6bplpkvdruzupz3ma"
S3_LOCATION_URI: "https://devo-crp-sbul3ju3.bucket.vpce-040b28f5818b670c1-owicl3ow.s3.eu-central-1.vpce.amazonaws.com/{0}/db/"
DEVO_USERNAME_RQSD: "ap-devo-rqsd-tst"
DEVO_SECRET_RQSD: "ocid1.vaultsecret.oc1.eu-frankfurt-1.amaaaaaa2ky4jjyap6wtzobzob7qizvk4nocszlcaxhwijgzejbvryt3uzbq"
FULL_ACCESS_LIST_RAR: "DISC-TC-RAR-R"
FULL_ACCESS_LIST_MOPDB: ""
FULL_ACCESS_LIST_RQSD: ""
acc:
DEVO_USERNAME: "ap-devo_acc-mrds"
IMPALA_HOSTNAME: "impala-proxy-devo-acc21-impala01.dw-devo-acc21.inym23.b0.cloudera.site"
HIVE_HOSTNAME: "hs2-devo-acc21-hive01.dw-devo-acc21.inym23.b0.cloudera.site"
RANGER_HOSTNAME: "https://devo-acc21-dl-gateway.devo-acc.inym23.b0.cloudera.site/devo-acc21-dl/cdp-proxy-api/ranger/"
BUCKET_PREFIX: "s3a://devo-crp-sbc9vbsu/"
DEVO_SECRET: "ocid1.vaultsecret.oc1.eu-frankfurt-1.amaaaaaa2ky4jjya3x3nic3vxsnpzlfshz2ubj6kekny5tvaqsnwkuh2hw2a"
S3_LOCATION_URI: "https://devo-crp-sbc9vbsu.bucket.vpce-0bf4fa440fb60935d-6m9iqoo9.s3.eu-central-1.vpce.amazonaws.com/{0}/db/"
DEVO_USERNAME_RQSD: "ap-devo-rqsd-acc"
DEVO_SECRET_RQSD: "ocid1.vaultsecret.oc1.eu-frankfurt-1.amaaaaaa2ky4jjyamzhgatnso57mubvg3c6k4ens3orcx4dieo6efukuvm4a"
FULL_ACCESS_LIST_RAR: "DISC-AC-RAR-R"
FULL_ACCESS_LIST_MOPDB: ""
FULL_ACCESS_LIST_RQSD: ""
prd:
DEVO_USERNAME: "ap-devo_prd-mrds"
IMPALA_HOSTNAME: "impala-proxy-devo-prd21-impala01.dw-devo-prd21.inym23.b0.cloudera.site"
HIVE_HOSTNAME: "hs2-devo-prd21-hive01.dw-devo-prd21.inym23.b0.cloudera.site"
RANGER_HOSTNAME: "https://devo-prd21-dl-gateway.devo-prd.inym23.b0.cloudera.site/devo-prd21-dl/cdp-proxy-api/ranger/"
BUCKET_PREFIX: "s3a://devo-crp-2gn5maj9/"
DEVO_SECRET: "ocid1.vaultsecret.oc1.eu-frankfurt-1.amaaaaaa2ky4jjyace73o3xowa3f3jkw4diqzoiyc6skt34sqnnx4yrbykmq"
S3_LOCATION_URI: "https://devo-crp-2gn5maj9.bucket.vpce-0aa6cf4490536dfd5-qgy4w5sz.s3.eu-central-1.vpce.amazonaws.com/{0}/db/"
DEVO_USERNAME_RQSD: "ap-devo-rqsd-prd"
DEVO_SECRET_RQSD: "ocid1.vaultsecret.oc1.eu-frankfurt-1.amaaaaaa2ky4jjyacodc43tfgumkw4qyzw4s3j4jp42vp2elakkpwwrmivqa"
FULL_ACCESS_LIST_RAR: "DISC-PC-RAR-R"
FULL_ACCESS_LIST_MOPDB: ""
FULL_ACCESS_LIST_RQSD: ""
rar:
corporate_store: "crp_rar"
oracle_metadata_table: "CORR_RAR.NH_METADATA_INVENTORY"
oracle_igam_table: "CT_MRDS.A_DEVO_SOURCES_IGAM"
oracle_mgmt_table: "CT_MRDS.A_DEVO_REPLICA_MGMT_RAR"
target_s3_bucket: "rar/db"
tech_meta_data_fields: "tec_ingestion_date String, tec_execution_date String, tec_run_id String"
mopdb:
corporate_store: "crp_mopdb"
oracle_metadata_table: "CT_MOPDB.MOPDB_METADATA_INVENTORY"
oracle_igam_table: "CT_MRDS.A_DEVO_SOURCES_IGAM"
oracle_mgmt_table: "CT_MRDS.A_DEVO_REPLICA_MGMT_MOPDB"
target_s3_bucket: "mopdb/db"
tech_meta_data_fields: "tec_ingestion_date String, tec_execution_date String, tec_run_id String"
rqsd:
corporate_store: "crp_rqsd"
oracle_metadata_table: "CT_MRDS.A_DEVO_METADATA_INVENTORY"
oracle_igam_table: "CT_MRDS.A_DEVO_SOURCES_IGAM"
oracle_mgmt_table: "CT_MRDS.A_DEVO_REPLICA_MGMT_RQSD"
target_s3_bucket: "rqsd/db"
tech_meta_data_fields: "tec_ingestion_date String, tec_execution_date String, tec_run_id String"
# -- Look up the replica's target table name (fill in OWNER and TABLE_NAME):
# SELECT DISTINCT TABLE_ALIAS FROM {oracle_mgmt_table}
#   WHERE OWNER = '<owner>'
#     AND TABLE_NAME = '<table_name>';
# -- Look up the type of access for a table (fill in OWNER and TABLE_NAME):
# SELECT DISTINCT RAR3_TYPE_OF_ACCESS FROM {oracle_metadata_table}
#   WHERE A_VALID_TO > SYSDATE
#     AND OWNER = '<owner>'
#     AND TABLE_NAME = '<table_name>';

View File

@@ -0,0 +1,65 @@
#!/usr/bin/env python3
import sys, json
import boto3
from botocore.exceptions import ClientError
from botocore.config import Config
BUCKET = "devo-crp-sbc9vbsu"  # bucket targeted by all connectivity probes below
PREFIX = "mopdb/db/"  # adjust if needed
def show(e):
    """Dump a botocore error's structured fields (code/message/request id) to stderr.

    Works with any exception: missing fields simply print as None.
    """
    resp = getattr(e, "response", {})
    err = resp.get("Error", {})
    code = err.get("Code")
    msg = err.get("Message")
    rid = resp.get("ResponseMetadata", {}).get("RequestId")
    print(f"{type(e).__name__}: {code} {msg} (RequestId={rid})", file=sys.stderr)
def main(endpoint_url=None, region=None, force_path=False):
    """Probe STS identity and S3 bucket/prefix permissions step by step.

    Returns 0 on full success, or 1-4 identifying the first failing probe.
    """
    session = boto3.Session()
    addressing = "path" if force_path else "auto"
    cfg = Config(s3={"addressing_style": addressing})
    s3 = session.client("s3", region_name=region, endpoint_url=endpoint_url, config=cfg)
    sts = session.client("sts", region_name=region)

    # Probe 1: are the credentials valid for STS at all?
    try:
        ident = sts.get_caller_identity()
        print(f"Caller: {ident['Arn']} (acct {ident['Account']})")
    except Exception as e:
        print("Could not call STS get-caller-identity — credentials not valid for STS.", file=sys.stderr)
        show(e)
        return 1

    # Probe 2: is the bucket reachable at all?
    try:
        s3.head_bucket(Bucket=BUCKET)
        print(f"head_bucket OK on s3://{BUCKET}")
    except ClientError as e:
        print("head_bucket failed:", file=sys.stderr)
        show(e)
        return 2

    # Probe 3: test just the ListBucket permission (zero keys requested).
    try:
        s3.list_objects_v2(Bucket=BUCKET, Prefix=PREFIX, MaxKeys=0)
        print(f"list_objects_v2 OK on prefix '{PREFIX}' (permission exists)")
    except ClientError as e:
        print("list_objects_v2 failed:", file=sys.stderr)
        show(e)
        return 3

    # Probe 4: fetch one key to confirm the data path works.
    try:
        resp = s3.list_objects_v2(Bucket=BUCKET, Prefix=PREFIX, MaxKeys=1)
        print("First key:", resp.get("Contents", [{}])[0].get("Key"))
    except ClientError as e:
        print("list_objects_v2 (MaxKeys=1) failed:", file=sys.stderr)
        show(e)
        return 4

    return 0
if __name__ == "__main__":
    # Allow optional args: --endpoint-url URL --region eu-central-1 --force-path
    # NOTE(review): a value-taking flag given as the LAST argv entry would raise
    # IndexError on sys.argv[i+1]; acceptable for an ad-hoc probe, but verify.
    url = None; reg = None; force = False
    for i,a in enumerate(sys.argv):
        if a == "--endpoint-url": url = sys.argv[i+1]
        if a == "--region": reg = sys.argv[i+1]
        if a == "--force-path": force = True
    sys.exit(main(endpoint_url=url, region=reg, force_path=force))

View File

@@ -0,0 +1,129 @@
import os
import yaml
import datetime
import pandas as pd
from mrds.utils.secrets import get_secret
import mrds.utils.manage_runs as runManager
import mrds.utils.manage_files as fileManager
import mrds.utils.sql_statements as sqls
import oci
from impala.dbapi import (
connect,
ProgrammingError,
DatabaseError,
IntegrityError,
OperationalError,
)
from impala.error import HiveServer2Error
def get_impala_connection(hostname: str, user: str, secret: str):
    """Open an Impala connection over HTTPS (HiveServer2 HTTP transport, PLAIN auth)."""
    connection_settings = dict(
        host=hostname,
        port=443,
        auth_mechanism="PLAIN",
        user=user,
        password=secret,
        use_http_transport=True,
        http_path="cliservice",
        use_ssl=True,
    )
    return connect(**connection_settings)
def execute_query(query: str, user, hostname, password):
    """Open an Impala connection and run *query* on it.

    Returns the (columns, result) pair from execute_devo_query.
    """
    connection = get_impala_connection(hostname, user, password)
    print(connection)
    return execute_devo_query(query, connection)
def execute_devo_query(query: str, conn):
    """Execute *query* on an open Impala connection.

    Returns (None, rowcount); the cursor and connection are always closed.

    Bug fixed: the original raised ``Exception(status_code=..., detail=...)``,
    but ``Exception`` accepts no keyword arguments, so every error path died
    with a ``TypeError`` instead of the intended message. The messages are
    preserved and the root cause is now chained with ``from``.
    """
    cursor = conn.cursor()
    print("executing query")
    try:
        cursor.execute(query)
        # rowcount is the number of rows affected by the statement.
        return None, cursor.rowcount
    except OperationalError as oe:
        raise Exception("Failed to connect to Impala: " + str(oe)) from oe
    except ProgrammingError as pe:
        raise Exception("Query syntax error: " + str(pe)) from pe
    except IntegrityError as ie:
        raise Exception("Insufficient permissions: " + str(ie)) from ie
    except DatabaseError as db_err:
        raise Exception("Database error: " + str(db_err)) from db_err
    except HiveServer2Error as au_err:
        raise Exception("HiveServer2Error error: " + str(au_err)) from au_err
    except Exception as e:
        raise Exception("An unexpected error occurred: " + str(e)) from e
    finally:
        # Best-effort cleanup; a close failure is surfaced explicitly.
        try:
            if cursor:
                cursor.close()
            if conn:
                conn.close()
        except Exception as e:
            raise Exception("Failed to close the connection: " + str(e)) from e
def initialize_task(workflow_context, task_name):
    """Register a task run for this workflow and return its history key."""
    return runManager.init_task(
        task_name,
        workflow_context["run_id"],
        workflow_context["a_workflow_history_key"],
    )
def initialize_config(config_file_path):
    """Load and return a YAML configuration file.

    Raises FileNotFoundError when the file does not exist.
    """
    if not os.path.exists(config_file_path):
        raise FileNotFoundError(f"Configuration file {config_file_path} not found.")
    with open(config_file_path, "r") as f:
        return yaml.safe_load(f)
def main(env_config_path, env, table, corporate_store):
    """Refresh Impala metadata and statistics for one replicated table.

    Loads per-environment settings, fetches the DEVO password from the
    secret store, then runs INVALIDATE METADATA and COMPUTE STATS.
    Returns True on success.

    Bug fixed: both error paths used a bare ``except:`` followed by
    ``raise(Exception)``, which swallowed the root cause entirely. The
    raised type stays ``Exception`` for caller compatibility, but the
    original error is now chained with ``from``.
    """
    # Init setup: load the environment section from the config file.
    envs_info = initialize_config(env_config_path)
    environment_info = envs_info[env]
    try:
        devo_secret_name = environment_info["DEVO_SECRET"]
        password = get_secret(devo_secret_name)
    except Exception as exc:
        print("Failed to retrieve credentials from secrets")
        raise Exception("Failed to retrieve credentials from secrets") from exc
    # Refresh metadata, then recompute table statistics on the DEVO side.
    try:
        execute_query(f"INVALIDATE METADATA {corporate_store}.{table}", environment_info['DEVO_USERNAME'], environment_info['IMPALA_HOSTNAME'], password)
        execute_query(f"COMPUTE STATS {corporate_store}.{table}", environment_info['DEVO_USERNAME'], environment_info['IMPALA_HOSTNAME'], password)
    except Exception as exc:
        print("Failed to retrieve DEVO data, error during connection or request")
        raise Exception("Failed to refresh DEVO metadata/stats for "
                        f"{corporate_store}.{table}") from exc
    return True

View File

@@ -0,0 +1,128 @@
#!/usr/bin/env python3
import argparse, sys
from urllib.parse import urlparse
import boto3
from botocore.config import Config
from botocore.exceptions import ClientError, EndpointConnectionError, NoCredentialsError, ReadTimeoutError, ConnectTimeoutError
def parse_s3_uri(s3_uri: str):
    """Split 's3://bucket/key...' into (bucket, key_prefix).

    Raises ValueError for non-s3 schemes or a missing bucket.
    """
    if not s3_uri.startswith("s3://"):
        raise ValueError("S3 URI must start with 's3://'")
    parsed = urlparse(s3_uri)
    bucket = parsed.netloc
    if not bucket:
        raise ValueError("Missing bucket in S3 URI")
    return bucket, parsed.path.lstrip("/")
def parse_location(location: str):
    """Accept s3://... OR https://...amazonaws.com/... and return (bucket, prefix)."""
    if location.startswith("s3://"):
        return parse_s3_uri(location)
    if location.startswith(("http://", "https://")):
        parsed = urlparse(location)
        host = parsed.netloc
        key_path = parsed.path.lstrip("/")
        if ".bucket." in host:
            # Bucket-scoped VPC endpoint: <bucket>.bucket.vpce-xxxx.s3.<region>...
            return host.split(".bucket.", 1)[0], key_path
        if ".s3." in host and not host.startswith("s3."):
            # Virtual-hosted style: <bucket>.s3.<region>...
            return host.split(".s3.", 1)[0], key_path
        if host.startswith("s3."):
            # Path-style: s3.<region>.../<bucket>/<prefix>
            bucket, _, key_prefix = key_path.partition("/")
            return bucket, key_prefix
    raise ValueError(f"Unsupported location: {location}")
def iter_keys(s3, bucket: str, prefix: str, page_size: int, max_items: int, verbose: bool):
    """Yield object keys under bucket/prefix via paginated list_objects_v2.

    max_items == 0 means no limit; progress goes to stderr when verbose.
    Fix: removed a stray debug ``print('here')`` that wrote to stdout, which
    is where the caller prints the actual key listing.
    """
    paginator = s3.get_paginator("list_objects_v2")
    kwargs = {"Bucket": bucket, "Prefix": prefix}
    pagination = {"PageSize": page_size}
    if max_items > 0:
        pagination["MaxItems"] = max_items
    total = 0
    page_num = 0
    for page in paginator.paginate(**kwargs, PaginationConfig=pagination):
        page_num += 1
        contents = page.get("Contents", []) or []
        if verbose:
            print(f"[page {page_num}] fetched {len(contents)} keys (running total={total + len(contents)})",
                  file=sys.stderr, flush=True)
        for obj in contents:
            yield obj["Key"]
            total += 1
def main():
    """CLI entry point: parse args, build an S3 client, and print keys to stdout.

    Exit codes: 0 on success, 1 on AWS/credential/network errors, 130 on Ctrl-C.
    """
    ap = argparse.ArgumentParser(description="List files under an S3 location quickly and safely.")
    ap.add_argument("location", help="s3://bucket/prefix/ OR https://<vpc-endpoint-host>/<prefix>")
    ap.add_argument("--region", default=None, help="AWS region (e.g., eu-central-1)")
    ap.add_argument("--profile", default=None, help="AWS profile to use")
    ap.add_argument("--endpoint-url", default=None,
                    help="Custom S3 endpoint (e.g., https://s3.eu-central-1.vpce.amazonaws.com)")
    ap.add_argument("--force-path-addressing", action="store_true",
                    help="Force path-style addressing (useful with bucket-scoped VPCe hostnames)")
    ap.add_argument("--page-size", type=int, default=1000, help="S3 page size (default 1000)")
    ap.add_argument("--max-items", type=int, default=0, help="Stop after N keys (0 = no limit)")
    ap.add_argument("--connect-timeout", type=float, default=10.0, help="Seconds (default 10)")
    ap.add_argument("--read-timeout", type=float, default=30.0, help="Seconds (default 30)")
    ap.add_argument("--retries", type=int, default=3, help="Max retry attempts (default 3)")
    ap.add_argument("--relative", action="store_true", help="Print keys relative to the prefix")
    ap.add_argument("--verbose", "-v", action="store_true", help="Print progress to stderr")
    args = ap.parse_args()
    bucket, prefix = parse_location(args.location)
    # Session & client with explicit timeouts and optional path addressing
    sess_kwargs = {}
    if args.profile:
        sess_kwargs["profile_name"] = args.profile
    session = boto3.Session(**sess_kwargs)
    cfg = Config(
        connect_timeout=args.connect_timeout,
        read_timeout=args.read_timeout,
        retries={"max_attempts": args.retries, "mode": "standard"},
        s3={"addressing_style": "path" if args.force_path_addressing else "auto"},
    )
    s3 = session.client("s3", region_name=args.region, endpoint_url=args.endpoint_url, config=cfg)
    # Quick preflight: try a 0-key list to surface auth/endpoint issues fast
    try:
        _ = s3.list_objects_v2(Bucket=bucket, Prefix=prefix, MaxKeys=0)
    except ClientError as e:
        print(f"Preflight failed (auth/permissions/endpoint): {e}", file=sys.stderr)
        sys.exit(1)
    except (EndpointConnectionError, ReadTimeoutError, ConnectTimeoutError) as e:
        print(f"Network/endpoint error: {e}", file=sys.stderr)
        sys.exit(1)
    # Main listing loop: keys go to stdout, diagnostics to stderr.
    try:
        for key in iter_keys(s3, bucket, prefix, args.page_size, args.max_items, args.verbose):
            # --relative strips the prefix; otherwise print the full s3:// URI.
            if args.relative and prefix and key.startswith(prefix):
                print(key[len(prefix):].lstrip("/"))
            else:
                print(f"s3://{bucket}/{key}")
    except KeyboardInterrupt:
        print("\nInterrupted.", file=sys.stderr)
        sys.exit(130)
    except NoCredentialsError:
        print("No AWS credentials found. Set env vars or use --profile.", file=sys.stderr)
        sys.exit(1)
    except (EndpointConnectionError, ReadTimeoutError, ConnectTimeoutError) as e:
        print(f"Network/timeout listing objects: {e}", file=sys.stderr)
        sys.exit(1)
    except ClientError as e:
        print(f"AWS error: {e}", file=sys.stderr)
        sys.exit(1)
if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,145 @@
class Options:
    """Flat key/value holder for pipeline flow options.

    The supported option names are fixed in ``_OPTION_KEYS``; each one is
    also readable as an instance attribute (e.g. ``opts.corporate_store``)
    via ``__getattr__``, which replaces the 26 hand-written, byte-identical
    ``@property`` definitions of the original class. Unknown keys in the
    input mapping are silently ignored; ``get_option_value`` returns ``""``
    for unknown keys (options that were never supplied stay ``None``).
    """

    # Canonical set of supported option names; every instance starts with
    # each of these mapped to None until initialize_options fills it in.
    _OPTION_KEYS = (
        "corporate_store",
        "service_name",
        "source_schema",
        "source_table",
        "access_type",
        "oracle_metadata_table",
        "oracle_igam_table",
        "query_metadata_access_type1",
        "query_metadata_access_type2a",
        "query_igam_roles",
        "ora_jdbc_url_dwh",
        "ora_jdbc_url_ods",
        "sql_file_path",
        "sql_filename_grants",
        "sentry_role_environment",
        "ranger_script",
        "type3_access_table",
        "type3_access_table_key_column",
        "type3_source_table_key_column",
        "target_s3_bucket",
        "ranger_s3_bucket",
        "ranger_s3_path",
        "rar_full_access_entitlement_list",
        "target_table",
        "tech_meta_data_fields",
        "full_access_entitlement_list",
    )

    def __init__(self, args):
        """Build the option dict and fill it from the ``args`` mapping."""
        self.options = {key: None for key in self._OPTION_KEYS}
        # Initialize options from arguments
        self.initialize_options(args)

    def initialize_options(self, args):
        """Copy recognised key/value pairs from the ``args`` mapping."""
        for key, value in args.items():
            if key in self.options:
                self.options[key] = value

    def get_option_value(self, key):
        """Return the stored value for ``key``, or ``""`` when it is unknown."""
        return self.options.get(key, "")

    def __getattr__(self, name):
        # Invoked only when normal attribute lookup fails: expose every key
        # in self.options as a read-only attribute, mirroring the original
        # per-key properties. Guarded via __dict__ to avoid recursion if
        # called before __init__ has set self.options.
        options = self.__dict__.get("options")
        if options is not None and name in options:
            return options[name]
        raise AttributeError(f"'{type(self).__name__}' object has no attribute '{name}'")

View File

@@ -0,0 +1,73 @@
from impala.dbapi import (
connect,
ProgrammingError,
DatabaseError,
IntegrityError,
OperationalError,
)
from impala.error import HiveServer2Error
def get_DEVO_connection(hostname: str, user: str, secret: str):
    """Open an Impala connection over HTTPS (HiveServer2-over-HTTP, PLAIN auth)."""
    connection_settings = {
        "host": hostname,
        "port": 443,
        "auth_mechanism": "PLAIN",
        "user": user,
        "password": secret,
        "use_http_transport": True,
        "http_path": "cliservice",
        "use_ssl": True,
    }
    return connect(**connection_settings)
def execute_devo_query(query: str, conn):
    """Run ``query`` on an open Impala connection.

    Returns (columns, rows) for SELECT statements, or (None, rowcount) for
    any other statement. The cursor and the connection are always closed in
    the ``finally`` block, so the caller must not reuse ``conn`` afterwards.

    Raises Exception with a descriptive message for each driver error class.
    """
    cursor = None
    try:
        cursor = conn.cursor()
        cursor.execute(query)
        # SELECTs return data; everything else only reports affected rows.
        if query.strip().lower().startswith("select"):
            rows = cursor.fetchall()
            columns = [col[0] for col in cursor.description]
            return columns, rows
        else:
            # For non-SELECT queries (e.g., INSERT, UPDATE, DELETE), just return affected rows
            return None, cursor.rowcount  # rowcount returns the number of rows affected
    except OperationalError as oe:
        raise Exception("Failed to connect to DEVO: " + str(oe))
    except ProgrammingError as pe:
        raise Exception("Query syntax error: " + str(pe))
    except IntegrityError as ie:
        raise Exception("Insufficient permissions: " + str(ie))
    except DatabaseError as db_err:
        raise Exception("Database error: " + str(db_err))
    except HiveServer2Error as au_err:
        raise Exception("HiveServer2Error error: " + str(au_err))
    finally:
        try:
            if cursor:
                cursor.close()
            # BUG FIX: the original tested `if not conn:` and therefore never
            # closed a live connection (and would have crashed calling
            # close() on None), leaking the connection on every call.
            if conn is not None:
                conn.close()
        except Exception as e:
            # BUG FIX: Exception() takes no status_code/detail keyword
            # arguments (that raised TypeError); raise a plain message.
            raise Exception(f"Failed to close the cursor or impala connection: {str(e)}") from e
def execute_query(query: str, user: str, hostname: str, password):
    """Convenience wrapper: connect to DEVO, run one query, return (columns, rows)."""
    connection = get_DEVO_connection(hostname, user, password)
    return execute_devo_query(query, connection)
#sql="CREATE EXTERNAL TABLE IF NOT EXISTS crp_rar.testInternalTable ( iid STRING,RANDOM_DATE DATE, number int) ;"
#sql_drop="DROP TABLE IF EXISTS crp_rar.NH_PRICE"
#NOTE(review): credentials redacted — plaintext passwords must never be committed, even in comments.
#print( execute_query("SELECT 1","ap-informatica-ipcwt","t-impala.devo.escb.eu","<REDACTED>"))
#print( execute_query("SELECT 1","ap-devo_tst-mrds","t-impala.devo.escb.eu","<REDACTED>"))
#print( execute_query("SELECT 1","ap-devo_lab-mrds","impala-proxy-devo-lab21-impala01.dw-devo-lab21.om2y56.b0.cloudera.site","<REDACTED>"))

View File

@@ -0,0 +1,69 @@
# One-off exploratory script: connects to the lab Ranger admin, builds (but
# does not create) a sample Hive policy, then dumps every cm_hive policy to
# output.txt. Most policy-CRUD calls are left commented out from testing.
## Step 3: Let's create a policy
from apache_ranger.model.ranger_service import *
from apache_ranger.client.ranger_client import *
from apache_ranger.model.ranger_policy import *
from mrds.utils.secrets import get_secret
## Step 1: create a client to connect to Apache Ranger admin
# Hard-coded lab gateway; the admin password is pulled from an OCI vault secret.
ranger_url ="https://devo-lab21-dl-gateway.devo-lab.om2y56.b0.cloudera.site:443/devo-lab21-dl/cdp-proxy-api/ranger"
password= get_secret("ocid1.vaultsecret.oc1.eu-frankfurt-1.amaaaaaa2ky4jjya3tsglrzfgiyfisxchref774l5y4nrler2vn54lr3li7q")
ranger_auth = ('ap-devo_lab-mrds', password)
# For Kerberos authentication
#
# from requests_kerberos import HTTPKerberosAuth
#
# ranger_auth = HTTPKerberosAuth()
ranger = RangerClient(ranger_url, ranger_auth)
# NOTE(review): disables TLS certificate validation — acceptable for the lab
# only, never for production.
ranger.session.verify = False
# to disable SSL certificate validation (not recommended for production use!)
#
# ranger.session.verify = False
## Step 2: Let's create a service
policy = RangerPolicy()
policy.service = "cm_hive" # to be hardcoded
policy.name = 'cpo_crp_mopdb_sgroi_1' #corporatestore_table_accessType
policy.resources = { 'database': RangerPolicyResource({ 'values': ['crp_RQSD'] }),
                     'table': RangerPolicyResource({ 'values': ['ANNEX_1_1_ALL'] }),
                     'column': RangerPolicyResource({ 'values': ['*'] }) } #change with correct values
allowItem1 = RangerPolicyItem() #to try allowItem1.groups
allowItem1.groups = ["d_mopdb_mpec"]
#allowItem1.users = [] #to try for single users
allowItem1.accesses = [ RangerPolicyItemAccess({ 'type': 'create' }),
                        RangerPolicyItemAccess({ 'type': 'alter' }),
                        RangerPolicyItemAccess({ 'type': 'select' }),
                        RangerPolicyItemAccess({ 'type': 'drop' }) ]
"""denyItem1 = RangerPolicyItem()
denyItem1.users = [ 'admin' ] #does it make sense to deny and not allow?
denyItem1.accesses = [ RangerPolicyItemAccess({ 'type': 'drop' }) ]"""
policy.policyItems = [ allowItem1 ]
#policy.denyPolicyItems = [ denyItem1 ]
#policy2=ranger.get_policy_by_id(policyId=5086)
#print(ranger.get_policy(serviceName="cm_hive",policyName='crp_rar_testinternalTable_alcesso1'))
#print(ranger.find_policies({"service": "cm_hive", "resources": {"database": {"values": ["crp_rar"], "isExcludes": False , "isRecursive": False}, "column": {"values": ["*"], "isExcludes": False, "isRecursive": False}, "table": {"values": ["testInternalTable"], "isExcludes": False, "isRecursive": False}}}))
#print(ranger.delete_policy(serviceName="cm_hive",policyName="crp_rar_testinternalTable_alcesso1"))
#print(policy2)
#print('Creating policy: name=' + policy.name)
#created_policy = ranger.create_policy(policy)
#print(' created policy: name=' + created_policy.name + ', id=' + str(created_policy.id))
## Step 4: Delete policy and service created above
#print('Deleting policy: id=' + str(created_policy.id))
#ranger.delete_policy_by_id(created_policy.id)
# Dump every policy of the cm_hive service to a local file.
data=ranger.get_policies_in_service(serviceName="cm_hive")
with open("output.txt", "w") as file:
    for string in data:
        file.write(str(string))
# NOTE(review): redundant — the `with` block above already closed the file.
file.close()

View File

@@ -0,0 +1,250 @@
from typing import List, Optional
from apache_ranger.model.ranger_service import *
from apache_ranger.client.ranger_client import *
from apache_ranger.model.ranger_policy import *
import re
def add_table_permission_groups(corporate_store: str, target_table: str, access_type: str, source_table: str, igam_entitlement_list: List[str], columns_list: Optional[List[str]] = None, row_list: Optional[List[str]] = None):
    """Normalise the inputs for a Ranger table policy into a single dict.

    The "public" group is appended for the rar_sources_igam_sentry table;
    entitlements are lower-cased and empty entries dropped. Missing column
    and row selections default to ["*"]; rows are rendered as a quoted,
    comma-separated SQL list.
    """
    entitlements = list(igam_entitlement_list)
    if source_table.lower() == "rar_sources_igam_sentry":
        entitlements.append("public")
    selected_columns = ["*"] if columns_list is None else columns_list
    selected_rows = ["*"] if row_list is None else row_list
    filter_condition = ",".join(f"'{row}'" for row in selected_rows)
    roles = [entry.lower() for entry in entitlements if entry != ""]
    return {
        'corporate_store': corporate_store,
        'target_table': target_table,
        'access_type': access_type,
        'columns': selected_columns,
        'rows': filter_condition,
        'igam_roles': roles
    }
from typing import List, Optional
# --- helpers ---------------------------------------------------------------
def _policy_name_from_params(config, policy_id: Optional[str] = None) -> Optional[str]:
"""
Build the exact policy name used by your create functions.
Returns None for types where we need to match multiple (e.g., 2a without id).
"""
cs = config['corporate_store'].lower()
tbl = config['target_table'].lower()
at = config['access_type'].lower()
base = f"cpo_{cs}_{tbl}_{at}"
if at == "1":
# yaml_format_1
return base
elif at == "2a":
# yaml_format_2a -> requires policy_id to be exact
if policy_id:
return f"{base}_policy_{policy_id}"
# without policy_id, well delete all that start with this prefix
return None
elif at == "2b":
# yaml_format_2b
return f"{base}_row_level_policy"
elif at == "3":
# yaml_format_3 uses same name pattern as 2b in your script
return f"{base}_row_level_policy"
else:
raise ValueError(f"Invalid access type '{config['access_type']}'. Expected one of: 1, 2a, 2b, 3.")
def _ranger_client(env_config) -> RangerClient:
    """Build a RangerClient from env_config credentials (TLS checks disabled)."""
    client = RangerClient(env_config['RANGER_HOSTNAME'],
                          (env_config['DEVO_USERNAME'], env_config['DEVO_SECRET']))
    # NOTE: certificate validation is off, as in the rest of this module.
    client.session.verify = False
    return client
# --- main deletion API -----------------------------------------------------
def delete_policy(config,env_config, policy_id: Optional[str] = None) -> List[str]:
    """
    Delete Ranger policy/policies whose names start with the prefix derived
    from config['corporate_store'] and config['target_table'].

    All cm_hive policies are paged in and matched client-side against a
    fuzzy pattern covering the naming schemes of the yaml_format_* builders
    (access type, "_policy_<id>", "_row_level_policy", "full_access").
    policy_id is accepted for API symmetry but not used by the match here.
    Returns the list of deleted policy names; raises RuntimeError when
    nothing matched.
    """
    ranger = _ranger_client(env_config)
    service_name = "cm_hive"
    # Try build exact name
    deleted: List[str] = []
    # If we dont have an exact name (e.g. type 2a without policy_id),
    # delete *all* that match the expected prefix.
    cs = config['corporate_store'].lower()
    tbl = config['target_table'].lower()
    at = config['access_type'].lower()  # NOTE(review): computed but never used below
    prefix = f"cpo_{cs}_{tbl}_"
    print(prefix)  # DEBUG: left-over debug output of the computed prefix
    # Fetch all policies for the table and filter client-side to reduce calls.
    start = 0
    candidates = []
    page_size=1000
    service_name="cm_hive"
    while True:
        # Page through the whole service; a short page signals the last one.
        params = {"pageSize": page_size, "startIndex": start}
        page = ranger.get_policies_in_service(service_name, params=params) or []
        candidates.extend(page)
        if len(page) < page_size:
            break
        start += len(page)
    for p in candidates:
        name = p["name"]
        print(f"analizing policy:{name}")
        # Fuzzy suffix match after the prefix: optional access-type token,
        # optional "_policy_<digits>", "_row_level_policy" or "full_access".
        if re.fullmatch(f"{prefix}([0-9]?[a-z]?)(_policy_)?([0-9]*)?(_row_level_policy)?(full_access)?$",name) != None:
            try:
                ranger.delete_policy_by_id(p["id"])
                deleted.append(name)
            except Exception:
                # continue attempting others
                pass
    if not deleted:
        raise RuntimeError(
            f"No matching policies found for deletion with prefix '{prefix}'. "
        )
    return deleted
def generate_policy(params,env_config, policy_id: Optional[str] = None):
    """Dispatch to the yaml_format_* builder matching params['access_type'].

    NOTE(review): access type "2b" currently dispatches to yaml_format_1
    (column policy), not yaml_format_2b — confirm whether intentional.
    NOTE(review): yaml_format_3 is called with params only, but its
    definition takes (params, env_config, filterString, full_access_list),
    so the "3" branch will raise TypeError — confirm the intended call.
    """
    access_type = params['access_type'].lower()
    if access_type == "1":
        return yaml_format_1(params,env_config)
    elif access_type == "2a":
        return yaml_format_2a(params, env_config, policy_id)
    elif access_type == "2b":
        return yaml_format_1(params,env_config)
    elif access_type == "3":
        return yaml_format_3(params)
    else:
        raise Exception(f"Invalid access type {params['access_type']}. Please check the input param")
def yaml_format_1(params,env_config) -> str:
    """Create a column-level select policy (access type 1) in Ranger and return it."""
    ranger = _ranger_client(env_config)
    database = params['corporate_store'].lower()
    policy = RangerPolicy()
    policy.service = "cm_hive"  # service name is fixed for this deployment
    # Naming scheme: cpo_<corporate_store>_<table>_<access_type>
    policy.name = f"cpo_{database}_{params['target_table'].lower()}_{params['access_type'].lower()}"
    policy.resources = {
        'database': RangerPolicyResource({'values': [database]}),
        'table': RangerPolicyResource({'values': [params['target_table']]}),
        'column': RangerPolicyResource({'values': params['columns']}),
    }
    allow_select = RangerPolicyItem()
    allow_select.groups = params['igam_roles']
    allow_select.accesses = [RangerPolicyItemAccess({'type': 'select'})]
    policy.policyItems = [allow_select]
    created_policy = ranger.create_policy(policy)
    print('Created policy: name=' + created_policy.name + ', id=' + str(created_policy.id))
    return policy
def yaml_format_2a(params, env_config, policy_id: Optional[str]) -> str:
    """Create a column-level select policy for access type 2a in Ranger.

    The policy name embeds policy_id (defaulting to "0") so that several
    2a policies can coexist for the same table.
    """
    effective_id = policy_id if policy_id is not None else "0"
    ranger = _ranger_client(env_config)
    database = params['corporate_store'].lower()
    policy = RangerPolicy()
    policy.service = "cm_hive"  # fixed Hive service name
    policy.name = f"cpo_{database}_{params['target_table'].lower()}_{params['access_type'].lower()}_policy_{effective_id}"
    policy.resources = {
        'database': RangerPolicyResource({'values': [database]}),
        'table': RangerPolicyResource({'values': [params['target_table']]}),
        'column': RangerPolicyResource({'values': params['columns']}),
    }
    allow_select = RangerPolicyItem()
    allow_select.groups = params['igam_roles']
    allow_select.accesses = [RangerPolicyItemAccess({'type': 'select'})]
    policy.policyItems = [allow_select]
    created_policy = ranger.create_policy(policy)
    print(' created policy: name=' + created_policy.name + ', id=' + str(created_policy.id))
    return policy
def yaml_format_2b(params,env_config, full_access_list: Optional[List]) -> str:
    """Create a row-level-filter policy (access type 2b) in Ranger.

    Two row-filter items are attached: entitled groups see only rows whose
    source maps to one of the current user's AD groups (via the
    t_ref_rar_sources_igam_sentry lookup), while groups in full_access_list
    get an always-true filter (all rows). Returns the submitted policy.
    """
    # For Kerberos authentication
    #
    # from requests_kerberos import HTTPKerberosAuth
    #
    # ranger_auth = HTTPKerberosAuth()
    ranger = _ranger_client(env_config)
    policy = RangerPolicy()
    policy.service = "cm_hive" #harcoded
    policy.name = f"cpo_{params['corporate_store'].lower()}_{params['target_table'].lower()}_{params['access_type'].lower()}_row_level_policy" #corporatestore_table_accessType
    policy.isEnabled = True
    policy.resources ={ 'database': RangerPolicyResource({ 'values': [params['corporate_store'].lower()] }),
                        'table': RangerPolicyResource({ 'values': [params['target_table']] })}
    rowFilterAllowItem1= RangerRowFilterPolicyItem()
    rowFilterAllowItem1.groups = params['igam_roles']
    rowFilterAllowItem1.accesses = [RangerPolicyItemAccess({ 'type': 'select' })]
    # Restrict rows to sources whose IGAM entitlement matches one of the
    # current user's AD groups.
    rowFilterAllowItem1.rowFilterInfo = RangerPolicyItemRowFilterInfo({ 'filterExpr': f"lower(source) IN (select lower(rar_subsource_id) from {params['corporate_store'].lower()}.t_ref_rar_sources_igam_sentry where lower(rar_igam_entitlement) IN (select ad_group from {params['corporate_store'].lower()}.active_directory_user_groups where username = lower(regexp_extract(current_user(),'[^@]*',0))))" })
    rowFilterAllowItem2= RangerRowFilterPolicyItem()
    rowFilterAllowItem2.groups = [x.lower() for x in full_access_list]
    rowFilterAllowItem2.accesses = [RangerPolicyItemAccess({ 'type': 'select' })]
    # Full-access groups get an always-true filter (no row restriction).
    rowFilterAllowItem2.rowFilterInfo = RangerPolicyItemRowFilterInfo({ 'filterExpr': f"1=1" })
    policy.rowFilterPolicyItems= [rowFilterAllowItem1, rowFilterAllowItem2]
    created_policy = ranger.create_policy(policy)
    print(' created policy: name=' + created_policy.name + ', id=' + str(created_policy.id))
    return policy
def yaml_format_3(params, env_config,filterString, full_access_list: Optional[List]) -> str:
    """Create a row-level-filter policy (access type 3) in Ranger.

    Like yaml_format_2b but with a caller-supplied filter expression
    (filterString) for the entitled groups; groups in full_access_list get
    an always-true filter. Returns the policy that was submitted.
    """
    ranger = _ranger_client(env_config)
    policy = RangerPolicy()
    policy.service = "cm_hive" # hardcoded
    # Same row-level naming scheme as yaml_format_2b.
    policy.name = (
        f"cpo_{params['corporate_store'].lower()}_"
        f"{params['target_table'].lower()}_"
        f"{params['access_type'].lower()}_row_level_policy"
    )
    policy.isEnabled = True
    policy.resources = {
        "database": RangerPolicyResource({"values": [params["corporate_store"].lower()]}),
        "table": RangerPolicyResource({"values": [params["target_table"]]}),
    }
    # Row filter item
    rowFilterAllowItem = RangerRowFilterPolicyItem()
    rowFilterAllowItem.groups = params["igam_roles"]
    rowFilterAllowItem.accesses = [RangerPolicyItemAccess({"type": "select"})]
    rowFilterAllowItem.rowFilterInfo = RangerPolicyItemRowFilterInfo(
        {
            "filterExpr": filterString
        }
    )
    # Full-access groups bypass the filter via an always-true expression.
    rowFilterAllowItem2= RangerRowFilterPolicyItem()
    rowFilterAllowItem2.groups = [x.lower() for x in full_access_list]
    rowFilterAllowItem2.accesses = [RangerPolicyItemAccess({ 'type': 'select' })]
    rowFilterAllowItem2.rowFilterInfo = RangerPolicyItemRowFilterInfo({ 'filterExpr': f"1=1" })
    policy.rowFilterPolicyItems = [rowFilterAllowItem,rowFilterAllowItem2]
    # Create policy in Ranger
    created_policy = ranger.create_policy(policy)
    print(f" created policy: name={created_policy.name}, id={created_policy.id}")
    return policy

View File

@@ -0,0 +1,345 @@
from typing import List, Optional
from apache_ranger.model.ranger_service import *
from apache_ranger.client.ranger_client import *
from apache_ranger.model.ranger_policy import *
import re
def add_table_permission_groups(corporate_store: str, target_table: str, access_type: str, source_table: str, igam_entitlement_list: List[str], columns_list: Optional[List[str]] = None, row_list: Optional[List[str]] = None):
    """Collect and normalise policy inputs into a single parameter dict.

    Appends "public" for the rar_sources_igam_sentry table, lower-cases the
    entitlements (dropping empties), defaults columns/rows to ["*"], and
    renders the row list as a quoted comma-separated SQL fragment.
    """
    if source_table.lower() == "rar_sources_igam_sentry":
        entitlements = igam_entitlement_list + ["public"]
    else:
        entitlements = igam_entitlement_list
    chosen_columns = columns_list if columns_list is not None else ["*"]
    chosen_rows = row_list if row_list is not None else ["*"]
    quoted_rows = ",".join("'" + row + "'" for row in chosen_rows)
    normalised_roles = [role.lower() for role in entitlements if role != ""]
    return {
        'corporate_store': corporate_store,
        'target_table': target_table,
        'access_type': access_type,
        'columns': chosen_columns,
        'rows': quoted_rows,
        'igam_roles': normalised_roles
    }
from typing import List, Optional
# --- helpers ---------------------------------------------------------------
def _policy_name_from_params(config, policy_id: Optional[str] = None) -> Optional[str]:
"""
Build the exact policy name used by your create functions.
Returns None for types where we need to match multiple (e.g., 2a without id).
"""
cs = config.corporate_store.lower()
tbl = config.target_table.lower()
at = config.access_type.lower()
base = f"cpo_{cs}_{tbl}_{at}"
if at == "1":
# yaml_format_1
return base
elif at == "2a":
# yaml_format_2a -> requires policy_id to be exact
if policy_id:
return f"{base}_policy_{policy_id}"
# without policy_id, well delete all that start with this prefix
return None
elif at == "2b":
# yaml_format_2b
return f"{base}_row_level_policy"
elif at == "3":
# yaml_format_3 uses same name pattern as 2b in your script
return f"{base}_row_level_policy"
else:
raise ValueError(f"Invalid access type '{config.access_type}'. Expected one of: 1, 2a, 2b, 3.")
def _ranger_client(env_config) -> RangerClient:
    """Construct a RangerClient from env_config (TLS verification disabled)."""
    hostname = env_config['RANGER_HOSTNAME']
    credentials = (env_config['DEVO_USERNAME'], env_config['DEVO_SECRET'])
    client = RangerClient(hostname, credentials)
    # NOTE: certificate validation is off, matching the rest of this module.
    client.session.verify = False
    return client
# --- main deletion API -----------------------------------------------------
def delete_policy(config,env_config, policy_id: Optional[str] = None) -> List[str]:
    """
    Delete Ranger policy/policies whose names start with the prefix derived
    from config.corporate_store and config.target_table.

    All cm_hive policies are paged in and matched client-side against a
    fuzzy pattern covering the naming schemes of the yaml_format_* builders
    (access type, "_policy_<id>", "_row_level_policy", "full_access").
    policy_id is accepted for API symmetry but not used by the match here.
    Returns the list of deleted policy names; raises RuntimeError when
    nothing matched.
    """
    ranger = _ranger_client(env_config)
    service_name = "cm_hive"
    # Try build exact name
    deleted: List[str] = []
    # If we dont have an exact name (e.g. type 2a without policy_id),
    # delete *all* that match the expected prefix.
    cs = config.corporate_store.lower()
    tbl = config.target_table.lower()
    at = config.access_type.lower()  # NOTE(review): computed but never used below
    prefix = f"cpo_{cs}_{tbl}_"
    # Fetch all policies for the table and filter client-side to reduce calls.
    start = 0
    candidates = []
    page_size=1000
    service_name="cm_hive"
    while True:
        # Page through the whole service; a short page signals the last one.
        params = {"pageSize": page_size, "startIndex": start}
        page = ranger.get_policies_in_service(service_name, params=params) or []
        candidates.extend(page)
        if len(page) < page_size:
            break
        start += len(page)
    for p in candidates:
        name = p["name"]
        print(f"analizing policy:{name}")
        # Fuzzy suffix match after the prefix: optional access-type token,
        # optional "_policy_<digits>", "_row_level_policy" or "full_access".
        if re.fullmatch(f"{prefix}([0-9]?[a-z]?)(_policy_)?([0-9]*)?(_row_level_policy)?(full_access)?$",name) != None:
            try:
                ranger.delete_policy_by_id(p["id"])
                deleted.append(name)
            except Exception:
                # continue attempting others
                pass
    if not deleted:
        raise RuntimeError(
            f"No matching policies found for deletion with prefix '{prefix}'. "
            f"Provide 'policy_id' to delete a specific 2a policy."
        )
    return deleted
def generate_policy(params,env_config, policy_id: Optional[str] = None):
    """Dispatch to the yaml_format_* builder matching params['access_type'].

    NOTE(review): access type "2b" currently dispatches to yaml_format_1
    (column policy), not yaml_format_2b — confirm whether intentional.
    NOTE(review): yaml_format_3 is called with params only, but its
    definition takes (params, env_config, filterString, full_access_list),
    so the "3" branch will raise TypeError — confirm the intended call.
    """
    access_type = params['access_type'].lower()
    if access_type == "1":
        return yaml_format_1(params,env_config)
    elif access_type == "2a":
        return yaml_format_2a(params, env_config, policy_id)
    elif access_type == "2b":
        return yaml_format_1(params,env_config)
    elif access_type == "3":
        return yaml_format_3(params)
    else:
        raise Exception(f"Invalid access type {params['access_type']}. Please check the input param")
def yaml_format_1(params,env_config) -> str:
    """Create a column-level select policy (access type 1) in Ranger.

    Returns the RangerPolicy that was submitted. Creation errors are
    reported but not raised, preserving the original best-effort behaviour.
    """
    ranger=_ranger_client(env_config)
    # For Kerberos authentication
    #
    # from requests_kerberos import HTTPKerberosAuth
    #
    # ranger_auth = HTTPKerberosAuth()
    policy = RangerPolicy()
    policy.service = "cm_hive" #harcoded
    policy.name = f"cpo_{params['corporate_store'].lower()}_{params['target_table'].lower()}_{params['access_type'].lower()}" #corporatestore_table_accessType
    policy.resources = { 'database': RangerPolicyResource({ 'values': [params['corporate_store'].lower()] }),
                         'table': RangerPolicyResource({ 'values': [params['target_table']] }),
                         'column': RangerPolicyResource({ 'values': params['columns'] }) }
    allowItem1 = RangerPolicyItem()
    allowItem1.groups = params['igam_roles']
    allowItem1.accesses = [RangerPolicyItemAccess({ 'type': 'select' })]
    policy.policyItems = [ allowItem1 ]
    print(policy)
    try:
        created_policy = ranger.create_policy(policy)
        print('Created policy: name=' + created_policy.name + ', id=' + str(created_policy.id))
    except Exception as exc:
        # BUG FIX: this was a bare `except: pass`, which silently swallowed
        # every failure (including KeyboardInterrupt/SystemExit). Keep the
        # best-effort semantics but catch only Exception and report it.
        print(f"Failed to create policy {policy.name}: {exc}")
    # Dead commented-out YAML template removed; return the built policy for
    # parity with the other yaml_format_* builders in this codebase.
    return policy
def yaml_format_2a(params, env_config,policy_id: Optional[str]) -> str:
    """Build (but do not create) a column policy for access type 2a.

    NOTE(review): the ranger.create_policy call is commented out here, so
    this variant only prints the assembled policy and returns None —
    confirm whether this dry-run behaviour is intentional.
    """
    policy_ID = policy_id if policy_id is not None else "0"
    # For Kerberos authentication
    #
    # from requests_kerberos import HTTPKerberosAuth
    #
    # ranger_auth = HTTPKerberosAuth()
    ranger = _ranger_client(env_config)
    policy = RangerPolicy()
    policy.service = "cm_hive" #harcoded
    policy.name = f"cpo_{params['corporate_store'].lower()}_{params['target_table'].lower()}_{params['access_type'].lower()}_policy_{policy_ID}" #corporatestore_table_accessType
    policy.resources = { 'database': RangerPolicyResource({ 'values': [params['corporate_store'].lower()] }),
                         'table': RangerPolicyResource({ 'values': [params['target_table']] }),
                         'column': RangerPolicyResource({ 'values': params['columns'] }) }
    allowItem1 = RangerPolicyItem()
    allowItem1.groups = params['igam_roles']
    allowItem1.accesses = [RangerPolicyItemAccess({ 'type': 'select' })]
    policy.policyItems = [ allowItem1 ]
    print(policy)
    print("\n\n")
    #created_policy = ranger.create_policy(policy)
    #print(' created policy: name=' + created_policy.name + ', id=' + str(created_policy.id))
    '''
    yaml_format = f"""- name: "{{{{ ecb_env }}}} : {{{{ cdp_env_name }}}}: allow CRP RAR users to select core tables"
      devo_ranger_client:
        name: "cpo_{params['corporate_store'].lower()}_{params['target_table'].lower()}_{params['access_type'].lower()}_policy_{policy_ID}"
        policy:
          service: cm_hive
          resources:
            database:
              values:
                - {params['corporate_store'].lower()}
            table:
              values:
                - {params['target_table']}
            column:
              values:
              {params['columns']}
          policyItems:
            - groups:
              {params['igam_roles'].lower()}
              accesses:
                - select
    """
    return yaml_format'
    '''
def yaml_format_2b(params,env_config, full_access_list: Optional[List]) -> str:
    """Create a row-level-filter policy (access type 2b) in Ranger.

    Entitled groups see only rows whose source maps to one of the current
    user's AD groups; groups in full_access_list get an always-true filter.
    NOTE(review): the function ends with an unreachable dead string and
    implicitly returns None (the file-5 sibling returns the policy).
    """
    # For Kerberos authentication
    #
    # from requests_kerberos import HTTPKerberosAuth
    #
    # ranger_auth = HTTPKerberosAuth()
    ranger = _ranger_client(env_config)
    policy = RangerPolicy()
    policy.service = "cm_hive" #harcoded
    policy.name = f"cpo_{params['corporate_store'].lower()}_{params['target_table'].lower()}_{params['access_type'].lower()}_row_level_policy" #corporatestore_table_accessType
    policy.isEnabled = True
    policy.resources ={ 'database': RangerPolicyResource({ 'values': [params['corporate_store'].lower()] }),
                        'table': RangerPolicyResource({ 'values': [params['target_table']] })}
    rowFilterAllowItem1= RangerRowFilterPolicyItem()
    rowFilterAllowItem1.groups = params['igam_roles']
    rowFilterAllowItem1.accesses = [RangerPolicyItemAccess({ 'type': 'select' })]
    # Restrict rows to sources whose IGAM entitlement matches one of the
    # current user's AD groups.
    rowFilterAllowItem1.rowFilterInfo = RangerPolicyItemRowFilterInfo({ 'filterExpr': f"lower(source) IN (select lower(rar_subsource_id) from {params['corporate_store'].lower()}.t_ref_rar_sources_igam_sentry where lower(rar_igam_entitlement) IN (select ad_group from {params['corporate_store'].lower()}.active_directory_user_groups where username = lower(regexp_extract(current_user(),'[^@]*',0))))" })
    rowFilterAllowItem2= RangerRowFilterPolicyItem()
    rowFilterAllowItem2.groups = [x.lower() for x in full_access_list]
    rowFilterAllowItem2.accesses = [RangerPolicyItemAccess({ 'type': 'select' })]
    # Full-access groups get an always-true filter (no row restriction).
    rowFilterAllowItem2.rowFilterInfo = RangerPolicyItemRowFilterInfo({ 'filterExpr': f"1=1" })
    policy.rowFilterPolicyItems= [rowFilterAllowItem1, rowFilterAllowItem2]
    print(policy)
    created_policy = ranger.create_policy(policy)
    print(' created policy: name=' + created_policy.name + ', id=' + str(created_policy.id))
    '''
    yaml_format = f"""- name: "{{{{ ecb_env }}}} : {{{{ cdp_env_name }}}}: filter by confidentiality level"
      devo_ranger_client:
        name: "cpo_{params['corporate_store'].lower()}_{params['target_table'].lower()}_{params['access_type'].lower()}_row_level_policy"
        policy:
          isEnabled: "true"
          service: cm_hive
          resources:
            database:
              values:
                - {params['corporate_store'].lower()}
            table:
              values:
                - {params['target_table']}
          rowFilterPolicyItems:
    """
    return yaml_format
    '''
def yaml_format_3(params, env_config,filterString, full_access_list: Optional[List]) -> str:
    """Create a row-level-filter policy (access type 3) in Ranger.

    Like yaml_format_2b but with a caller-supplied filter expression
    (filterString) for the entitled groups; full_access_list groups get an
    always-true filter. Returns the created policy object.
    """
    ranger = _ranger_client(env_config)
    policy = RangerPolicy()
    policy.service = "cm_hive" # hardcoded
    # Same row-level naming scheme as yaml_format_2b.
    policy.name = (
        f"cpo_{params['corporate_store'].lower()}_"
        f"{params['target_table'].lower()}_"
        f"{params['access_type'].lower()}_row_level_policy"
    )
    policy.isEnabled = True
    policy.resources = {
        "database": RangerPolicyResource({"values": [params["corporate_store"].lower()]}),
        "table": RangerPolicyResource({"values": [params["target_table"]]}),
    }
    # Row filter item
    rowFilterAllowItem = RangerRowFilterPolicyItem()
    rowFilterAllowItem.groups = params["igam_roles"]
    rowFilterAllowItem.accesses = [RangerPolicyItemAccess({"type": "select"})]
    rowFilterAllowItem.rowFilterInfo = RangerPolicyItemRowFilterInfo(
        {
            "filterExpr": filterString
        }
    )
    # Full-access groups bypass the filter via an always-true expression.
    rowFilterAllowItem2= RangerRowFilterPolicyItem()
    rowFilterAllowItem2.groups = [x.lower() for x in full_access_list]
    rowFilterAllowItem2.accesses = [RangerPolicyItemAccess({ 'type': 'select' })]
    rowFilterAllowItem2.rowFilterInfo = RangerPolicyItemRowFilterInfo({ 'filterExpr': f"1=1" })
    policy.rowFilterPolicyItems = [rowFilterAllowItem,rowFilterAllowItem2]
    print(policy)
    # Create policy in Ranger
    created_policy = ranger.create_policy(policy)
    print(f" created policy: name={created_policy.name}, id={created_policy.id}")
    return created_policy
    # Unreachable dead string below (kept verbatim).
    """
    yaml_format = f"- name: "{{{{ ecb_env }}}} : {{{{ cdp_env_name }}}}: filter by confidentiality level"
      devo_ranger_client:
        name: "cpo_{params['corporate_store'].lower()}_{params['target_table'].lower()}_{params['access_type'].lower()}_row_level_policy"
        policy:
          isEnabled: "true"
          service: cm_hive
          resources:
            database:
              values:
                - {params['corporate_store'].lower()}
            table:
              values:
                - {params['target_table']}
          rowFilterPolicyItems:
    return yaml_format
    """

View File

@@ -0,0 +1,793 @@
import pandasql as ps
import pandas as pd
import mrds.utils.manage_files as fileManager
import logging
import tableBuilderQueries as tbq
from devo_query import execute_query
import ranger_updater_old as ranger
import os
import yaml
import FlowOptions as fo
import numpy as np
from mrds.utils.secrets import get_secret
import traceback
from mrds.utils import oraconn
# Set up basic configuration for logging
logging.basicConfig(level=logging.INFO)
# Create a logger object
logger = logging.getLogger(__name__)
import re
#0 utilities
def initialize_config(config_file_path):
    """Load a YAML configuration file and return its parsed contents.

    Args:
        config_file_path: path to the YAML file.

    Returns:
        The deserialized YAML document (typically a dict).

    Raises:
        FileNotFoundError: if ``config_file_path`` does not exist.
    """
    if not os.path.exists(config_file_path):
        raise FileNotFoundError(f"Configuration file {config_file_path} not found.")
    with open(config_file_path, "r") as handle:
        return yaml.safe_load(handle)
def fix_impala_sql(sql: str) -> str:
    """Quote reserved-word column names in a CREATE TABLE statement with backticks.

    Scans only the column-definition section — the text between the first '('
    and the literal 'ROW FORMAT SERDE' — and backticks any column whose name is
    an Impala reserved keyword or contains non-identifier characters.

    Args:
        sql: a full CREATE [EXTERNAL] TABLE statement, expected to contain a
            parenthesized column list followed by 'ROW FORMAT SERDE'.

    Returns:
        The statement with offending column names backtick-quoted, and with
        escaped quotes (``\\'``) removed and double backslashes collapsed
        (see final replace calls).

    Raises:
        ValueError: when the '(' or 'ROW FORMAT SERDE' markers are missing.
    """
    # List of reserved keywords in Impala that need backticks if used as column names
    impala_reserved_keywords = {
        'date', 'value', 'source', 'comment', 'partition', 'row', 'select', 'insert',
        'table', 'external', 'format', 'location', 'stored', 'inputformat', 'outputformat',
        'scenario', 'string', 'int', 'decimal', 'timestamp', 'float', 'double','procedure', 'floor'
    }
    # Regex pattern to find column definitions: <name> <type[(args)]> [comment '...']
    pattern = re.compile(
        r'(?P<col>`?\w+`?)\s+(?P<type>[A-Za-z]+\s*(?:\([^)]+\))?)\s*(?P<comment>comment\s*\'[^\']*\'|)?',
        re.IGNORECASE
    )
    def replace(match):
        # Rebuild one "<col> <type> [comment]" fragment, quoting col if needed.
        col = match.group('col').strip('`')
        dtype = match.group('type')
        comment = match.group('comment') or ''
        # Add backticks only if column name is a reserved keyword or contains special chars
        if col.lower() in impala_reserved_keywords or not re.match(r'^[A-Za-z_][A-Za-z0-9_]*$', col):
            col = f'`{col}`'
        return f"{col} {dtype} {comment}".strip()
    # Only replace column list part between parentheses
    table_def_start = sql.find('(')
    table_def_end = sql.find('ROW FORMAT SERDE', table_def_start)
    if table_def_start == -1 or table_def_end == -1:
        raise ValueError("Invalid SQL format: Missing column definition parentheses.")
    before = sql[:table_def_start + 1]
    columns = sql[table_def_start + 1:table_def_end]
    after = sql[table_def_end:]
    # Replace all columns inside definition
    fixed_columns = pattern.sub(replace, columns)
    # Combine and return.
    final= before + fixed_columns + after
    # NOTE(review): this strips escaped single quotes produced upstream by
    # format_column_definition and un-doubles backslashes across the WHOLE
    # statement, not just comments — presumably intentional; confirm.
    final=final.replace("\\'", "").replace('\\\\', '\\')
    return final
def applyQueryParameters(query: str, parameters: str) -> str:
    """Fill $$$N placeholders in *query* with positional parameter values.

    Args:
        query: query text containing placeholders $$$1, $$$2, ...
        parameters: semicolon-separated parameter values; the i-th value
            replaces placeholder $$$i. Falsy input leaves the query untouched.

    Returns:
        The query with every placeholder substituted.
    """
    if not parameters:
        return query
    values = parameters.split(';')
    result = query
    # Substitute the highest-numbered placeholder first so that $$$1 never
    # clobbers the prefix of $$$10, $$$11, ...
    for position in range(len(values), 0, -1):
        result = result.replace(f"$$${position}", values[position - 1])
    return result
def format_column_definition(row):
    """Render one metadata row as a Hive/Impala column-definition fragment.

    Args:
        row: mapping with 'column_name', 'data_type_string' and
            'data_description' entries (e.g. a DataFrame row).

    Returns:
        "<name> <type>" when the description is null, otherwise
        "<name> <type> comment '<description>'" with single quotes escaped.
    """
    base = f"{row['column_name']} {row['data_type_string']}"
    description = row['data_description']
    if pd.isnull(description):
        return base
    # Escape single quotes so the description survives inside the SQL literal.
    escaped = str(description).replace("'", "\\'")
    return f"{base} comment '{escaped}'"
#1 receive table name and check for target table and access type
def execute_oracle_query(sql):
    """Run *sql* against the MRDS_LOADER_MOPDB Oracle connection.

    Args:
        sql: the statement to execute.

    Returns:
        pandas.DataFrame with the full result set; column names are the
        lower-cased names from the cursor description.

    Fix: the cursor and connection are now closed in ``finally`` blocks, so a
    failing query no longer leaks them (the original closed only on success).
    """
    oracle_conn = oraconn.connect('MRDS_LOADER_MOPDB')
    try:
        cursor = oracle_conn.cursor()
        try:
            options = cursor.execute(sql).fetchall()
            # commit is a no-op for SELECTs but kept for callers passing DML
            oracle_conn.commit()
            df = pd.DataFrame(options, columns=[row[0].lower() for row in cursor.description])
        finally:
            cursor.close()
    finally:
        oracle_conn.close()
    return df
def get_target_table(oracle_mgmt_table,source_schema,source_table, env):
    """Look up the replica table alias for (source_schema, source_table).

    Args:
        oracle_mgmt_table: fully-qualified management table name.
        source_schema: Oracle owner of the source table.
        source_table: source table name.
        env: unused here; kept for signature parity with siblings.

    Returns:
        DataFrame with a single 'table_alias' column of distinct aliases.
    """
    lookup_sql = (
        f"SELECT DISTINCT TABLE_ALIAS FROM {oracle_mgmt_table} "
        f"WHERE OWNER = '{source_schema}' AND TABLE_NAME = '{source_table}'"
    )
    return execute_oracle_query(lookup_sql)
def get_type_ofAccess(oracle_metadata_table,source_schema,source_table,env):
    """Return the currently-valid RAR3 access type(s) for a source table.

    Args:
        oracle_metadata_table: fully-qualified metadata inventory table.
        source_schema: Oracle owner of the source table.
        source_table: source table name.
        env: unused here; kept for signature parity with siblings.

    Returns:
        DataFrame with a single 'rar3_type_of_access' column.

    Fix: the original concatenated "...'{source_schema}'AND TABLE_NAME..."
    with no space between the closing quote and AND; a space is added so the
    statement does not rely on the parser tolerating fused tokens.
    """
    sql = (
        f"SELECT DISTINCT RAR3_TYPE_OF_ACCESS FROM {oracle_metadata_table} "
        f"WHERE A_VALID_TO > SYSDATE AND OWNER = '{source_schema}' "
        f"AND TABLE_NAME = '{source_table}'"
    )
    return execute_oracle_query(sql)
#2 load metadata
def readIGAMRoles(config, env):
    """Fetch IGAM role/entitlement rows for the configured sentry environment.

    Args:
        config: options object exposing sentry_role_environment,
            oracle_igam_table and service_name.
        env: unused here; kept for signature parity with siblings.

    Returns:
        DataFrame of IGAM roles returned by the parameterized query.
    """
    base_query = tbq.get_query_igam_roles(config.oracle_igam_table, config.service_name)
    logger.info("Querying the IGAM Table")
    # The query template expects a single quoted parameter ($$$1).
    quoted_env = "'" + config.sentry_role_environment + "'"
    final_query = applyQueryParameters(base_query, quoted_env)
    logger.info("Replaced params to IGAM Table:")
    return execute_oracle_query(final_query)
def loadMetadataTable(config, env):
    """Build the comma-separated Hive column-definition list for the source table.

    Queries the Oracle metadata inventory for the configured
    (source_schema, source_table), formats each row through
    format_column_definition, and joins the fragments with ','.

    Args:
        config: options object exposing oracle_metadata_table, source_schema
            and source_table.
        env: unused here; kept for signature parity with siblings.

    Returns:
        A single string of column definitions ready for a CREATE TABLE body.

    Fix: removed the stray chained assignment ('jdbcMetaDataDF = df = ...')
    that created an unused local.
    """
    metadataQuery = tbq.get_query_metadata(config.oracle_metadata_table, config.source_schema, config.source_table)
    logger.info("Map Oracle metadata (data types) to Hive query: ")
    jdbcMetaDataDF = execute_oracle_query(metadataQuery)
    logger.info("Fetch all fields for table and concatenate them separated by ','")
    tableDataList = jdbcMetaDataDF.apply(format_column_definition, axis=1).tolist()
    return ",".join(tableDataList)
#3 drop table and policies
def deleteExternalTable(config,env_config):
    """Best-effort teardown: delete Ranger policies, then drop the target table.

    Policy deletion stays best-effort (the policy may not exist on a first
    run), but the swallowed exception is now logged instead of silently
    discarded — the original `except Exception as e: pass` hid real failures.

    Args:
        config: options object exposing corporate_store and target_table.
        env_config: environment dict with DEVO_USERNAME / IMPALA_HOSTNAME /
            DEVO_SECRET entries.
    """
    try:
        ranger.delete_policy(config,env_config)
    except Exception:
        logger.warning("Ranger delete_policy failed (continuing):\n%s", traceback.format_exc())
    sql_drop = f"DROP TABLE IF EXISTS {config.corporate_store}.{config.target_table}"
    execute_query(
        sql_drop,
        env_config['DEVO_USERNAME'], env_config['IMPALA_HOSTNAME'], env_config['DEVO_SECRET'],
    )
#4 create external table and policies
def createExternalTables( config, tableFields,env_config ):
    """Create the parquet-backed external table for the replicated data.

    Assembles a CREATE EXTERNAL TABLE DDL from the generated field list plus
    the configured technical metadata columns, runs it through
    fix_impala_sql to backtick reserved column names, and executes it on Hive.

    Args:
        config: options object with corporate_store, target_table,
            tech_meta_data_fields and target_s3_bucket.
        tableFields: comma-separated column definitions (from loadMetadataTable).
        env_config: environment dict with DEVO_USERNAME / HIVE_HOSTNAME /
            DEVO_SECRET entries.
    """
    ddl_parts = [
        f"CREATE EXTERNAL TABLE {config.corporate_store}.{config.target_table} ",
        f"({tableFields}, {config.tech_meta_data_fields}) ",
        "ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' ",
        "STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' ",
        "OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' ",
        f"LOCATION '{config.target_s3_bucket}/{config.target_table}' ",
        "TBLPROPERTIES (",
        "'external.table.purge'='true', ",
        "'parquet.compression'='snappy')",
    ]
    sql_create = fix_impala_sql("".join(ddl_parts))
    execute_query(sql_create, env_config['DEVO_USERNAME'], env_config['HIVE_HOSTNAME'], env_config['DEVO_SECRET'])
def createTableFromExternal( config, tableFields,env_config ):
    """Materialize <target_table> via CTAS from its _EXT counterpart.

    Args:
        config: options object with corporate_store and target_table.
        tableFields: unused here; kept for signature parity with
            createExternalTables.
        env_config: environment dict with DEVO_USERNAME / HIVE_HOSTNAME /
            DEVO_SECRET entries.
    """
    qualified = f"{config.corporate_store}.{config.target_table}"
    sql_create = f"CREATE EXTERNAL TABLE {qualified} AS SELECT * FROM {qualified}_EXT"
    execute_query(sql_create, env_config['DEVO_USERNAME'], env_config['HIVE_HOSTNAME'], env_config['DEVO_SECRET'])
def accessTypeMapper(config, env_config, igamRoleDF):
    """Dispatch to the policy builder matching config.access_type.

    Recognized (case-insensitive) access types: '1', '2a', '2b', '3'.
    Anything else is logged and ignored.
    """
    handlers = {
        '1': accessType_1,
        '2a': accessType_2A,
        '2b': accessType_2B,
        '3': accessType_3,
    }
    handler = handlers.get(config.access_type.lower())
    if handler is None:
        logger.info(f"Invalid access type {config.access_type}. Please check the input param")
    else:
        handler(config, env_config, igamRoleDF)
def accessType_1(config, env_config, igamRoleDF):
    """Grant table-level access for access type 1.

    Pulls type-1 metadata rows from Oracle, joins them against the IGAM role
    table (inner join on source/datasource, or a cross join when the source
    equals config.service_name), collects the matching IGAM entitlements,
    optionally appends the configured full-access entitlements, and emits a
    single table-level Ranger policy.

    NOTE(review): this function mutates the caller's igamRoleDF in place
    (the "datasource" normalization below) — confirm that is intended.
    """
    logger.info("Grant privileges for access type 1")
    logger.info("Fetch metadata from Oracle for access type 1")
    # ---- Construct query and fetch from Oracle ----
    queryParams = f"'{config.source_schema}.{config.source_table}'"
    queryMetadataAccessType1 = tbq.get_query_metadata_access_type1(config.oracle_metadata_table)
    queryWithParamsAccessType1 = applyQueryParameters(queryMetadataAccessType1, queryParams)
    logger.info("Metadata table query: " )
    # NOTE(review): the chained 'df=' creates an unused local overwritten below.
    jdbcMetaDataAccessType1DF = df=execute_oracle_query(queryWithParamsAccessType1)
    # ---- Normalize columns (string-strip; upper-case the source keys) ----
    df = jdbcMetaDataAccessType1DF.copy()
    df["rar3_type_of_access"] = df["rar3_type_of_access"].astype(str).str.strip()
    df["source"] = df["source"].astype(str).str.strip().str.upper()
    igamRoleDF["datasource"] = igamRoleDF["datasource"].astype(str).str.strip().str.upper()
    # ---- Branch A: source != service_name -> inner join on datasource ----
    left_a = (
        df.loc[
            (df["rar3_type_of_access"] == "1") & (df["source"] != config.service_name),
            ["table_name", "source"]
        ]
        .drop_duplicates()
    )
    branch_a = (
        left_a.merge(
            igamRoleDF,
            left_on="source",
            right_on="datasource",
            how="inner"
        )
        [["table_name", "source", "subsource_id", "igam_entitlement", "environment"]]
        .drop_duplicates()
    )
    # ---- Branch B: source == service_name (CROSS JOIN with igamRoleDF) ----
    left_b = (
        df.loc[
            (df["rar3_type_of_access"] == "1") & (df["source"] == config.service_name),
            ["table_name", "source"]
        ]
        .drop_duplicates()
    )
    if not left_b.empty:
        branch_b = (
            left_b.merge(igamRoleDF, how="cross")
            [["table_name", "source", "subsource_id", "igam_entitlement", "environment"]]
            .drop_duplicates()
        )
    else:
        branch_b = pd.DataFrame(columns=["table_name", "source", "subsource_id", "igam_entitlement", "environment"])
    # ---- UNION (distinct) of both branches ----
    typeOneDF = (
        pd.concat([branch_a, branch_b], ignore_index=True)
        .drop_duplicates()
        .reset_index(drop=True)
    )
    # ---- Collect IGAM entitlements (stripped strings, NaNs dropped) ----
    igam_entitlements = (
        typeOneDF["igam_entitlement"]
        .dropna()
        .astype(str)
        .str.strip()
        .tolist()
    )
    # Merge with optional full access list
    if config.full_access_entitlement_list is None:
        combined_entitlements = igam_entitlements
    else:
        full_access_list_clean = config.full_access_entitlement_list
        combined_entitlements = igam_entitlements + full_access_list_clean
    # Build the policy parameter dict for the Ranger helper
    params = ranger.add_table_permission_groups(
        config.corporate_store,
        config.target_table,
        config.access_type,
        config.source_table,
        combined_entitlements
    )
    # Generate the final table-level policy (no policy-id suffix)
    formattedYaml = ranger.generate_policy(params,env_config, None)
    logger.info(f"Final YAML format")
    return formattedYaml
def accessType_2A(config, env_config, igamRoleDF):
    """Grant column-level access for access type 2a.

    Joins type-2a metadata rows (table/column pairs) against the IGAM role
    table, groups entitlements by the set of columns they may see, and emits
    one column-restricted Ranger policy per distinct column list (numbered
    policy_1, policy_2, ...). If a full-access entitlement list is configured,
    a final '*'-column policy named 'full_access' is emitted for it.
    """
    logger.info("Grant privileges for access type 2a")
    logger.info("Fetch the metadata in Oracle for access type 2a")
    # ---- Construct query and fetch from Oracle ----
    queryParams = f"'{config.source_schema}.{config.source_table}'"
    queryMetadataAccessType2a = tbq.get_query_metadata_access_type2a(config.oracle_metadata_table)
    queryWithParamsAccessType2a = applyQueryParameters(queryMetadataAccessType2a, queryParams)
    logger.info(f"Meta data table query: {queryWithParamsAccessType2a} ")
    jdbcMetaDataAccessType2aDF = execute_oracle_query(queryWithParamsAccessType2a)
    # ---- Normalize columns (lower-case the access type, upper-case source) ----
    df = jdbcMetaDataAccessType2aDF.copy()
    df["rar3_type_of_access"] = df["rar3_type_of_access"].astype(str).str.strip().str.lower()
    df["source"] = df["source"].astype(str).str.strip().str.upper()
    print(df)
    roles = igamRoleDF.copy()
    # expected columns in igamRoleDF: subsource_id, igam_entitlement, environment (plus anything else kept)
    roles["subsource_id"] = roles["subsource_id"].astype(str).str.strip().str.upper()
    roles["igam_entitlement"] = roles["igam_entitlement"].astype(str).str.strip()
    # ---- Branch A: source != service_name -> INNER JOIN on source == subsource_id ----
    left_a = (
        df.loc[
            (df["rar3_type_of_access"] == "2a")
            & (df["source"] != config.service_name.upper()),
            ["table_name", "column_name", "source"]
        ]
    )
    branch_a = (
        left_a.merge(
            roles,
            left_on="source",
            right_on="subsource_id",
            how="inner"
        )
        .drop(columns=["subsource_id", "source"], errors="ignore")
        [["table_name", "column_name", "igam_entitlement", "environment"]]
    )
    # ---- Branch B: source == service_name -> CROSS JOIN with igamRoleDF ----
    left_b = (
        df.loc[
            (df["rar3_type_of_access"] == "2a")
            & (df["source"] == config.service_name.upper()),
            ["table_name", "column_name", "source"]
        ]
    )
    if not left_b.empty:
        try:
            branch_b = (
                left_b.merge(roles, how="cross")
                .drop(columns=["subsource_id", "source"], errors="ignore")
                [["table_name", "column_name", "igam_entitlement", "environment"]]
            )
        except TypeError:
            # pandas < 1.2 fallback: emulate cross join with a constant key
            left_b["_cj"] = 1
            roles["_cj"] = 1
            branch_b = (
                left_b.merge(roles, on="_cj")
                .drop(columns=["_cj", "subsource_id", "source"], errors="ignore")
                [["table_name", "column_name", "igam_entitlement", "environment"]]
            )
            # cleanup the helper column in case roles is reused later
            roles.drop(columns=["_cj"], inplace=True, errors="ignore")
    else:
        branch_b = pd.DataFrame(columns=["table_name", "column_name", "igam_entitlement", "environment"])
    # ---- UNION of both branches (no de-dup here, unlike type 1) ----
    one_df = (
        pd.concat([branch_a, branch_b], ignore_index=True)
        .reset_index(drop=True)
    )
    # ---- Group 1: (table_name, igam_entitlement) -> sorted, comma-joined column_list ----
    tmp = one_df.sort_values(["table_name", "igam_entitlement", "column_name"], kind="mergesort")
    new_df = (
        tmp.groupby(["table_name", "igam_entitlement"], as_index=False)["column_name"]
        .apply(lambda s: ",".join(s.dropna().astype(str).tolist()))
        .rename(columns={"column_name": "column_list"})
    )
    # Columns: table_name, igam_entitlement, column_list
    # ---- Group 2: (table_name, column_list) -> comma-joined igam_entitlement ----
    grouped = (
        new_df.groupby(["table_name", "column_list"], as_index=False)["igam_entitlement"]
        .apply(lambda s: ",".join(s.dropna().astype(str).tolist()))
    )
    # Columns: table_name, column_list, igam_entitlement
    # ---- ROW_NUMBER() OVER (ORDER BY column_list) -> policy_id ----
    grouped = grouped.sort_values(["column_list"], kind="mergesort")
    grouped["policy_id"] = np.arange(1, len(grouped) + 1).astype(int)
    # ---- Emit policies: one per (table_name, column_list) row ----
    for _, row in grouped.iterrows():
        entitlements_list = [e.strip() for e in str(row["igam_entitlement"]).split(",") if e.strip()]
        columns_list = [c.strip() for c in str(row["column_list"]).split(",") if c.strip()]
        policy_id = str(int(row["policy_id"]))
        params = ranger.add_table_permission_groups(
            config.corporate_store,
            config.target_table,
            config.access_type,  # "2a"
            config.source_table,
            entitlements_list,
            columns_list=columns_list
        )
        ranger.generate_policy(params, env_config, policy_id)
    # ---- Optional: append full-access policy if list provided on config ----
    if getattr(config, "full_access_entitlement_list", None):
        # Accept either a ready list or a comma-separated string.
        if isinstance(config.full_access_entitlement_list, list):
            full_access_list = config.full_access_entitlement_list
        else:
            full_access_list = [s.strip() for s in str(config.full_access_entitlement_list).split(",") if s.strip()]
        params_full = ranger.add_table_permission_groups(
            config.corporate_store,
            config.target_table,
            config.access_type,  # keep same access type per the existing pattern
            config.source_table,
            full_access_list
        )
        ranger.generate_policy(params_full, env_config, "full_access")
def accessType_2B(config, env_config,igamRoleDF):
    """Grant row-level access for access type 2b.

    Aggregates IGAM roles per (environment, entitlement), builds row-level
    policy parameters from the unique entitlements, and emits the 2b
    row-level YAML. A combined table-level policy is generated only when a
    full-access entitlement list is configured.

    NOTE(review): accessType2bValidList is computed but never used beyond
    mirroring the original log payload — confirm it can be removed.
    NOTE(review): ranger.yaml_format_2b is always passed
    config.full_access_entitlement_list, even when it is falsy — confirm the
    helper tolerates that.
    """
    logger.info(f"Grant privileges for access type {config.access_type}")
    logger.info("Fetch the metadata in Oracle for access type 2b")
    # --- Validate required columns ---
    required = {"environment", "igam_entitlement", "subsource_id"}
    missing = required - set(igamRoleDF.columns)
    if missing:
        raise KeyError(f"igamRoleDF missing required column(s): {sorted(missing)}")
    # --- Normalize to strings (robust against None/NaN); work on a copy ---
    igamRoleDF = igamRoleDF.copy()
    igamRoleDF["environment"] = igamRoleDF["environment"].astype(str).str.strip()
    igamRoleDF["igam_entitlement"] = igamRoleDF["igam_entitlement"].astype(str).str.strip()
    igamRoleDF["subsource_id"] = igamRoleDF["subsource_id"].astype(str).str.strip()
    # --- Aggregation: per (environment, igam_entitlement) collect unique subsource_id list ---
    # Keep a stable order by sorting (mergesort is stable); remove empties.
    agg_df = (
        igamRoleDF.loc[igamRoleDF["subsource_id"].ne(""), ["environment", "igam_entitlement", "subsource_id"]]
        .drop_duplicates()
        .sort_values(["environment", "igam_entitlement", "subsource_id"], kind="mergesort")
        .groupby(["environment", "igam_entitlement"], as_index=False)["subsource_id"]
        .agg(lambda s: ",".join(s.unique()))
        .rename(columns={"subsource_id": "subsource_id_list"})
    )
    # List of tuples (IGAM_ENTITLEMENT, subsource_id_list) — mirrors the log payload
    accessType2bValidList = list(zip(
        agg_df["igam_entitlement"].astype(str),
        agg_df["subsource_id_list"].astype(str)
    ))
    # --- Entitlements for policy generation (unique, non-empty) ---
    igam_entitlements = (
        igamRoleDF["igam_entitlement"]
        .dropna()
        .map(str)
        .str.strip()
        .loc[lambda s: s.ne("")]
        .drop_duplicates()
        .tolist()
    )
    # --- Row-level permission parameters ---
    params_row_level = ranger.add_table_permission_groups(
        config.corporate_store,
        config.target_table,
        config.access_type,
        config.source_table,
        igam_entitlements
    )
    # --- Table-level permissions, merging in full-access entitlements if provided ---
    if getattr(config, "full_access_entitlement_list", None):
        combined_entitlements = igam_entitlements + config.full_access_entitlement_list
    else:
        combined_entitlements = igam_entitlements
    # --- Emit policies: table-level only when full-access roles exist ---
    if getattr(config, "full_access_entitlement_list", None):
        params = ranger.add_table_permission_groups(
            config.corporate_store,
            config.target_table,
            config.access_type,
            config.source_table,
            combined_entitlements
        )
        ranger.generate_policy(params, env_config)
    ranger.yaml_format_2b(params_row_level,env_config, config.full_access_entitlement_list) # row-level policy
    logger.info("Final YAML format emitted for 2B.")
def accessType_3(config,env_config, igamRoleDF):
    """Grant portfolio-scoped (type 3) access: row filter + table-level policy.

    Python/pandas translation of the Scala accessType_3.

    Selects the row-filter SQL expression by source table (special cases for
    nh_portfoliotree, nh_position, nh_portfolio_access, nh_limit_access;
    otherwise a configurable standard lookup), restricts entitlements to
    subsource_id == 'TMS', then emits the row-level policy via
    ranger.yaml_format_3 and a table-level policy via ranger.yaml_format_1.

    Expects igamRoleDF to have at least: ['igam_entitlement', 'subsource_id'].

    NOTE(review): the locally-built ``rowFilter`` string below is never used —
    the raw filter expression is passed to yaml_format_3 instead; confirm the
    dead code can be deleted.
    """
    # --- 1) Filter entitlements where subsource_id = 'TMS' ---
    if not {"igam_entitlement", "subsource_id"}.issubset(igamRoleDF.columns):
        missing = {"igam_entitlement", "subsource_id"} - set(igamRoleDF.columns)
        raise KeyError(f"igamRoleDF missing required column(s): {sorted(missing)}")
    new_df = (
        igamRoleDF.loc[
            igamRoleDF["subsource_id"].astype(str).str.upper() == "TMS",
            ["igam_entitlement"]
        ].drop_duplicates()
    )
    accessType3ValidList = new_df["igam_entitlement"].astype(str).str.strip().tolist()
    # --- 2) Build params for row-level groups (type 3) ---
    params_row_level = ranger.add_table_permission_groups(
        config.corporate_store,
        config.target_table,
        config.access_type,
        config.source_table,
        accessType3ValidList
    )
    corp = str(config.corporate_store).lower()
    src_tbl = str(config.source_table).lower()
    # --- 3) Compose the filter expressions (match Scala strings) ---
    sqlCreateView3NonRestrString_Ptree = (
        "(parent_fk in ( "
        f"select portfolio_fk from {corp}.nh_portfolio_access "
        "where lower(user_id) LIKE concat('%', lower(regexp_extract(current_user(),'[^@]*',0)), '%') "
        "AND to_date(a_valid_to) > current_timestamp() "
        ")) AND (child_fk in ( "
        f"select portfolio_fk from {corp}.nh_portfolio_access "
        "where lower(user_id) LIKE concat('%', lower(regexp_extract(current_user(),'[^@]*',0)), '%') "
        "AND to_date(a_valid_to) > current_timestamp() "
        "))"
    )
    sqlCreateView3NonRestrString_Pos = (
        "position_key in ( "
        f"select position_key from {corp}.nh_portfolio_access a "
        f"inner join {corp}.nh_position b on ( "
        "(b.portfolio_fk = a.portfolio_fk and b.portfolio_fk is not NULL) or "
        "(b.portfolio_compare_fk = a.portfolio_fk and b.portfolio_compare_fk is not NULL) "
        ") "
        "where lower(user_id) LIKE concat('%', lower(regexp_extract(current_user(),'[^@]*',0)), '%') "
        "AND to_date(a_valid_to) > current_timestamp() "
        ")"
    )
    sqlCreateView3PortAccess = "lower(user_id) LIKE concat('%', lower(regexp_extract(current_user(),'[^@]*',0)), '%')"
    sqlCreateView3LimAccess = "lower(user_id) LIKE concat('%', lower(regexp_extract(current_user(),'[^@]*',0)), '%')"
    # Standard case uses the configured key columns/table names
    key_col = getattr(config, "type3SourceTableKeyColumn", None)
    acc_col = getattr(config, "type3AccessTableKeyColumn", None)
    acc_table= getattr(config, "type3AccessTable", None)
    if not all([key_col, acc_col, acc_table]):
        # Only needed for the default branch; keep None if the config doesn't use the default
        key_col = key_col or "source_key_col"
        acc_col = acc_col or "access_key_col"
        acc_table = acc_table or "type3_access_table"
    sqlCreateView3NonRestrString_Stdrd = (
        f"{key_col} in (select {acc_col} from {corp}.{acc_table} "
        "where lower(user_id) LIKE concat('%', lower(regexp_extract(current_user(),'[^@]*',0)), '%') "
        "AND to_date(a_valid_to) > current_timestamp())"
    )
    # --- 4) Choose the filter by source table (matches Scala match/case) ---
    if src_tbl == "nh_portfoliotree":
        sqlCreateViewType3Filter = sqlCreateView3NonRestrString_Ptree
    elif src_tbl == "nh_position":
        sqlCreateViewType3Filter = sqlCreateView3NonRestrString_Pos
    elif src_tbl == "nh_portfolio_access":
        sqlCreateViewType3Filter = sqlCreateView3PortAccess
    elif src_tbl == "nh_limit_access":
        sqlCreateViewType3Filter = sqlCreateView3LimAccess
    else:
        sqlCreateViewType3Filter = sqlCreateView3NonRestrString_Stdrd
    # --- 5) Row filter YAML block (uses groups from params_row_level) ---
    # Expecting params_row_level like {'igam_roles': '...'}; adjust key if the API differs.
    igam_roles_lower = str(params_row_level.get("igam_roles", "")).lower()
    rowFilter = (
        "- groups:\n"
        f"  {igam_roles_lower}\n"
        "  accesses:\n"
        "  - select\n"
        f"  filterExpr: \"{sqlCreateViewType3Filter}\"\n"
        "  "
    )
    # --- 6) Handle optional full access entitlements ---
    if config.full_access_entitlement_list:
        paramsFullAccess = ranger.add_table_permission_groups(
            config.corporate_store,
            config.target_table,
            config.access_type,
            config.source_table,
            config.full_access_entitlement_list
        )
        full_groups_lower = str(paramsFullAccess.get("igam_roles", "")).lower()
        """
        fullAccessFilter = (
        "- groups:\n"
        f"  {full_groups_lower}\n"
        "  accesses:\n"
        "  - select\n"
        "  filterExpr: \"1=1\"\n"
        "  "
        )
        """
        params_table_level = ranger.add_table_permission_groups(
            config.corporate_store,
            config.target_table,
            config.access_type,
            config.source_table,
            accessType3ValidList + config.full_access_entitlement_list
        )
    else:
        fullAccessFilter = ""
        params_table_level = ranger.add_table_permission_groups(
            config.corporate_store,
            config.target_table,
            config.access_type,
            config.source_table,
            accessType3ValidList
        )
    # --- 7) Render the policies (row-level then table-level) ---
    ranger.yaml_format_3(params_row_level,env_config,sqlCreateViewType3Filter,config.full_access_entitlement_list ) # base type 3 yaml
    ranger.yaml_format_1(params_table_level,env_config) # table-level yaml
#5 create extra policies for super-users
#6 refresh metadata
def run_process(env_file, env, service_name,source_schema,source_table,sentry_role_environment):
    """End-to-end replication flow for one Oracle source table.

    Steps: load environment config and secrets, resolve the target table
    alias and its access type from Oracle, (re)create the external/CTAS
    table in the corporate store, then emit the matching Ranger policies.

    Args:
        env_file: path to the YAML environment configuration.
        env: environment key inside the config (e.g. 'dev', 'tst').
        service_name: data service key (e.g. 'rar', 'rqsd', 'mopdb').
        source_schema: Oracle owner of the source table.
        source_table: source table name.
        sentry_role_environment: value matched against the IGAM role table.

    Raises:
        Exception: when secrets retrieval fails; the original Oracle lookup
        exceptions are re-raised after logging.
    """
    #1 receive table name and check for target table and access type
    env_dict=initialize_config(env_file)
    env_config=env_dict[env]
    # RQSD uses dedicated credentials; swap them in before resolving secrets.
    if service_name.lower()=='rqsd':
        env_config["DEVO_SECRET"]=env_config["DEVO_SECRET_RQSD"]
        env_config["DEVO_USERNAME"]=env_config["DEVO_USERNAME_RQSD"]
    try:
        devo_secret_name = env_config["DEVO_SECRET"]
        env_config["DEVO_SECRET"]= get_secret(devo_secret_name)
    # NOTE(review): bare except + raise(Exception) discards the original
    # error and raises the bare Exception class — consider `raise` instead.
    except:
        logger.error("Failed to retrieve credentials from secrets")
        raise(Exception)
    db_config=env_dict[service_name]
    try:
        target_table=get_target_table(db_config['oracle_mgmt_table'],source_schema,source_table,env)['table_alias'][0]
    except Exception as e:
        logger.error("Table not found in oracle management table")
        logger.error("Exception: %s", e)
        logger.error("Traceback:\n%s", traceback.format_exc())
        raise
    try:
        access_type=get_type_ofAccess(db_config['oracle_metadata_table'],source_schema,source_table,env)['rar3_type_of_access'][0].strip()
    except Exception as e:
        logger.error("Table not found in oracle metadata inventory")
        logger.error("Exception: %s", e)
        logger.error("Traceback:\n%s", traceback.format_exc())
        raise
    # Assemble the flat option set consumed by FlowOptions.
    args={
        'corporate_store':db_config['corporate_store'],
        'service_name': service_name,
        'source_schema':source_schema,
        'source_table':source_table,
        'oracle_metadata_table':db_config['oracle_metadata_table'],
        'oracle_igam_table':db_config['oracle_igam_table'],
        'oracle_mgmt_table': db_config['oracle_mgmt_table'],
        'target_table':target_table,
        'sentry_role_environment':sentry_role_environment,
        'target_s3_bucket': env_config["BUCKET_PREFIX"]+db_config['target_s3_bucket'] ,
        'tech_meta_data_fields': db_config['tech_meta_data_fields'],
        'full_access_entitlement_list':env_config[f"FULL_ACCESS_LIST_{service_name.upper()}"].split(','),
        'access_type': access_type
    }
    config=fo.Options(args)
    #2 load metadata
    tableFields=loadMetadataTable(config,env)
    igamRoles=readIGAMRoles(config,env)
    #3 drop table and policies
    deleteExternalTable(config,env_config)
    #4 create external table and policies
    # Target names ending in _EXT get a plain external table; others are CTAS'd
    # from their _EXT counterpart.
    if (config.target_table[-4:].upper() == '_EXT'):
        createExternalTables( config, tableFields,env_config )
    else:
        createTableFromExternal( config, tableFields,env_config)
    accessTypeMapper(config,env_config,igamRoles)
    #5 refresh metadata
    #execute_query(f"INVALIDATE METADATA {config.corporate_store}.{config.target_table}",env_config["DEVO_USERNAME"],env_config['IMPALA_HOSTNAME'],env_config['DEVO_SECRET'])
    #execute_query(f"COMPUTE STATS {config.corporate_store}.{config.target_table}",env_config["DEVO_USERNAME"],env_config['IMPALA_HOSTNAME'],env_config['DEVO_SECRET'])
#run_process("/home/dbt/Marco/mrds_elt/python/devo_replicator/env_config.yaml",'tst','mopdb','MPEC','T_MPEC','TEST/INTEGRATION')
#run_process("/home/dbt/Marco/mrds_elt/python/devo_replicator/config/env_config.yaml",'tst','rar','CORR_RAR','NH_ASSET','TEST/INTEGRATION')
#run_process("/home/dbt/Marco/mrds_elt/python/devo_replicator/config/env_config.yaml",'dev','rar','CORR_RAR','NH_LIMIT','TEST/INTEGRATION')
# NOTE(review): module-level side effect — this executes the whole replication
# flow on import; consider moving it under `if __name__ == "__main__":`.
run_process("/home/dbt/Marco/mrds_elt/python/devo_replicator/config/env_config.yaml",'dev','rar','CORR_RAR','NH_Asset_transactial_data'.upper(),'TEST/INTEGRATION')
"""
df=execute_oracle_query('select owner, table_name from CT_MRDS.A_DEVO_REPLICA_MGMT_RQSD')
listfail=[]
for index, row in df.iterrows():
try:
print("running table: ",row["table_name"])
run_process("/home/dbt/Marco/mrds_elt/python/devo_replicator/config/env_config.yaml",'tst','rqsd',row['owner'],row['table_name'].upper(),'TEST/INTEGRATION')
except:
print("failed")
listfail.append(row["table_name"])
print("succeded")
print(listfail)
"""
'''{"id": 48754, "guid": "d75f1491-538d-402a-a8ac-e7e21ac0be53", "isEnabled": true, "version": 1, "service": "cm_hive", "name": "cpo_crp_rar_mu_asset_code_map_2a_policy_1", "policyType": 0, "policyPriority": 0, "description": "created-ranger_client-v0.0.6-2025-10-17T14:28:24.135108", "isAuditEnabled": true, "resources": {"database": {"values": ["crp_rar"], "isExcludes": false, "isRecursive": false},
"column": {"values": ["ASSET_FK", "ASSET_FK", "A_DWH_LOAD_SET_FK", "A_DWH_LOAD_SET_FK", "A_VALID_FROM", "A_VALID_FROM", "A_VALID_TO", "A_VALID_TO", "CODE_TYPE_NO_ID", "CODE_TYPE_NO_ID", "CODE_VALUE", "CODE_VALUE", "INDEP_SUBPROCESS_FK", "INDEP_SUBPROCESS_FK", "TEC_EXECUTION_DATE", "TEC_EXECUTION_DATE", "TEC_INGESTION_DATE", "TEC_INGESTION_DATE", "TEC_RUN_ID", "TEC_RUN_ID"], "isExcludes": false, "isRecursive": false},
"table": {"values": ["MU_ASSET_CODE_MAP"], "isExcludes": false, "isRecursive": false}}, "policyItems": [{"accesses": [{"type": "select", "isAllowed": true}],
"groups": ["a_mopdb_ea", "disc-au-bda"], "delegateAdmin": false}], "serviceType": "hive", "isDenyAllElse": false}
{"id": 48755, "guid": "5ff857c2-3683-4178-98ce-5932c0677cd4", "isEnabled": true, "version": 1, "service": "cm_hive", "name": "cpo_crp_rar_mu_asset_code_map_2a_policy_2", "policyType": 0, "policyPriority": 0, "description": "created-ranger_client-v0.0.6-2025-10-17T14:28:24.135108", "isAuditEnabled": true, "resources": {"database": {"values": ["crp_rar"], "isExcludes": false, "isRecursive": false},
"column": {"values": ["ASSET_FK", "A_DWH_LOAD_SET_FK", "A_VALID_FROM", "A_VALID_TO", "CODE_TYPE_NO_ID", "CODE_VALUE", "INDEP_SUBPROCESS_FK", "TEC_EXECUTION_DATE", "TEC_INGESTION_DATE", "TEC_RUN_ID"], "isExcludes": false, "isRecursive": false},
"table": {"values": ["MU_ASSET_CODE_MAP"], "isExcludes": false, "isRecursive": false}}, "policyItems": [{"accesses": [{"type": "select", "isAllowed": true}], "
groups": ["su-omd-reuters-users", "a_mopdb_excess_liquidity", "a-mora-lba-exp-a", "a_rar_csdb_reference_data", "a_mopdb_uc", "a_rar_csdb_ratings_data", "a_mopdb_credit_operations", "a_rar_fxcd_data", "a_rar_mdp_bbg_data", "disc-ac-riad_cnf_n-r", "a-mora-lba-ana-a", "a_mopdb_tms_data", "disc-ac-riad_core-r", "a_mopdb_mpec", "a-led-ana-a", "a-led-exp-a", "a_mopdb_ela_all"], "delegateAdmin": false}], "serviceType": "hive", "isDenyAllElse": false}
{"id": 48756, "guid": "1071767f-8ef6-47be-bb9b-7077ed9e9a90", "isEnabled": true, "version": 1, "service": "cm_hive", "name": "cpo_crp_rar_mu_asset_code_map_2a_policy_full_access", "policyType": 0, "policyPriority": 0, "description": "created-ranger_client-v0.0.6-2025-10-17T14:28:24.135108", "isAuditEnabled": true, "resources": {"database": {"values": ["crp_rar"], "isExcludes": false, "isRecursive": false}, "column": {"values": ["*"], "isExcludes": false, "isRecursive": false}, "table": {"values": ["MU_ASSET_CODE_MAP"], "isExcludes": false, "isRecursive": false}},
"policyItems": [{"accesses": [{"type": "select", "isAllowed": true}], "groups": ["disc-ac-rar-r"], "delegateAdmin": false}], "serviceType": "hive", "isDenyAllElse": false}'''
'''
{"isEnabled": true, "isDenyAllElse": false, "service": "cm_hive", "name": "cpo_crp_rar_mu_asset_code_map_ext_2a_policy_1", "resources": {"database": {"values": ["crp_rar"], "isExcludes": false, "isRecursive": false}, "table": {"values": ["MU_ASSET_CODE_MAP_EXT"], "isExcludes": false, "isRecursive": false},
"column": {"values": ["ASSET_FK", "ASSET_FK", "A_DWH_LOAD_SET_FK", "A_DWH_LOAD_SET_FK", "A_VALID_FROM", "A_VALID_FROM", "A_VALID_TO", "A_VALID_TO", "CODE_TYPE_NO_ID", "CODE_TYPE_NO_ID", "CODE_VALUE", "CODE_VALUE", "INDEP_SUBPROCESS_FK", "INDEP_SUBPROCESS_FK", "TEC_EXECUTION_DATE", "TEC_EXECUTION_DATE", "TEC_INGESTION_DATE", "TEC_INGESTION_DATE", "TEC_RUN_ID", "TEC_RUN_ID"], "isExcludes": false, "isRecursive": false}}, "policyItems": [{"delegateAdmin": false,
"groups": ["disc-tu-bda", "t_mopdb_ea"], "accesses": [{"type": "select", "isAllowed": true}]}]}
{"isEnabled": true, "isDenyAllElse": false, "service": "cm_hive", "name": "cpo_crp_rar_mu_asset_code_map_ext_2a_policy_2", "resources": {"database": {"values": ["crp_rar"], "isExcludes": false, "isRecursive": false},
"table": {"values": ["MU_ASSET_CODE_MAP_EXT"], "isExcludes": false, "isRecursive": false},
"column": {"values": ["ASSET_FK", "A_DWH_LOAD_SET_FK", "A_VALID_FROM", "A_VALID_TO", "CODE_TYPE_NO_ID", "CODE_VALUE", "INDEP_SUBPROCESS_FK", "TEC_EXECUTION_DATE", "TEC_INGESTION_DATE", "TEC_RUN_ID"], "isExcludes": false, "isRecursive": false}}, "policyItems": [{"delegateAdmin": false,
"groups": ["a-led-ana-t", "a-led-exp-t", "a-mora-lba-ana-t", "a-mora-lba-exp-t", "disc-tc-riad_cnf_n-r", "disc-tc-riad_core-r", "su-omd-reuters-users", "t_mopdb_credit_operations", "t_mopdb_ela_all", "t_mopdb_excess_liquidity", "t_mopdb_mpec", "t_mopdb_tms_data", "t_mopdb_uc", "t_rar_csdb_ratings_data", "t_rar_csdb_reference_data", "t_rar_fxcd_data", "t_rar_mdp_bbg_data"],
"accesses": [{"type": "select", "isAllowed": true}]}]}
{"isEnabled": true, "isDenyAllElse": false, "service": "cm_hive", "name": "cpo_crp_rar_mu_asset_code_map_ext_2a_policy_full_access", "resources": {"database": {"values": ["crp_rar"], "isExcludes": false, "isRecursive": false},
"table": {"values": ["MU_ASSET_CODE_MAP_EXT"], "isExcludes": false, "isRecursive": false}, "column": {"values": ["*"], "isExcludes": false, "isRecursive": false}}, "policyItems": [{"delegateAdmin": false,
"groups": ["disc-dc-rar-r"], "accesses": [{"type": "select", "isAllowed": true}]}]}
'''

View File

@@ -0,0 +1,130 @@
# Default fully-qualified Oracle metadata inventory table used by the query
# builders below when no explicit table name is supplied.
metadata_table = "DW_RAR.NH_METADATA_INVENTORY"
def get_query_metadata(metadata_table, owner, table_name):
    """Build the Oracle SQL that maps one table's column metadata to Hive types.

    The query reads the metadata inventory, keeps only currently valid rows
    (``a_valid_to > sysdate``) for ``owner.table_name`` (case-insensitive) and
    derives per column:

    * ``data_precision_hive`` / ``data_scale_hive`` — precision/scale clamped to
      what a Hive DECIMAL can represent (NULL when out of range).
    * ``data_type_hive`` — 'Decimal' for representable NUMBER columns, 'String'
      for everything else (CHAR/VARCHAR2/TIMESTAMP/DATE and the fallback).
    * ``data_type_string`` — rendered type text, e.g. ``Decimal(10,2)``.
    * ``data_description`` — description with single quotes escaped so it can be
      embedded inside a quoted literal.

    Args:
        metadata_table: Fully qualified inventory table, e.g.
            ``DW_RAR.NH_METADATA_INVENTORY``.
        owner: Oracle schema owner of the described table.
        table_name: Name of the table whose columns are described.

    Returns:
        str: Complete SQL text. Values are interpolated via ``str.format`` with
        no quoting/escaping — callers must pass trusted identifiers only.
    """
    query_metadata = (
        "WITH metaDF AS ( "
        "SELECT owner, table_name, column_id, column_name, data_type, data_precision, data_scale, "
        # Precision usable by Hive DECIMAL (NULL when not representable).
        "CASE WHEN data_precision IS NULL AND data_scale IS NULL THEN NULL "
        "WHEN data_precision IS NOT NULL AND data_scale IS NULL THEN data_precision "
        "WHEN CAST(data_precision AS INT) >= CAST(data_scale AS INT) AND CAST(data_scale AS INT) >= 0 THEN data_precision "
        "WHEN CAST(data_precision AS INT) < CAST(data_scale AS INT) AND CAST(data_scale AS INT) <= 38 AND CAST(data_scale AS INT) > 0 THEN data_scale "
        "WHEN CAST(data_precision AS INT) < CAST(data_scale AS INT) AND (CAST(data_scale AS INT) > 38 OR CAST(data_scale AS INT) < 0) THEN NULL "
        "ELSE NULL END AS data_precision_hive, "
        # Scale usable by Hive DECIMAL (0 when only precision is given; NULL when out of range).
        "CASE WHEN data_precision IS NULL AND data_scale IS NULL THEN NULL "
        "WHEN data_precision IS NOT NULL AND data_scale IS NULL THEN CAST(0 AS INT) "
        "WHEN CAST(data_precision AS INT) >= CAST(data_scale AS INT) AND CAST(data_scale AS INT) >= 0 THEN data_scale "
        "WHEN CAST(data_precision AS INT) < CAST(data_scale AS INT) AND CAST(data_scale AS INT) <= 38 AND CAST(data_scale AS INT) > 0 THEN data_scale "
        "WHEN CAST(data_precision AS INT) < CAST(data_scale AS INT) AND (CAST(data_scale AS INT) > 38 OR CAST(data_scale AS INT) < 0) THEN NULL "
        "ELSE NULL END AS data_scale_hive, "
        # Hive logical type: representable NUMBERs become Decimal, all else String.
        "CASE WHEN data_type LIKE '%NUMBER%' AND data_precision IS NULL AND data_scale IS NULL THEN 'String' "
        "WHEN data_type LIKE '%NUMBER%' AND data_precision IS NOT NULL AND data_scale IS NULL THEN 'Decimal' "
        "WHEN data_type LIKE '%NUMBER%' AND CAST(data_precision AS INT) >= CAST(data_scale AS INT) AND CAST(data_scale AS INT) >= 0 THEN 'Decimal' "
        "WHEN data_type LIKE '%NUMBER%' AND CAST(data_precision AS INT) < CAST(data_scale AS INT) AND CAST(data_scale AS INT) <= 38 AND CAST(data_scale AS INT) > 0 THEN 'Decimal' "
        "WHEN data_type LIKE '%NUMBER%' AND CAST(data_precision AS INT) < CAST(data_scale AS INT) AND (CAST(data_scale AS INT) > 38 OR CAST(data_scale AS INT) < 0) THEN 'String' "
        "WHEN data_type LIKE '%CHAR%' THEN 'String' "
        "WHEN data_type LIKE '%VARCHAR2%' THEN 'String' "
        "WHEN data_type LIKE '%TIMESTAMP%' THEN 'String' "
        "WHEN data_type LIKE '%DATE%' THEN 'String' "
        "ELSE 'String' END AS data_type_hive, "
        # Escape single quotes so the description survives embedding in a quoted literal.
        "REGEXP_REPLACE(data_description, '''', '\\''') AS data_description "
        "FROM {0} "
        # Case-insensitive match on OWNER.TABLE_NAME; only currently valid rows.
        "WHERE lower(owner||'.'||table_name) = lower('{1}'||'.'||'{2}') "
        "AND a_valid_to > sysdate) "
        "SELECT owner, table_name, column_id, column_name, data_type, data_precision, data_scale, "
        "data_precision_hive, data_scale_hive, data_type_hive, "
        # Render e.g. Decimal(10,2); non-decimal types pass through unchanged.
        "CASE WHEN data_type_hive = 'Decimal' THEN 'Decimal(' || COALESCE(CAST(data_precision_hive AS VARCHAR2(30)), '') || ',' || COALESCE(CAST(data_scale_hive AS VARCHAR2(30)), '') || ')' "
        "ELSE data_type_hive END AS data_type_string, data_description "
        "FROM metaDF "
        "ORDER BY CAST(column_id AS INT) "
    ).format(metadata_table, owner, table_name)
    return query_metadata
def get_query_metadata_access_type1(metadata_table):
    """Return SQL selecting access-type-1 tables from the metadata inventory.

    Two UNION-ed branches over the currently valid rows
    (``a_valid_to > sysdate``, ``rar3_type_of_access = '1'``): tables from
    external sources (``list_of_sources NOT IN 'RAR'``) plus everything owned
    by ``CORR_REF_MAIN``. The literal ``$$$1`` token appears to be a
    placeholder substituted downstream with the owner.table_name value —
    confirm with the consumer of this query.

    NOTE(review): ``NOT IN 'RAR'`` — Oracle's IN condition normally takes a
    parenthesised list; confirm this parses as intended.
    """
    shared_filter = (
        "SELECT owner, table_name, list_of_sources as SOURCE, rar3_type_of_access "
        f"FROM {metadata_table} "
        "WHERE a_valid_to > sysdate "
        "AND rar3_type_of_access = '1' "
    )
    external_sources = (
        shared_filter
        + "AND list_of_sources NOT IN 'RAR' "
        + "AND lower(owner||'.'||table_name) = lower($$$1) "
    )
    corr_ref_main = (
        shared_filter
        + "AND owner = 'CORR_REF_MAIN' "
        + "AND lower(owner||'.'||table_name) = lower($$$1) "
    )
    return external_sources + "UNION " + corr_ref_main
def get_query_metadata_access_type2a(metadata_table):
    """Return SQL listing column-level source grants for access-type-2a tables.

    Structure of the generated query:

    * ``rar_columns`` — UNPIVOTs the per-source flag columns (tms, c2d_ea, ...)
      of the inventory into (owner, table_name, column_name, source) rows for
      the table matched via the literal ``$$$1`` placeholder (appears to be
      substituted downstream — confirm with the consumer). Tables whose
      ``list_of_sources`` contains a comma (multi-source) and a column literally
      named DATABASE are excluded.
    * ``dummy_entry`` — one synthetic (owner, table_name, 'RAR') row taken from
      the first unpivoted row.
    * ``disc_tec_fields`` — fans the dummy row out into the three technical
      columns TEC_INGESTION_DATE / TEC_EXECUTION_DATE / TEC_RUN_ID.
    * Final SELECT — UNION of the technical rows and the real column rows.

    Args:
        metadata_table: Fully qualified inventory table interpolated via
            ``str.format`` (trusted identifier only — no escaping is applied).

    Returns:
        str: The complete SQL text.
    """
    query_metadata_access_type2a = (
        "WITH rar_columns AS ( "
        "SELECT owner, table_name, column_name, source, rar3_type_of_access "
        "FROM ( "
        "SELECT owner, table_name, column_name, rar3_type_of_access, list_of_sources, "
        "tms, c2d_ea, c2d_ela, c2d_mpec, c2d_uc, ceph, lm, csdb_reference, "
        "csdb_ratings, fxcd, mdp_bbg, mdp_reu, riad_cl, riad_ou, sdw_estr, sdw_fx, "
        "top, rar, rtm, led, mdp_cma "
        "FROM {0} "
        "WHERE a_valid_to > sysdate "
        "AND rar3_type_of_access = '2a' "
        "AND lower(owner || '.' || table_name) = lower($$$1) "
        "AND list_of_sources NOT LIKE '%,%' "
        "AND upper(column_name) NOT IN ('DATABASE') "
        ") a "
        # Turn one row with N source-flag columns into up to N (column, source) rows.
        "UNPIVOT ( "
        "val FOR (source) IN ( "
        "tms AS 'TMS', c2d_ea AS 'C2D_EA', c2d_ela AS 'C2D_ELA', c2d_mpec AS 'C2D_MPEC', "
        "c2d_uc AS 'C2D_UC', ceph AS 'CEPH', lm AS 'LM', csdb_reference AS 'CSDB_REFERENCE', "
        "csdb_ratings AS 'CSDB_RATINGS', fxcd AS 'FXCD', mdp_bbg AS 'MDP_BBG', mdp_reu AS 'MDP_REU', "
        "riad_cl AS 'RIAD_CL', riad_ou AS 'RIAD_OU', sdw_estr AS 'SDW_ESTR', sdw_fx AS 'SDW_FX', "
        "top AS 'TOP', rar AS 'RAR', rtm AS 'RTM', led AS 'LED', mdp_cma AS 'MDP_CMA') "
        ") "
        "ORDER BY owner, table_name, column_name "
        "), "
        # Single anchor row used to synthesize the technical metadata columns.
        "dummy_entry AS ( "
        "SELECT owner, table_name, 'RAR' as SOURCE, rar3_type_of_access "
        "FROM rar_columns "
        "FETCH FIRST ROW ONLY "
        "), "
        "disc_tec_fields AS ( "
        "SELECT owner, table_name, 'TEC_INGESTION_DATE' AS column_name, source, rar3_type_of_access "
        "FROM dummy_entry "
        "UNION "
        "( "
        "SELECT owner, table_name, 'TEC_EXECUTION_DATE' AS column_name, source, rar3_type_of_access "
        "FROM dummy_entry "
        ") "
        "UNION "
        "( "
        "SELECT owner, table_name, 'TEC_RUN_ID' AS column_name, source, rar3_type_of_access "
        "FROM dummy_entry "
        ") "
        ") "
        "SELECT owner, table_name, column_name, source, rar3_type_of_access "
        "FROM disc_tec_fields "
        "UNION "
        "SELECT owner, table_name, column_name, source, rar3_type_of_access "
        "FROM rar_columns "
    ).format(metadata_table)
    return query_metadata_access_type2a
def get_query_igam_roles(igam_table, service):
    """Return SQL selecting IGAM entitlement mappings for one service.

    Args:
        igam_table: Fully qualified Oracle table holding the IGAM/Sentry
            source mappings (interpolated via ``str.format`` — trusted
            identifier only).
        service: Service name; upper-cased into the SERVICE_NAME predicate.

    Returns:
        str: Query text. The literal ``$$$1`` token is left in place,
        presumably substituted with the environment name downstream — confirm
        with the consumer of this query.
    """
    # NOTE(review): the original implementation branched on service
    # ('rar' / 'mopdb' / other) but assigned the identical value 'mrds' to a
    # variable that was never used; the dead branch/variable was removed with
    # no behavior change.
    query_igam_roles = (
        "SELECT MRDS_subsource_id as Datasource, "
        "MRDS_subsource_id as subsource_id, "
        "MRDS_entitlement as IGAM_Entitlement, "
        "environment "
        "FROM {0} where lower(environment) = lower($$$1) and SERVICE_NAME='{1}'"
    ).format(igam_table, service.upper())
    return query_igam_roles

View File

@@ -0,0 +1,54 @@
"""Driver script for the crp_mopdb corporate store (MPEC.T_MPEC).

Builds an Options object from the hard-coded argument list and runs the
table-builder pipeline. Per the helper names, the steps are: load column
metadata from the Oracle inventory, create the external tables, read the
IGAM role mapping and apply the access-type policy — confirm details in
tableBuilderProcessor_2.
"""
import os
import mrds_elt.python.devo_replicator.FlowOptions as ro
import tableBuilderProcessor_2 as tbp
# NOTE(review): `import os` appears unused in this script — confirm before removing.
# setting variables
# Earlier example configurations (RAR, access types 1 and 2a) kept for reference:
# args = [
#     'corporate_store=crp_rar',
#     'source_schema=CORR_RAR',
#     'source_table=NH_F_RATING',
#     'target_table=NH_F_RATING',
#     'access_type=1',
#     'oracle_metadata_table=CORR_RAR.NH_METADATA_INVENTORY',
#     'oracle_igam_table=CT_REF.RAR_SOURCES_IGAM_SENTRY',
#     'sentry_role_environment=production',
#     'target_s3_bucket=s3a://devo-crp-ffppyd8q',
#     'tech_meta_data_fields=tec_ingestion_date String, tec_execution_date String, tec_run_id String',
#     'full_access_entitlement_list=DISC-PC-RAR-R'
# ]
# args = [
#     'corporate_store=crp_rar',
#     'source_schema=CORR_RAR',
#     'source_table=NH_ASSET',
#     'target_table=NH_ASSET',
#     'access_type=2a',
#     'oracle_metadata_table=CORR_RAR.NH_METADATA_INVENTORY',
#     'oracle_igam_table=CT_REF.RAR_SOURCES_IGAM_SENTRY',
#     'sentry_role_environment=production',
#     'target_s3_bucket=s3a://devo-crp-ffppyd8q',
#     'tech_meta_data_fields=tec_ingestion_date String, tec_execution_date String, tec_run_id String',
#     'full_access_entitlement_list=DISC-PC-RAR-R'
# ]
# Active configuration: MOPDB / MPEC.T_MPEC. Note there is no explicit
# target_table or access_type here, unlike the RAR examples above.
args = [
    'corporate_store=crp_mopdb',
    'source_schema=MPEC',
    'source_table=T_MPEC',
    'oracle_metadata_table=CT_MOPDB.MOPDB_METADATA_INVENTORY',
    'oracle_igam_table=CT_MOPDB.MOPDB_SOURCES_IGAM_SENTRY',
    'sentry_role_environment=production',
    'target_s3_bucket=s3a://devo-crp-sbul3ju3/mopdb/db',
    'tech_meta_data_fields=tec_ingestion_date String, tec_execution_date String, tec_run_id String',
    'full_access_entitlement_list='
]
# Parse the key=value pairs into an options object shared by every step.
rar_options = ro.Options(args)
# Step 1: pull column metadata for the configured table.
tableFields = tbp.loadMetadataTable(rar_options)
# Step 2: create the external tables in the corporate store.
tbp.createExternalTables_CRP_RAR(rar_options,tableFields )
# Step 3: read IGAM role mappings, then apply the access-type policies.
igamRoleDF = tbp.readIGAMRoles(rar_options)
tbp.accessTypeMapper(rar_options, igamRoleDF)

6
python/mrds_common/.gitignore vendored Normal file
View File

@@ -0,0 +1,6 @@
__pycache__
*.log
.venv
.tox
*.egg-info/
build

View File

@@ -0,0 +1,72 @@
# Changelog
All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [0.6.0] - 13-10-2025
### Added
- new type of column xpath_element_id
## [0.5.0] - 08-10-2025
### Added
- added new mandatory configuration parameter `archive_prefix`. App now archives source file to this location, before deleting it from inbox_prefix location.
- log app version at runtime.
### Changed
- improved logging when calling database function CT_MRDS.FILE_MANAGER.PROCESS_SOURCE_FILE
- removed local zip file deletion from version 0.4.0 to accommodate archiving at the end of the processing
## [0.4.1] - 03-10-2025
### Added
- `--version` flag to CLI, now shows package version from `mrds.__version__`. ([#179](https://gitlab.sofa.dev/mrds/mrds_elt/-/merge_requests/179))
## [0.4.0] - 03-10-2025
### Added
- App versioning!
- Streaming algorithm when reading, filtering and enriching csv files. This drastically improves application performance in regards to RAM usage.
- Unzipping now deletes local source zip file, after data has been extracted.
## [0.3.1] - 30-09-2025
### Fixed
- fixed small bug related to the new encoding setting
## [0.3.0] - 29-09-2025
### Added
- new type of config - Application config.
These will be very specific application settings to be overridden in specific cases. Consequently, such configuration will only be optional, because rare usage is expected. First such config is encoding_type
### Changed
- removed output of .log files when running the application
### Fixed
- small bug when unzipping a file
## [0.2.0] - 17-09-2025
### Added
- automatic deletion of the source file, and all temporary files created by the app.
- two new CLI parameters - --keep-source-file and --keep-tmp-dir flags, to be used to avoid deleting the source file and/or temporary working directory when testing.
- row count output in log files after enrichment.
### Fixed
- source and output columns in csv extraction were mistakenly swapped. This is now fixed.

View File

@@ -0,0 +1,328 @@
# MRDS APP
The main purpose of this application is to download XML or CSV files from source, perform some basic ETL and upload them to target.
Below is a simplified workflow of the application.
## Application workflow
```mermaid
flowchart LR
subgraph CoreApplication
direction TB
B[Read and validate config file] --> |If valid| C[Download source file]
C[Download source file] --> D[Unzip if file is ZIP]
D[Unzip if file is ZIP] --> E[Validate source file]
E --> |If valid| G[Start task defined in config file]
G --> H[Build output file with selected data from source]
H --> I[Enrich output file with metadata]
I --> J[Upload the output file]
J --> K[Trigger remote function]
K --> L[Check if more tasks are available in config file]
L --> |Yes| G
L --> |No| M[Archive & Delete source file]
M --> N[Finish workflow]
end
A[Trigger app via CLI or Airflow DAG] --> CoreApplication
```
## Installation
Checkout repository and cd to root project directory
```shell
cd python/mrds_common
```
Create new virtual environment using Python >=3.11
```shell
python3.11 -m venv .venv
```
Activate virtual environment
```shell
source .venv/bin/activate
```
Upgrade pip
```shell
pip install --upgrade pip
```
Install app
```shell
pip install .
```
## Environment variables
There are two operating system environment variables, which are required by the application:
BUCKET_NAMESPACE - OCI namespace where main operating bucket is located (if not found - default value is frcnomajoc7v)
BUCKET - main operating OCI bucket for downloading and uploading files (if not found - default value is mrds_inbox_poc)
## Usage
The application accepts two required and four optional parameters.
### Parameters
| Parameter | Short Flag | Required | Default | Description |
|-------------------------------|------------|----------|---------|----------------------------------------------------------------------------------------------------------------------|
| `--workflow-context` | `-w` | No* | None | JSON string representing the workflow context. Must contain `run_id` and `a_workflow_history_key`. |
| `--generate-workflow-context` | | No* | | Flag type. If provided, app automatically generates and finalizes workflow context. Use this if `--workflow-context` is not provided. |
| `--source-filename` | `-s` | Yes | None | Name of the source file to be looked up in source inbox set in configuration file (`inbox_prefix`). |
| `--config-file` | `-c` | Yes | None | Path to the YAML configuration file. Can be absolute, or relative to current working directory. |
| `--keep-source-file` | | No | | Flag type. If provided, app keeps source file, instead of archiving and deleting it. |
| `--keep-tmp-dir` | | No | | Flag type. If provided, app keeps tmp directory, instead of deleting it. |
*`--workflow-context` and `--generate-workflow-context` are both optional, however - either one of them MUST be provided for the application to run.
### CLI
```shell
mrds-cli --workflow-context '{"run_id": "0ce35637-302c-4293-8069-3186d5d9a57d", "a_workflow_history_key": 352344}' \
--source-filename 'CSDB_Debt_Daily.ZIP' \
--config-file /home/dbt/GEORGI/projects/mrds_elt/airflow/ods/csdb/debt_daily/config/yaml/csdb_debt_daily.yaml
```
### Python module
Import main function from core module and provide needed parameters:
```python
from mrds.core import main
from mrds.utils.manage_runs import init_workflow, finalise_workflow
from mrds.utils.static_vars import status_success, status_failed
import datetime
import logging
import sys
# Configure logging for your needs. This is just a sample
current_time = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
log_filename = f"mrds_{current_time}.log"
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s %(levelname)s %(name)s - %(message)s",
handlers=[
logging.FileHandler(log_filename),
logging.StreamHandler(sys.stdout),
],
)
STATUS_SUCCESS = status_success
STATUS_FAILURE = status_failed
# Run time parameters
run_id = "0ce35637-302c-4293-8069-3186d5d9a57d"
a_workflow_history_key = init_workflow(database_name='ODS', workflow_name='w_OU_C2D_UC_DISSEM', workflow_run_id=run_id)
workflow_context = {
"run_id": run_id,
"a_workflow_history_key": a_workflow_history_key,
}
source_filename = "CSDB_Debt_Daily.ZIP"
config_file = "/home/dbt/GEORGI/projects/mrds_elt/airflow/ods/csdb/debt_daily/config/yaml/csdb_debt_daily.yaml"
main(workflow_context, source_filename, config_file)
# implement your desired error handling logic and provide correct status to function finalize_workflow
finalise_workflow(workflow_context["a_workflow_history_key"], STATUS_SUCCESS)
```
## Configuration
### Generate workflow context
Use this if you are using the application in standalone mode. Workflow context will be generated, and then finalized.
### Source filename
This is the source file name to be looked up in the source inbox set in the configuration file (`inbox_prefix`).
### Workflow context
This is a JSON string (from the application's standpoint, a dictionary) containing run_id and a_workflow_history_key values.
```JSON
workflow_context = {
"run_id": "0ce35637-302c-4293-8069-3186d5d9a57d",
"a_workflow_history_key": 352344,
}
```
run_id - this represents the orchestration ID. Can be any string ID of your choice, for example an Airflow DAG ID.
a_workflow_history_key - can be generated via mrds.utils.manage_runs.init_workflow() function.
If you provide workflow context by yourself, you need to take care of finalizing it too.
### Config file
This is the main place which we can control the application.
At the top, are the Application configurations. These apply to all tasks. These are all optional and are used to override some specific runtime application settings.
```yaml
# System configurations
encoding_type: cp1252 # Overrides default encoding type (utf-8) of the app. This encoding is used when reading source csv/xml files and when writing the output csv files of the app. For codec naming, follow guidelines here - https://docs.python.org/3/library/codecs.html#standard-encodings
```
After that, are the global configurations. These apply to all tasks:
```yaml
# Global configurations
tmpdir: /tmp # root temporary directory to create runtime temporary directory, download source file and perform operations on it, before upload it to target
inbox_prefix: INBOX/C2D/UC_DISSEM # prefix for the inbox containing the source file
archive_prefix: ARCHIVE/C2D/UC_DISSEM # prefix for the archive bucket
workflow_name: w_OU_C2D_UC_DISSEM # name of the particular workflow
validation_schema_path: 'xsd/UseOfCollateralMessage.xsd' # relative path (to runtime location) to schema used to validate XML or CSV file
file_type: xml # file type of the expected source file - either CSV or XML
```
Following, there is a list of tasks to be performed on the source file.
We can have multiple tasks per file, meaning - we can generate more than one output file, from one source file.
Further, one of the key configuration parameters per task is "output_columns". There we define columns of the final output file.
There are several types of columns:
xpath - this type of column is used when the source file is XML. It is a standard xpath expression, pointing to a path in the xml.
xpath_element_id - this type of column is used when we need to identify a particular xml element. Used to create foreign keys between two separate tasks. It is a standard xpath expression, pointing to a path in the xml.
csv_header - this type of column is used when source file is CSV. It just points to the corresponding csv header in the source file.
a_key - generates key unique per row.
workflow_key - generates key unique per run of the application
static - allows the user to define column with static value
The application respects the order of the output columns in the configuration file, when generating the output file.
Data and columns from the source file, not included in the configuration file, will not be present in the final output file.
Example of xml task configuration:
```yaml
# List of tasks
tasks:
- task_name: ou_lm_standing_facilities_header_create_file # name of the particular task
ods_prefix: INBOX/LM/STANDING_FACILITIES/STANDING_FACILITIES_HEADER # prefix for the upload location
output_table: standing_facilities_headers # table in Oracle
namespaces:
ns2: 'http://escb.ecb.int/sf' # XML namespace
output_columns: # Columns in the output file, order will be respected.
- type: 'a_key' # A_KEY type of column
column_header: 'A_KEY' # naming of the column in the output file
- type: 'workflow_key' # WORKFLOW_KEY type of column
column_header: 'A_WORKFLOW_HISTORY_KEY'
- type: 'xpath' # xpath type of column
value: '//ns2:header/ns2:version'
column_header: 'REV_NUMBER'
is_key: 'N' # value is transposed across the rows - YES/NO. Used when there is only single value in source XML
- type: 'xpath'
value: '//ns2:header/ns2:referenceDate'
column_header: 'REF_DATE'
is_key: 'N'
- type: 'static'
value: ''
column_header: 'FREE_TEXT'
- task_name: ou_lm_standing_facilities_create_file
ods_prefix: INBOX/LM/STANDING_FACILITIES/STANDING_FACILITIES
output_table: standing_facilities
namespaces:
ns2: 'http://escb.ecb.int/sf'
output_columns:
- type: 'a_key'
column_header: 'A_KEY'
- type: 'workflow_key'
column_header: 'A_SFH_FK'
- type: 'workflow_key'
column_header: 'A_WORKFLOW_HISTORY_KEY'
- type: 'xpath'
value: '//ns2:disaggregatedStandingFacilities/ns2:standingFacilities/ns2:disaggregatedStandingFacility/ns2:country'
column_header: 'COUNTRY'
- type: 'static'
value: ''
column_header: 'COMMENT_'
```
Example of CSV task configuration:
```yaml
tasks:
- task_name: ODS_CSDB_DEBT_DAILY_process_csv
ods_prefix: ODS/CSDB/DEBT_DAILY
output_table: DEBT_DAILY
output_columns:
- type: 'a_key'
column_header: 'A_KEY'
- type: 'workflow_key'
column_header: 'A_WORKFLOW_HISTORY_KEY'
- type: 'csv_header' # csv_header type of column
value: 'Date last modified' # naming of the column in the SOURCE file
column_header: 'Date last modified' # naming of the column in the OUTPUT file
- type: 'csv_header'
value: 'Extraction date'
column_header: 'Extraction date'
- type: 'csv_header'
value: 'ISIN code'
column_header: 'ISIN code'
```
## Development
### Installing requirements
Install app + dev requirements. For easier workflow, you can install in editable mode
```
pip install -e .[dev]
```
In editable mode, instead of copying the package files to the site-packages directory, pip creates a special link that points to the source code directory. This means any changes you make to your source code will be immediately available without needing to reinstall the package.
### Code formatting
Run black to reformat the code before pushing changes.
Following will reformat all files recursively from current dir.
```
black .
```
Following will only check and report what needs to be formatted, recursively from current dir.
```
black --check --diff .
```
### Tests
Run tests with
```
pytest .
```
### Tox automation
Tox automates runs of black checks and tests
```
tox .
```

View File

@@ -0,0 +1 @@
# Single source of truth for the package version; surfaced via the CLI's
# --version flag (see the mrds CLI module).
__version__ = "0.6.0"

View File

@@ -0,0 +1,117 @@
import click
import json
import logging
import sys
from mrds import __version__
from mrds.core import main
@click.command()
@click.version_option(version=__version__, prog_name="mrds")
@click.option(
    "--workflow-context",
    "-w",
    required=False,
    help="Workflow context to be used by the application. This is required unless --generate-workflow-context is provided.",
)
@click.option(
    "--source-filename",
    "-s",
    required=True,
    help="Source filename to be processed.",
)
@click.option(
    "--config-file",
    "-c",
    type=click.Path(exists=True),
    required=True,
    help="Path to the YAML configuration file.",
)
@click.option(
    "--generate-workflow-context",
    is_flag=True,
    default=False,
    help="Generate a workflow context automatically. If this is set, --workflow-context is not required.",
)
@click.option(
    "--keep-source-file",
    is_flag=True,
    default=False,
    help="Keep source file, instead of deleting it.",
)
@click.option(
    "--keep-tmp-dir",
    is_flag=True,
    default=False,
    help="Keep tmp directory, instead of deleting it.",
)
def cli_main(
    workflow_context,
    source_filename,
    config_file,
    generate_workflow_context,
    keep_source_file,
    keep_tmp_dir,
):
    """Command-line entry point: validate the options and hand off to mrds.core.main."""
    # Log everything at INFO level to stdout.
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s %(levelname)s %(name)s - %(message)s",
        handlers=[logging.StreamHandler(sys.stdout)],
    )
    # Exactly one of --workflow-context / --generate-workflow-context may be used.
    if workflow_context and generate_workflow_context:
        raise click.UsageError(
            "You cannot use both --workflow-context and --generate-workflow-context at the same time. "
            "Please provide only one."
        )
    if not workflow_context and not generate_workflow_context:
        raise click.UsageError(
            "You must provide --workflow-context or use --generate-workflow-context flag."
        )
    if workflow_context:
        # Decode the JSON payload, then verify it carries the two required keys.
        try:
            workflow_context = json.loads(workflow_context)
        except json.JSONDecodeError as exc:
            raise click.UsageError(f"Invalid JSON for --workflow-context: {exc}")
        structure_ok = isinstance(workflow_context, dict) and {
            "run_id",
            "a_workflow_history_key",
        } <= workflow_context.keys()
        if not structure_ok:
            raise click.UsageError(
                "Invalid workflow context structure. It must be a JSON object with 'run_id' and 'a_workflow_history_key'."
            )
    # Delegate the actual work to the core module.
    main(
        workflow_context,
        source_filename,
        config_file,
        generate_workflow_context,
        keep_source_file,
        keep_tmp_dir,
    )
# Script entry point: translate failures into conventional exit codes
# (2 = usage error, 1 = unexpected error, 0 = success).
if __name__ == "__main__":
    try:
        cli_main()
        sys.exit(0)
    except click.UsageError as e:
        # NOTE(review): click commands run in "standalone mode" by default,
        # which catches UsageError internally and exits with code 2 before it
        # can propagate here — confirm whether this handler is reachable.
        logging.error(f"Usage error: {e}")
        sys.exit(2)
    except Exception as e:
        # Catch-all boundary: log and map any unexpected failure to exit code 1.
        logging.error(f"Unexpected error: {e}")
        sys.exit(1)

View File

@@ -0,0 +1,366 @@
import os
import uuid
import logging
import yaml
import zipfile
import tempfile
from dataclasses import dataclass, field
from mrds import __version__
from mrds.processors import get_file_processor
from mrds.utils import (
manage_runs,
objectstore,
static_vars,
xml_utils,
)
# environment variables
# Deployment environment tag; read here but not referenced elsewhere in the
# visible module — presumably consumed by other modules. TODO confirm.
MRDS_ENV = os.getenv("MRDS_ENV", "poc")
# Main operating bucket. NOTE(review): the env var read is INBOX_BUCKET while
# the README documents a variable called BUCKET — confirm which name
# deployments actually set.
BUCKET = os.getenv("INBOX_BUCKET", "mrds_inbox_poc")
# OCI object-storage namespace that holds the bucket above.
BUCKET_NAMESPACE = os.getenv("BUCKET_NAMESPACE", "frcnomajoc7v")
# Static configuration variables
WORKFLOW_TYPE = "ODS"  # passed as the first argument to manage_runs.init_workflow
ENCODING_TYPE = "utf-8"  # default text encoding; overridable via config key 'encoding_type'
# Keys that must be present at the top level of the YAML configuration file.
CONFIG_REQUIRED_KEYS = [
    "tmpdir",
    "inbox_prefix",
    "archive_prefix",
    "workflow_name",
    "validation_schema_path",
    "tasks",
    "file_type",
]
# Keys that must be present on every entry under 'tasks'.
TASK_REQUIRED_KEYS = [
    "task_name",
    "ods_prefix",
    "output_table",
    "output_columns",
]
# Canonical workflow status values re-exported from static_vars.
STATUS_SUCCESS = static_vars.status_success
STATUS_FAILURE = static_vars.status_failed
@dataclass
class GlobalConfig:
    """Run-wide settings shared by every task of one workflow execution."""

    # Root temporary directory; main() later rebinds this to a per-run subdirectory.
    tmpdir: str
    # Object-store prefix where the source file is expected (the inbox).
    inbox_prefix: str
    # Object-store prefix the original source file is archived to before deletion.
    archive_prefix: str
    # Workflow name used when initialising the run history.
    workflow_name: str
    # Current working source file name; rebound to the extracted member when
    # the downloaded file turns out to be a ZIP archive.
    source_filename: str
    # Path (relative to the runtime location) of the XSD/CSV validation schema.
    validation_schema_path: str
    # Target OCI bucket and its namespace.
    bucket: str
    bucket_namespace: str
    # Expected source file type: "xml" or "csv".
    file_type: str
    # Text encoding used when reading/writing files.
    encoding_type: str

    def __post_init__(self):
        self.original_source_filename = self.source_filename  # keep this in case we have a zip file to archive

    @property
    def source_filepath(self) -> str:
        """Absolute path of the current (possibly extracted) source file."""
        return os.path.join(self.tmpdir, self.source_filename)

    @property
    def original_source_filepath(self) -> str:
        """Absolute path of the file as originally downloaded (e.g. the ZIP)."""
        return os.path.join(self.tmpdir, self.original_source_filename)
@dataclass
class TaskConfig:
    """Per-task settings: one output file produced from the source file."""

    # Human-readable task identifier (used in log messages).
    task_name: str
    # Object-store prefix the task's output file is uploaded to.
    ods_prefix: str
    # Target table the output file feeds.
    output_table: str
    # XML prefix -> namespace URI map (empty for CSV tasks).
    namespaces: dict
    # Ordered column specifications for the output file.
    output_columns: list
def initialize_config(source_filename, config_file_path):
    """Load the YAML config file and build (GlobalConfig, list[TaskConfig]).

    Raises:
        FileNotFoundError: when ``config_file_path`` does not exist.
        ValueError: when required global or per-task keys are missing.
    """
    logging.info(f"Source filename is set to: {source_filename}")
    logging.info(f"Loading configuration from {config_file_path}")
    # Fail fast with a clear message before attempting to parse.
    if not os.path.exists(config_file_path):
        raise FileNotFoundError(f"Configuration file {config_file_path} not found.")
    with open(config_file_path, "r") as cfg_stream:
        config_data = yaml.safe_load(cfg_stream)
    logging.debug(f"Configuration data: {config_data}")
    # Global section must carry every required key.
    missing_keys = [key for key in CONFIG_REQUIRED_KEYS if key not in config_data]
    if missing_keys:
        raise ValueError(f"Missing required keys in configuration: {missing_keys}")
    global_config = GlobalConfig(
        tmpdir=config_data["tmpdir"],
        inbox_prefix=config_data["inbox_prefix"],
        archive_prefix=config_data["archive_prefix"],
        workflow_name=config_data["workflow_name"],
        source_filename=source_filename,
        validation_schema_path=config_data["validation_schema_path"],
        bucket=BUCKET,
        bucket_namespace=BUCKET_NAMESPACE,
        file_type=config_data["file_type"],
        encoding_type=config_data.get("encoding_type", ENCODING_TYPE),
    )

    def build_task(task_data):
        # Validate per-task keys before constructing the dataclass.
        missing_task_keys = [key for key in TASK_REQUIRED_KEYS if key not in task_data]
        if missing_task_keys:
            raise ValueError(
                f"Missing required keys in task configuration: {missing_task_keys}"
            )
        return TaskConfig(
            task_name=task_data["task_name"],
            ods_prefix=task_data["ods_prefix"],
            output_table=task_data["output_table"],
            namespaces=task_data.get("namespaces", {}),
            output_columns=task_data["output_columns"],
        )

    tasks = [build_task(task_data) for task_data in config_data["tasks"]]
    return global_config, tasks
def initialize_workflow(global_config):
    """Register a new workflow run and return its context dictionary."""
    run_id = str(uuid.uuid4())  # fresh orchestration id for this run
    logging.info(f"Initializing workflow '{global_config.workflow_name}'")
    history_key = manage_runs.init_workflow(
        WORKFLOW_TYPE, global_config.workflow_name, run_id
    )
    return {"run_id": run_id, "a_workflow_history_key": history_key}
def download_source_file(client, global_config):
    """Download the source file from the inbox prefix into the local tmp dir.

    Delegates to ``objectstore.download_file``; the positional order used here
    is (client, namespace, bucket, prefix, object name, local path) — confirm
    against mrds.utils.objectstore.
    """
    logging.info(
        f"Downloading source file '{global_config.source_filename}' "
        f"from '{global_config.bucket}/{global_config.inbox_prefix}'"
    )
    objectstore.download_file(
        client,
        global_config.bucket_namespace,
        global_config.bucket,
        global_config.inbox_prefix,
        global_config.source_filename,
        global_config.source_filepath,
    )
    logging.info(f"Source file downloaded to '{global_config.source_filepath}'")
def delete_source_file(client, global_config):
    """Delete the ORIGINAL source object (e.g. the ZIP) from the inbox prefix.

    Uses ``original_source_filename`` so the as-downloaded object is removed
    even after extraction rebinds ``source_filename``. NOTE(review): the
    positional order here (client, name, namespace, bucket, prefix) differs
    from download_file/upload_file — confirm against mrds.utils.objectstore.
    """
    logging.info(
        f"Deleting source file '{global_config.bucket}/{global_config.inbox_prefix}/{global_config.original_source_filename}'"
    )
    objectstore.delete_file(
        client,
        global_config.original_source_filename,
        global_config.bucket_namespace,
        global_config.bucket,
        global_config.inbox_prefix,
    )
    logging.info(
        f"Deleted source file '{global_config.bucket}/{global_config.inbox_prefix}/{global_config.original_source_filename}'"
    )
def archive_source_file(client, global_config):
    """Upload the locally downloaded original file to the archive prefix.

    Uses ``original_source_filepath``/``original_source_filename`` so the
    as-downloaded file (e.g. the ZIP) is archived, not the extracted member.
    Positional order assumed: (client, local path, namespace, bucket, prefix,
    object name) — confirm against mrds.utils.objectstore.
    """
    logging.info(
        f"Archiving source file to '{global_config.bucket}/{global_config.archive_prefix}/{global_config.original_source_filename}'"
    )
    objectstore.upload_file(
        client,
        global_config.original_source_filepath,
        global_config.bucket_namespace,
        global_config.bucket,
        global_config.archive_prefix,
        global_config.original_source_filename,
    )
    logging.info(
        f"Source file archived to '{global_config.bucket}/{global_config.archive_prefix}/{global_config.original_source_filename}'"
    )
def unzip_source_file_if_needed(global_config):
    """Extract the source file in place when it is a ZIP archive.

    Expects the archive to contain exactly one regular file. On success the
    member is extracted next to the archive and
    ``global_config.source_filename`` is rebound to point at it.

    Returns:
        bool: True when no extraction was needed or it succeeded; False when
        the archive is malformed (wrong member count, directory entry, unsafe
        member path) or extraction raised.
    """
    source_filepath = global_config.source_filepath
    # If it's not a zip, nothing to do
    if not zipfile.is_zipfile(source_filepath):
        logging.info(f"File '{source_filepath}' is not a ZIP file.")
        return True
    logging.info(f"File '{source_filepath}' is a ZIP file. Unzipping...")
    extract_dir = os.path.dirname(source_filepath)
    try:
        with zipfile.ZipFile(source_filepath, "r") as zip_ref:
            extracted_files = zip_ref.namelist()
            if len(extracted_files) != 1:
                logging.error(
                    f"Expected one file in the ZIP, but found {len(extracted_files)} files."
                )
                return False
            member = extracted_files[0]
            # The single member must be a regular file, not a directory entry.
            if member.endswith("/"):
                logging.error(f"ZIP member '{member}' is a directory, not a file.")
                return False
            # Zip-slip guard: the member must resolve inside extract_dir
            # (rejects absolute paths and '..' traversal).
            target_path = os.path.realpath(os.path.join(extract_dir, member))
            if not target_path.startswith(os.path.realpath(extract_dir) + os.sep):
                logging.error(f"ZIP member '{member}' escapes the extraction directory.")
                return False
            zip_ref.extractall(extract_dir)
    except Exception as e:
        logging.error(f"Error while extracting '{source_filepath}': {e}")
        return False
    # Point the config at the extracted file for the rest of the pipeline.
    global_config.source_filename = member
    logging.info(
        f"Extracted '{member}' to '{extract_dir}'. "
        f"Updated source_filepath to '{global_config.source_filepath}'."
    )
    return True
def validate_source_file(global_config):
    """Validate the downloaded source file according to its configured type.

    XML files are checked against the configured schema; CSV validation is not
    implemented yet and always passes. Raises ValueError on a failed XML
    validation or an unknown file type; returns True otherwise.
    """
    kind = global_config.file_type.lower()
    if kind == "csv":
        # TODO: add CSV validation here
        return True
    if kind == "xml":
        is_valid, message = xml_utils.validate_xml(
            global_config.source_filepath, global_config.validation_schema_path
        )
        if not is_valid:
            raise ValueError(f"XML validation failed: {message}")
        logging.info(message)
        return True
    raise ValueError(f"Unsupported file type: {kind}")
def process_tasks(tasks, global_config, workflow_context, client):
    """Run every configured task through the processor for the file type."""
    # One processor class serves all tasks of this run.
    processor_class = get_file_processor(global_config)
    for task in tasks:
        logging.info(f"Starting task '{task.task_name}'")
        processor = processor_class(global_config, task, client, workflow_context)
        processor.process()
def finalize_workflow(workflow_context, success=True):
    """Record the terminal workflow status and log the outcome."""
    manage_runs.finalise_workflow(
        workflow_context["a_workflow_history_key"],
        STATUS_SUCCESS if success else STATUS_FAILURE,
    )
    if success:
        logging.info("Workflow completed successfully")
    else:
        logging.error("Workflow failed")
def main(
    workflow_context: dict,
    source_filename: str,
    config_file_path: str,
    generate_workflow_context=False,
    keep_source_file=False,
    keep_tmp_dir=False,
):
    """
    Run the full ingestion workflow for one source file.

    Steps: load configuration, create a temporary working directory,
    optionally generate a workflow context, download/unzip/validate the
    source file, process all configured tasks, then finalize, archive and
    clean up.

    Args:
        workflow_context: Run identifiers; ignored and regenerated when
            generate_workflow_context is True.
        source_filename: Name of the source file to process.
        config_file_path: Path to the workflow configuration file.
        generate_workflow_context: Create and finalize the workflow context
            here instead of receiving it from the caller.
        keep_source_file: Skip archiving/deleting the source after success.
        keep_tmp_dir: Keep the temporary working directory for debugging.

    Raises:
        RuntimeError: if any step fails; the original exception is chained.
    """
    logging.info(f"Initializing mrds app, version {__version__}")
    tmpdir_manager = None
    try:
        # get configs
        global_config, tasks = initialize_config(source_filename, config_file_path)
        # Handle temporary dirs
        if keep_tmp_dir:
            tmpdir = tempfile.mkdtemp(
                prefix="mrds_", dir=global_config.tmpdir
            )  # dir is created and never deleted
            logging.info(
                f"Created temporary working directory (not auto-deleted): {tmpdir}"
            )
        else:
            tmpdir_manager = tempfile.TemporaryDirectory(
                prefix="mrds_", dir=global_config.tmpdir
            )
            tmpdir = tmpdir_manager.name
            logging.info(
                f"Created temporary working directory (auto-deleted): {tmpdir}"
            )
        # override tmpdir with newly created tmpdir
        global_config.tmpdir = tmpdir
        client = objectstore.get_client()
        # Handle workflow_context generation if required
        if generate_workflow_context:
            logging.info("Generating workflow context automatically.")
            workflow_context = initialize_workflow(global_config)
            logging.info(f"Generated workflow context: {workflow_context}")
        else:
            logging.info(f"Using provided workflow context: {workflow_context}")
        download_source_file(client, global_config)
        unzip_source_file_if_needed(global_config)
        validate_source_file(global_config)
        process_tasks(tasks, global_config, workflow_context, client)
        if generate_workflow_context:
            finalize_workflow(workflow_context)
        if not keep_source_file:
            archive_source_file(client, global_config)
            delete_source_file(client, global_config)
    except Exception as e:
        logging.error(f"Critical error: {str(e)}")
        # Finalize workflow with failure if needed
        if generate_workflow_context and "workflow_context" in locals():
            finalize_workflow(workflow_context, success=False)
        # Chain the cause so the original traceback is preserved for callers.
        raise RuntimeError(f"Workflow failed due to: {e}") from e
    finally:
        # Always attempt to remove tmpdir if we created a TemporaryDirectory manager
        if tmpdir_manager and not keep_tmp_dir:
            try:
                tmpdir_manager.cleanup()
                logging.info(f"Deleted temporary working directory {tmpdir}")
            except Exception:
                logging.exception(
                    f"Failed to delete temporary working directory {tmpdir}"
                )

View File

@@ -0,0 +1,186 @@
# static configs
# Workflow-level settings for the RQSD_PROCESS ingestion.
tmpdir: /tmp
inbox_prefix: INBOX/RQSD/RQSD_PROCESS
workflow_name: w_ODS_RQSD_PROCESS_DEVO
# NOTE(review): YAML parses `None` as the string "None", not as null.
# Harmless while file_type is csv (no schema validation runs), but confirm
# the loader's expectation before switching this workflow to xml.
validation_schema_path: None
file_type: csv
# task configs
# Each task maps source columns to one output table. Entry types:
#   workflow_key - filled with the run's A_WORKFLOW_HISTORY_KEY
#   csv_header   - copy source column `value` into output `column_header`
#   static       - constant `value` repeated in every row
#   a_key        - generated surrogate row key
tasks:
  - task_name: m_ODS_RQSD_OBSERVATIONS_PARSE
    ods_prefix: INBOX/RQSD/RQSD_PROCESS/RQSD_OBSERVATIONS
    output_table: RQSD_OBSERVATIONS
    output_columns:
      - type: 'workflow_key'
        column_header: 'A_WORKFLOW_HISTORY_KEY'
      - type: 'csv_header'
        value: 'datacollectioncode'
        column_header: 'datacollectioncode'
      - type: 'csv_header'
        value: 'datacollectionname'
        column_header: 'datacollectionname'
      - type: 'csv_header'
        value: 'datacollectionowner'
        column_header: 'datacollectionowner'
      - type: 'csv_header'
        value: 'reportingcyclename'
        column_header: 'reportingcyclename'
      - type: 'csv_header'
        value: 'reportingcyclestatus'
        column_header: 'reportingcyclestatus'
      - type: 'csv_header'
        value: 'modulecode'
        column_header: 'modulecode'
      - type: 'csv_header'
        value: 'modulename'
        column_header: 'modulename'
      - type: 'csv_header'
        value: 'moduleversionnumber'
        column_header: 'moduleversionnumber'
      - type: 'csv_header'
        value: 'reportingentitycollectionuniqueid'
        column_header: 'reportingentitycollectionuniqueid'
      - type: 'csv_header'
        value: 'entityattributereportingcode'
        column_header: 'entityattributereportingcode'
      - type: 'csv_header'
        value: 'reportingentityname'
        column_header: 'reportingentityname'
      - type: 'csv_header'
        value: 'reportingentityentitytype'
        column_header: 'reportingentityentitytype'
      - type: 'csv_header'
        value: 'entityattributecountry'
        column_header: 'entityattributecountry'
      - type: 'csv_header'
        value: 'entitygroupentityname'
        column_header: 'entitygroupentityname'
      - type: 'csv_header'
        value: 'obligationmodulereferencedate'
        column_header: 'obligationmodulereferencedate'
      - type: 'csv_header'
        value: 'obligationmoduleremittancedate'
        column_header: 'obligationmoduleremittancedate'
      - type: 'csv_header'
        value: 'receivedfilereceiveddate'
        column_header: 'receivedfilereceiveddate'
      - type: 'csv_header'
        value: 'obligationmoduleexpected'
        column_header: 'obligationmoduleexpected'
      - type: 'csv_header'
        value: 'receivedfileversionnumber'
        column_header: 'receivedfileversionnumber'
      - type: 'csv_header'
        value: 'revalidationversionnumber'
        column_header: 'revalidationversionnumber'
      - type: 'csv_header'
        value: 'revalidationdate'
        column_header: 'revalidationdate'
      - type: 'csv_header'
        value: 'receivedfilesystemfilename'
        column_header: 'receivedfilesystemfilename'
      - type: 'csv_header'
        value: 'obligationstatusstatus'
        column_header: 'obligationstatusstatus'
      - type: 'csv_header'
        value: 'filestatussetsubmissionstatus'
        column_header: 'filestatussetsubmissionstatus'
      - type: 'csv_header'
        value: 'filestatussetvalidationstatus'
        column_header: 'filestatussetvalidationstatus'
      - type: 'csv_header'
        value: 'filestatussetexternalvalidationstatus'
        column_header: 'filestatussetexternalvalidationstatus'
      - type: 'csv_header'
        value: 'numberoferrors'
        column_header: 'numberoferrors'
      - type: 'csv_header'
        value: 'numberofwarnings'
        column_header: 'numberofwarnings'
      - type: 'csv_header'
        value: 'delayindays'
        column_header: 'delayindays'
      - type: 'csv_header'
        value: 'failedattempts'
        column_header: 'failedattempts'
      - type: 'csv_header'
        value: 'observationvalue'
        column_header: 'observationvalue'
      - type: 'csv_header'
        value: 'observationtextvalue'
        column_header: 'observationtextvalue'
      - type: 'csv_header'
        value: 'observationdatevalue'
        column_header: 'observationdatevalue'
      - type: 'csv_header'
        value: 'datapointsetdatapointidentifier'
        column_header: 'datapointsetdatapointidentifier'
      - type: 'csv_header'
        value: 'datapointsetlabel'
        column_header: 'datapointsetlabel'
      - type: 'csv_header'
        value: 'obsrvdescdatatype'
        column_header: 'obsrvdescdatatype'
      - type: 'csv_header'
        value: 'ordinatecode'
        column_header: 'ordinatecode'
      - type: 'csv_header'
        value: 'ordinateposition'
        column_header: 'ordinateposition'
      - type: 'csv_header'
        value: 'tablename'
        column_header: 'tablename'
      - type: 'csv_header'
        value: 'isstock'
        column_header: 'isstock'
      - type: 'csv_header'
        value: 'scale'
        column_header: 'scale'
      - type: 'csv_header'
        value: 'currency'
        column_header: 'currency'
      - type: 'csv_header'
        value: 'numbertype'
        column_header: 'numbertype'
      - type: 'csv_header'
        value: 'ismandatory'
        column_header: 'ismandatory'
      - type: 'csv_header'
        value: 'decimalplaces'
        column_header: 'decimalplaces'
      - type: 'csv_header'
        value: 'serieskey'
        column_header: 'serieskey'
      - type: 'csv_header'
        value: 'tec_source_system'
        column_header: 'tec_source_system'
      - type: 'csv_header'
        value: 'tec_dataset'
        column_header: 'tec_dataset'
      - type: 'csv_header'
        value: 'tec_surrogate_key'
        column_header: 'tec_surrogate_key'
      - type: 'csv_header'
        value: 'tec_crc'
        column_header: 'tec_crc'
      - type: 'csv_header'
        value: 'tec_ingestion_date'
        column_header: 'tec_ingestion_date'
      - type: 'csv_header'
        value: 'tec_version_id'
        column_header: 'tec_version_id'
      - type: 'csv_header'
        value: 'tec_execution_date'
        column_header: 'tec_execution_date'
      - type: 'csv_header'
        value: 'tec_run_id'
        column_header: 'tec_run_id'
      # NOTE(review): the 'test test'/'BLABLA' static column and the
      # 'tec_business_dateTest!' header below look like test leftovers -
      # confirm before using this config in production.
      - type: 'static'
        value: 'test test'
        column_header: 'BLABLA'
      - type: 'a_key'
        column_header: 'A_KEY'
      - type: 'csv_header'
        value: 'tec_business_date'
        column_header: 'tec_business_dateTest!'

View File

@@ -0,0 +1,50 @@
# file uploader
import os
import sys
import logging
from mrds.utils import objectstore
# Target bucket and namespace; overridable via environment variables.
BUCKET = os.getenv("INBOX_BUCKET", "mrds_inbox_poc")
BUCKET_NAMESPACE = os.getenv("BUCKET_NAMESPACE", "frcnomajoc7v")
# Configure logging (INFO to stdout)
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)s %(name)s - %(message)s",
    handlers=[
        logging.StreamHandler(sys.stdout),
    ],
)
# Hard-coded upload job parameters - edit these before running the script.
source_filepath = '/home/dbt/tmp/mrds_4twsw_ib/20250630_Pre-Production_DV_P2_DBT_I4.zip'
source_filename = '20250630_Pre-Production_DV_P2_DBT_I4.zip'
target_prefix = 'INBOX/CSDB/STC_CentralizedSecuritiesDissemination_ECB'
def upload_file():
    """Upload the configured local file to the inbox bucket via objectstore."""
    client = objectstore.get_client()
    destination = f"{BUCKET}/{target_prefix}/{source_filename}"
    logging.info(f"uploading source file to '{destination}'")
    objectstore.upload_file(
        client,
        source_filepath,
        BUCKET_NAMESPACE,
        BUCKET,
        target_prefix,
        source_filename,
    )
    logging.info(f"Source file uploaded to '{destination}'")
if __name__ == "__main__":
    # Entry point: run the upload; any failure maps to a non-zero exit code.
    try:
        upload_file()
    except Exception as e:
        logging.error(f"Unexpected error: {e}")
        sys.exit(1)
    sys.exit(0)

View File

@@ -0,0 +1,15 @@
from .xml_processor import XMLTaskProcessor
from .csv_processor import CSVTaskProcessor
def get_file_processor(global_config):
    """
    Factory: return the task-processor class matching the configured file type.

    Raises:
        ValueError: for any file type other than 'xml' or 'csv'.
    """
    file_type = global_config.file_type.lower()
    if file_type == "xml":
        return XMLTaskProcessor
    if file_type == "csv":
        return CSVTaskProcessor
    raise ValueError(f"Unsupported file type: {file_type}")

View File

@@ -0,0 +1,211 @@
import logging
import os
import csv
from abc import ABC, abstractmethod
from mrds.utils.utils import parse_output_columns
from mrds.utils import (
manage_files,
manage_runs,
objectstore,
static_vars,
)
# Per-task output file name; the task history key keeps re-runs unique.
OUTPUT_FILENAME_TEMPLATE = "{output_table}-{task_history_key}.csv"
STATUS_SUCCESS = static_vars.status_success  # duplicated, needs to be moved #TODO
class TaskProcessor(ABC):
    """Template-method base class for one ingestion task.

    Lifecycle (see process()): extract data from the source file into a CSV,
    enrich it with static/key columns, upload it to the object store, trigger
    remote database processing, then finalize the task run. Subclasses
    implement _extract() for their file type.
    """

    def __init__(self, global_config, task_conf, client, workflow_context):
        """Store collaborators, then run common init and the subclass hook."""
        self.global_config = global_config
        self.task_conf = task_conf
        self.client = client
        self.workflow_context = workflow_context
        self._init_common()
        self._post_init()

    def _init_common(self):
        """Register the task run, derive output paths, and parse column config."""
        # Initialize task
        self.a_task_history_key = manage_runs.init_task(
            self.task_conf.task_name,
            self.workflow_context["run_id"],
            self.workflow_context["a_workflow_history_key"],
        )
        logging.info(f"Task initialized with history key: {self.a_task_history_key}")
        # Define output file paths
        self.output_filename = OUTPUT_FILENAME_TEMPLATE.format(
            output_table=self.task_conf.output_table,
            task_history_key=self.a_task_history_key,
        )
        self.output_filepath = os.path.join(
            self.global_config.tmpdir, self.output_filename
        )
        # Parse the output_columns into per-type entry lists plus the final
        # column order of the output CSV.
        (
            self.xpath_entries,
            self.csv_entries,
            self.static_entries,
            self.a_key_entries,
            self.workflow_key_entries,
            self.xml_position_entries,
            self.column_order,
        ) = parse_output_columns(self.task_conf.output_columns)

    def _post_init(self):
        """Optional hook for subclasses to override."""
        pass

    @abstractmethod
    def _extract(self):
        """Non-optional hook: write the extracted data to self.output_filepath."""
        pass

    def _enrich(self):
        """
        Stream-based enrich: read one row at a time, append static/A-key/workflow-key,
        reorder columns, and write out immediately.

        Rewrites self.output_filepath in place via a .tmp file + atomic rename.
        A-key values are a_task_history_key * 1e9 + 1-based row number.
        """
        TASK_HISTORY_MULTIPLIER = 1_000_000_000
        logging.info(f"Enriching CSV file at '{self.output_filepath}'")
        temp_output = self.output_filepath + ".tmp"
        encoding = self.global_config.encoding_type
        with open(self.output_filepath, newline="", encoding=encoding) as inf, open(
            temp_output, newline="", encoding=encoding, mode="w"
        ) as outf:
            reader = csv.reader(inf)
            writer = csv.writer(outf, quoting=csv.QUOTE_ALL)
            # Read the original header
            original_headers = next(reader)
            # Compute the full set of headers
            headers = list(original_headers)
            # Add static column headers if missing
            for col_name, _ in self.static_entries:
                if col_name not in headers:
                    headers.append(col_name)
            # Add A-key column headers if missing
            for col_name in self.a_key_entries:
                if col_name not in headers:
                    headers.append(col_name)
            # Add workflow key column headers if missing
            for col_name in self.workflow_key_entries:
                if col_name not in headers:
                    headers.append(col_name)
            # Rearrange headers to the desired order (columns absent from
            # column_order are dropped from the output)
            header_to_index = {h: i for i, h in enumerate(headers)}
            out_indices = [
                header_to_index[h] for h in self.column_order if h in header_to_index
            ]
            out_headers = [headers[i] for i in out_indices]
            # Write the new header
            writer.writerow(out_headers)
            # Stream each row, enrich in-place, reorder, and write
            row_count = 0
            base_task_history = int(self.a_task_history_key) * TASK_HISTORY_MULTIPLIER
            for i, in_row in enumerate(reader, start=1):
                # Build a working list that matches `headers` order.
                # Start from the existing columns; cells stay None until filled.
                work_row = [None] * len(headers)
                for j, h in enumerate(original_headers):
                    idx = header_to_index[h]
                    work_row[idx] = in_row[j]
                # Fill static columns
                for col_name, value in self.static_entries:
                    idx = header_to_index[col_name]
                    work_row[idx] = value
                # Fill A-key columns
                for col_name in self.a_key_entries:
                    idx = header_to_index[col_name]
                    a_key_value = base_task_history + i
                    work_row[idx] = str(a_key_value)
                # Fill workflow key columns
                wf_val = self.workflow_context["a_workflow_history_key"]
                for col_name in self.workflow_key_entries:
                    idx = header_to_index[col_name]
                    work_row[idx] = wf_val
                # Reorder to output order and write
                out_row = [work_row[j] for j in out_indices]
                writer.writerow(out_row)
                row_count += 1
        # Atomically replace
        os.replace(temp_output, self.output_filepath)
        logging.info(
            f"CSV file enriched at '{self.output_filepath}', {row_count} rows generated"
        )

    def _upload(self):
        """Upload the enriched CSV to the configured object-store prefix."""
        # Upload CSV to object store
        logging.info(
            f"Uploading CSV file to '{self.global_config.bucket}/{self.task_conf.ods_prefix}/{self.output_filename}'"
        )
        objectstore.upload_file(
            self.client,
            self.output_filepath,
            self.global_config.bucket_namespace,
            self.global_config.bucket,
            self.task_conf.ods_prefix,
            self.output_filename,
        )
        logging.info(
            f"CSV file uploaded to '{self.global_config.bucket}/{self.task_conf.ods_prefix}/{self.output_filename}'"
        )

    def _process_remote(self):
        """Trigger database-side processing; on failure, delete the uploaded file."""
        # Process the source file
        logging.info(f"Processing source file '{self.output_filename}' with CT_MRDS.FILE_MANAGER.PROCESS_SOURCE_FILE database function.")
        try:
            manage_files.process_source_file(
                self.task_conf.ods_prefix, self.output_filename
            )
        except Exception as e:
            logging.error(
                f"Processing source file '{self.output_filename}' failed. Cleaning up..."
            )
            # Remove the uploaded CSV so a failed run leaves no orphan object
            objectstore.delete_file(
                self.client,
                self.output_filename,
                self.global_config.bucket_namespace,
                self.global_config.bucket,
                self.task_conf.ods_prefix,
            )
            logging.error(
                f"CSV file '{self.global_config.bucket}/{self.task_conf.ods_prefix}/{self.output_filename}' deleted."
            )
            raise
        else:
            logging.info(f"Source file '{self.output_filename}' processed")

    def _finalize(self):
        """Mark the task run successful in the run-history tables."""
        # Finalize task
        manage_runs.finalise_task(self.a_task_history_key, STATUS_SUCCESS)
        logging.info(f"Task '{self.task_conf.task_name}' completed successfully")

    def process(self):
        """Main processor function: run the full extract -> finalize pipeline."""
        self._extract()
        self._enrich()
        self._upload()
        self._process_remote()
        self._finalize()

View File

@@ -0,0 +1,52 @@
import logging
import csv
import os
from .base import TaskProcessor
class CSVTaskProcessor(TaskProcessor):
    """Task processor for CSV sources: keeps and renames a configured column subset."""

    def _extract(self):
        """Stream the source CSV, keeping only the configured columns (renamed),
        and atomically write the result to self.output_filepath."""
        source_path = self.global_config.source_filepath
        target_path = self.output_filepath
        enc = self.global_config.encoding_type
        logging.info(f"Reading source CSV file at '{source_path}'")
        # Stream row-by-row through a scratch file, then rename atomically.
        scratch_path = target_path + ".tmp"
        with open(source_path, newline="", encoding=enc) as src, open(
            scratch_path, newline="", encoding=enc, mode="w"
        ) as dst:
            rows = csv.reader(src)
            out = csv.writer(dst, quoting=csv.QUOTE_ALL)
            source_headers = next(rows)
            # csv_entries holds (new_name, original_name) pairs.
            wanted = [old for _, old in self.csv_entries]
            renamed = [new for new, _ in self.csv_entries]
            # Fail fast if any configured source header is absent.
            missing = [h for h in wanted if h not in source_headers]
            if missing:
                raise ValueError(
                    f"The following headers are not in the input CSV: {missing}"
                )
            positions = [source_headers.index(h) for h in wanted]
            out.writerow(renamed)
            for record in rows:
                out.writerow([record[pos] for pos in positions])
        os.replace(scratch_path, target_path)
        logging.info(f"Core data written to CSV file at '{target_path}'")

View File

@@ -0,0 +1,30 @@
import logging
from .base import TaskProcessor
from mrds.utils import (
xml_utils,
csv_utils,
)
class XMLTaskProcessor(TaskProcessor):
    """Task processor for XML sources: XPath-extracts rows and writes them as CSV."""

    def _extract(self):
        """Extract the configured XPath values from the source XML and dump them to CSV."""
        extracted = xml_utils.extract_data(
            self.global_config.source_filepath,
            self.xpath_entries,
            self.xml_position_entries,
            self.task_conf.namespaces,
            self.workflow_context,
            self.global_config.encoding_type,
        )
        logging.info(f"CSV data extracted for task '{self.task_conf.task_name}'")
        logging.info(f"Writing core data to CSV file at '{self.output_filepath}'")
        csv_utils.write_data_to_csv_file(
            self.output_filepath, extracted, self.global_config.encoding_type
        )
        logging.info(f"Core data written to CSV file at '{self.output_filepath}'")

View File

@@ -0,0 +1,69 @@
import csv
import os
# Surrogate keys are task_history_key * multiplier + row number, so each
# task run occupies a disjoint key range (up to 1e9 rows per task).
TASK_HISTORY_MULTIPLIER = 1_000_000_000
def read_csv_file(csv_filepath, encoding_type="utf-8"):
    """Read a whole CSV file; return (header_row, data_rows)."""
    with open(csv_filepath, "r", newline="", encoding=encoding_type) as csvfile:
        all_rows = list(csv.reader(csvfile))
    return all_rows[0], all_rows[1:]
def write_data_to_csv_file(csv_filepath, data, encoding_type="utf-8"):
    """Write data ({'headers': [...], 'rows': [...]}) to csv_filepath,
    fully quoted, via a .tmp file and an atomic rename."""
    scratch = csv_filepath + ".tmp"
    with open(scratch, "w", newline="", encoding=encoding_type) as handle:
        out = csv.writer(handle, quoting=csv.QUOTE_ALL)
        out.writerows([data["headers"], *data["rows"]])
    os.replace(scratch, csv_filepath)
def add_static_columns(data_rows, headers, static_entries):
    """Set each static column to its fixed value, appending the column when
    absent. Mutates headers and data_rows in place."""
    for column_header, value in static_entries:
        if column_header in headers:
            idx = headers.index(column_header)
            for row in data_rows:
                row[idx] = value
        else:
            headers.append(column_header)
            for row in data_rows:
                row.append(value)
def add_a_key_columns(data_rows, headers, a_key_entries, task_history_key):
    """Fill surrogate-key columns with key*multiplier + 1-based row number,
    appending the column when absent. Mutates in place."""
    base = int(task_history_key) * TASK_HISTORY_MULTIPLIER
    for column_header in a_key_entries:
        if column_header in headers:
            idx = headers.index(column_header)
            for rownum, row in enumerate(data_rows, start=1):
                row[idx] = str(base + rownum)
        else:
            headers.append(column_header)
            for rownum, row in enumerate(data_rows, start=1):
                row.append(str(base + rownum))
def add_workflow_key_columns(data_rows, headers, workflow_key_entries, workflow_key):
    """Fill workflow-key columns with the run's key, appending the column
    when absent. Mutates in place."""
    for column_header in workflow_key_entries:
        if column_header in headers:
            idx = headers.index(column_header)
            for row in data_rows:
                row[idx] = workflow_key
        else:
            headers.append(column_header)
            for row in data_rows:
                row.append(workflow_key)
def rearrange_columns(headers, data_rows, column_order):
    """Return (headers, rows) reordered to column_order; names missing from
    headers are silently dropped."""
    positions = {name: pos for pos, name in enumerate(headers)}
    keep = [positions[name] for name in column_order if name in positions]
    new_headers = [headers[pos] for pos in keep]
    new_rows = [[row[pos] for pos in keep] for row in data_rows]
    return new_headers, new_rows

View File

@@ -0,0 +1,177 @@
from . import oraconn
from . import sql_statements
from . import utils
# Get the next load id from the sequence
#
# Workflows
#
def process_source_file_from_event(resource_id: str):
    """
    Handle an object-storage event: parse the object URI and process the file.

    Expects an object URI in the form /n/<namespace>/b/<bucket>/o/<object>,
    e.g. /n/frcnomajoc7v/b/dmarsdb1/o/sqlnet.log, and calls
    process_source_file with the prefix and file name extracted from it.
    """
    _, _, prefix, file_name = utils.parse_uri_with_regex(resource_id)
    process_source_file(prefix, file_name)
def process_source_file(prefix: str, filename: str):
    """Invoke CT_MRDS.FILE_MANAGER.PROCESS_SOURCE_FILE for <prefix>/<filename>."""
    # rstrip to cater for cases where the prefix is passed with a trailing slash.
    # Fixed: the object key now uses the filename argument (it was previously
    # a literal placeholder and the parameter was unused).
    sourcefile = f"{prefix.rstrip('/')}/{filename}"
    # Connect outside the try so a failed connect cannot raise NameError
    # on conn.close() in the finally block.
    conn = oraconn.connect("MRDS_LOADER")
    try:
        oraconn.run_proc(conn, "CT_MRDS.FILE_MANAGER.PROCESS_SOURCE_FILE", [sourcefile])
        conn.commit()
    finally:
        conn.close()
def execute_query(query, query_parameters=None, account_alias="MRDS_LOADER"):
    """Run a query, commit, and return the first column of every fetched row."""
    # Connect outside the try so a failed connect cannot raise NameError
    # on conn.close() in the finally block.
    conn = oraconn.connect(account_alias)
    try:
        curs = conn.cursor()
        if query_parameters is not None:
            curs.execute(query, query_parameters)
        else:
            curs.execute(query)
        query_result = curs.fetchall()
        conn.commit()
    finally:
        conn.close()
    return [t[0] for t in query_result]
def get_file_prefix(source_key, source_file_id, table_id):
    """Return the bucket path configured for (source_key, source_file_id, table_id)."""
    # Connect outside the try: a failed connect must not hit conn.close().
    conn = oraconn.connect("MRDS_LOADER")
    try:
        curs = conn.cursor()
        curs.execute(
            sql_statements.get_sql("get_file_prefix"),
            [source_key, source_file_id, table_id],
        )
        query_result = curs.fetchone()
        conn.commit()
    finally:
        conn.close()
    return query_result[0]
def get_inbox_bucket():
    """Return the inbox bucket name from the FILE_MANAGER database package."""
    # Connect outside the try: a failed connect must not hit conn.close().
    conn = oraconn.connect("MRDS_LOADER")
    try:
        ret = oraconn.run_func(conn, "CT_MRDS.FILE_MANAGER.GET_INBOX_BUCKET", str, [])
        conn.commit()
    finally:
        conn.close()
    return ret
def get_data_bucket():
    """Return the data bucket name from the FILE_MANAGER database package."""
    # Connect outside the try: a failed connect must not hit conn.close().
    conn = oraconn.connect("MRDS_LOADER")
    try:
        ret = oraconn.run_func(conn, "CT_MRDS.FILE_MANAGER.GET_DATA_BUCKET", str, [])
        conn.commit()
    finally:
        conn.close()
    return ret
def add_source_file_config(
    source_key,
    source_file_type,
    source_file_id,
    source_file_desc,
    source_file_name_pattern,
    table_id,
    template_table_name,
):
    """Register a source-file configuration via FILE_MANAGER.ADD_SOURCE_FILE_CONFIG."""
    # Connect outside the try: a failed connect must not hit conn.close().
    conn = oraconn.connect("MRDS_LOADER")
    try:
        ret = oraconn.run_proc(
            conn,
            "CT_MRDS.FILE_MANAGER.ADD_SOURCE_FILE_CONFIG",
            [
                source_key,
                source_file_type,
                source_file_id,
                source_file_desc,
                source_file_name_pattern,
                table_id,
                template_table_name,
            ],
        )
        conn.commit()
    finally:
        conn.close()
    return ret
def add_column_date_format(template_table_name, column_name, date_format):
    """Register a column date format via FILE_MANAGER.ADD_COLUMN_DATE_FORMAT."""
    # Connect outside the try: a failed connect must not hit conn.close().
    conn = oraconn.connect("MRDS_LOADER")
    try:
        ret = oraconn.run_proc(
            conn,
            "CT_MRDS.FILE_MANAGER.ADD_column_date_format",
            [template_table_name, column_name, date_format],
        )
        conn.commit()
    finally:
        conn.close()
    return ret
def execute(stmt):
    """Execute a single statement as MRDS_LOADER and commit."""
    # Connect outside the try: a failed connect must not hit conn.close().
    conn = oraconn.connect("MRDS_LOADER")
    try:
        curs = conn.cursor()
        curs.execute(stmt)
        conn.commit()
    finally:
        conn.close()
def create_external_table(table_name, template_table_name, prefix):
    """Create an external table over the ODS bucket via FILE_MANAGER.CREATE_EXTERNAL_TABLE."""
    # Connect outside the try: a failed connect must not hit conn.close().
    conn = oraconn.connect("ODS_LOADER")
    try:
        ret = oraconn.run_proc(
            conn,
            "CT_MRDS.FILE_MANAGER.CREATE_EXTERNAL_TABLE",
            [table_name, template_table_name, prefix, get_bucket("ODS")],
        )
        conn.commit()
    finally:
        conn.close()
    return ret
def get_bucket(bucket):
    """Return the URI of the named bucket from FILE_MANAGER.GET_BUCKET_URI."""
    # Connect outside the try: a failed connect must not hit conn.close().
    conn = oraconn.connect("MRDS_LOADER")
    try:
        ret = oraconn.run_func(
            conn, "CT_MRDS.FILE_MANAGER.GET_BUCKET_URI", str, [bucket]
        )
        conn.commit()
    finally:
        conn.close()
    return ret

View File

@@ -0,0 +1,97 @@
from . import oraconn
from . import sql_statements
from . import static_vars
from . import manage_files
def init_workflow(database_name: str, workflow_name: str, workflow_run_id: str):
    """Register a new workflow run and return its A_WORKFLOW_HISTORY_KEY."""
    # Connect outside the try so a failed connect cannot raise NameError
    # on conn.close() in the finally block.
    conn = oraconn.connect("MRDS_LOADER")
    try:
        a_workflow_history_key = oraconn.run_func(
            conn,
            "CT_MRDS.WORKFLOW_MANAGER.INIT_WORKFLOW",
            int,
            [database_name, workflow_run_id, workflow_name],
        )
        conn.commit()
    finally:
        conn.close()
    return a_workflow_history_key
def finalise_workflow(a_workflow_history_key: int, workflow_status: str):
    """Mark a workflow run finished via WORKFLOW_MANAGER.FINALISE_WORKFLOW."""
    # Connect outside the try: a failed connect must not hit conn.close().
    conn = oraconn.connect("MRDS_LOADER")
    try:
        oraconn.run_proc(
            conn,
            "CT_MRDS.WORKFLOW_MANAGER.FINALISE_WORKFLOW",
            [a_workflow_history_key, workflow_status],
        )
        conn.commit()
    finally:
        conn.close()
def init_task(task_name: str, task_run_id: str, a_workflow_history_key: int):
    """Register a new task run and return its A_TASK_HISTORY_KEY."""
    # Connect outside the try: a failed connect must not hit conn.close().
    conn = oraconn.connect("MRDS_LOADER")
    try:
        a_task_history_key = oraconn.run_func(
            conn,
            "CT_MRDS.WORKFLOW_MANAGER.INIT_TASK",
            int,
            [task_run_id, task_name, a_workflow_history_key],
        )
        conn.commit()
    finally:
        conn.close()
    return a_task_history_key
def finalise_task(a_task_history_key: int, task_status: str):
    """Mark a task row finished (TASK_END / TASK_SUCCESSFUL) in A_TASK_HISTORY."""
    # Connect outside the try: a failed connect must not hit conn.close().
    conn = oraconn.connect("MRDS_LOADER")
    try:
        curs = conn.cursor()
        # Binds are positional: [status, key].
        curs.execute(
            sql_statements.get_sql("finalise_task"), [task_status, a_task_history_key]
        )
        conn.commit()
    finally:
        conn.close()
def set_workflow_property(
    wf_history_key: int, service_name: str, property: str, value: str
):
    """Set a workflow property via WORKFLOW_MANAGER.SET_WORKFLOW_PROPERTY."""
    # Connect outside the try: a failed connect must not hit conn.close().
    conn = oraconn.connect("MRDS_LOADER")
    try:
        ret = oraconn.run_proc(
            conn,
            "CT_MRDS.WORKFLOW_MANAGER.SET_WORKFLOW_PROPERTY",
            [wf_history_key, service_name, property, value],
        )
        conn.commit()
    finally:
        conn.close()
    return ret
def select_ods_tab(table_name: str, value: str, condition="1 = 1"):
    """
    SELECT `value` FROM `table_name` WHERE `condition` via the ODS_LOADER account.

    NOTE(review): the query is assembled by string interpolation, so arguments
    must come from trusted code only (SQL injection risk otherwise).
    """
    query = f"select {value} from {table_name} where {condition}"
    print(f"query = |{query}|")
    return manage_files.execute_query(query=query, account_alias="ODS_LOADER")

View File

@@ -0,0 +1,53 @@
import oci
def get_client():
    """
    Build an OCI ObjectStorageClient.

    Authentication uses Resource Principals (OCI Container Instances) when
    available and falls back to Instance Principals (VMs) on error.
    """
    try:
        signer = oci.auth.signers.get_resource_principals_signer()
    except Exception:
        # Not running as a resource principal - fall back to instance principal.
        # (Was a bare `except:`, which would also swallow KeyboardInterrupt.)
        signer = oci.auth.signers.InstancePrincipalsSecurityTokenSigner()
    # The empty dict is an empty config: all auth comes from the signer.
    client = oci.object_storage.ObjectStorageClient({}, signer=signer)
    return client
def list_bucket(client, namespace, bucket, prefix):
    """List objects under prefix and return the response .data.

    See https://docs.oracle.com/en-us/iaas/tools/python/2.135.0/api/request_and_response.html#oci.response.Response
    for all response attributes.
    """
    response = client.list_objects(namespace, bucket, prefix=prefix)
    return response.data
def upload_file(client, source_filename, namespace, bucket, prefix, target_filename):
    """Upload a local file to <bucket>/<prefix>/<target_filename>."""
    object_name = f"{prefix.rstrip('/')}/{target_filename}"
    with open(source_filename, "rb") as in_file:
        client.put_object(namespace, bucket, object_name, in_file)
def clean_folder(client, namespace, bucket, prefix):
    """Delete every object under the given prefix."""
    objects = client.list_objects(namespace, bucket, prefix=prefix)
    for o in objects.data.objects:
        # o.name is already the full object key (the delete below uses it
        # directly); the old log line wrongly prepended the prefix again.
        print(f"Deleting {o.name}")
        client.delete_object(namespace, bucket, o.name)
def delete_file(client, file, namespace, bucket, prefix):
    """Delete the single object at <prefix>/<file>."""
    object_name = f"{prefix.rstrip('/')}/{file}"
    client.delete_object(namespace, bucket, object_name)
def download_file(client, namespace, bucket, prefix, source_filename, target_filename):
    """Download <prefix>/<source_filename> to a local file, streaming 1 MiB chunks."""
    response = client.get_object(
        namespace, bucket, f"{prefix.rstrip('/')}/{source_filename}"
    )
    with open(target_filename, "wb") as out:
        for chunk in response.data.raw.stream(1024 * 1024, decode_content=False):
            out.write(chunk)

View File

@@ -0,0 +1,38 @@
import oracledb
import os
import traceback
import sys
def connect(alias):
    """
    Open an oracledb connection using <alias>_DB_USER/_DB_PASS/_DB_TNS env vars.

    NOTE(review): on connection failure this exits the whole process with
    status 1 rather than raising - confirm no caller ever needs to recover
    from a failed connect.
    """
    username = os.getenv(alias + "_DB_USER")
    password = os.getenv(alias + "_DB_PASS")
    tnsalias = os.getenv(alias + "_DB_TNS")
    connstr = username + "/" + password + "@" + tnsalias
    # Force thick mode (Oracle Client libraries).
    oracledb.init_oracle_client()
    try:
        return oracledb.connect(connstr)
    except oracledb.DatabaseError as db_err:
        tb = traceback.format_exc()
        print(f"DatabaseError connecting to '{alias}': {db_err}\n{tb}", file=sys.stderr)
        sys.exit(1)
    except Exception as exc:
        tb = traceback.format_exc()
        print(f"Unexpected error connecting to '{alias}': {exc}\n{tb}", file=sys.stderr)
        sys.exit(1)
def run_proc(connection, proc: str, param: list):
    """Call stored procedure `proc` with positional parameters `param`.

    The cursor is always closed (the old version leaked it), and the
    annotation `param: []` (a list literal) is corrected to `list`.
    """
    curs = connection.cursor()
    try:
        curs.callproc(proc, param)
    finally:
        curs.close()
def run_func(connection, proc: str, rettype, param: list):
    """Call stored function `proc`, returning its result coerced to `rettype`.

    The cursor is always closed (the old version leaked it), and the
    annotation `param: []` (a list literal) is corrected to `list`.
    """
    curs = connection.cursor()
    try:
        return curs.callfunc(proc, rettype, param)
    finally:
        curs.close()

View File

@@ -0,0 +1,46 @@
import oci
import ast
import base64
# Specify the OCID of the secret to retrieve
def get_secretcontents(ocid):
    """
    Fetch the raw secret bundle for `ocid` from OCI Vault.

    Authentication uses Resource Principals (OCI Container Instances) when
    available and falls back to Instance Principals (VMs) on error.
    """
    try:
        signer = oci.auth.signers.get_resource_principals_signer()
    except Exception:
        # Not running as a resource principal - fall back to instance principal.
        # (Was a bare `except:`, which would also swallow KeyboardInterrupt.)
        signer = oci.auth.signers.InstancePrincipalsSecurityTokenSigner()
    # Create secret client and retrieve content
    secretclient = oci.secrets.SecretsClient({}, signer=signer)
    secretcontents = secretclient.get_secret_bundle(secret_id=ocid)
    return secretcontents
def get_password(ocid):
    """Return the 'password' field of the dict stored (base64-encoded) in the secret."""
    bundle = get_secretcontents(ocid)
    encoded = bundle.data.secret_bundle_content.content
    decoded = base64.b64decode(encoded.encode("ascii")).decode("ascii")
    # The secret content is a Python-literal dict, e.g. "{'password': '...'}"
    return ast.literal_eval(decoded)["password"]
def get_secret(ocid):
    """Return the secret content decoded from base64 as UTF-8 text."""
    bundle = get_secretcontents(ocid)
    encoded = bundle.data.secret_bundle_content.content
    return base64.b64decode(encoded).decode("UTF-8")

View File

@@ -0,0 +1,106 @@
import re
import logging
def verify_run_id(run_id, context=None):
    r"""
    Verify run_id for security compliance.

    Args:
        run_id (str): The run_id to verify.
        context (dict, optional): Airflow context, used only for logging.

    Returns:
        str: The verified (stripped) run_id.

    Raises:
        ValueError: If run_id is empty, not a string, too long, contains
            characters outside [a-zA-Z0-9_\-:+.T], or matches a known
            attack pattern.
    """
    dangerous_patterns = [
        r"\.\./",
        r"\.\.\\",
        r"<script",
        r"javascript:",
        r"union\s+select",
        r"drop\s+table",
        r"insert\s+into",
        r"delete\s+from",
        r"exec\s*\(",
        r"system\s*\(",
        r"eval\s*\(",
        r"[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]",
    ]
    try:
        # Basic shape checks
        if not run_id or not isinstance(run_id, str):
            raise ValueError(
                f"Invalid run_id: must be non-empty string, got: {type(run_id).__name__}"
            )
        run_id = run_id.strip()
        if not 1 <= len(run_id) <= 250:
            raise ValueError(
                f"Invalid run_id: length must be 1-250 chars, got: {len(run_id)}"
            )
        # Allow only safe characters
        if re.match(r"^[a-zA-Z0-9_\-:+.T]+$", run_id) is None:
            suspicious_chars = "".join(
                {c for c in run_id if not re.match(r"[a-zA-Z0-9_\-:+.T]", c)}
            )
            logging.warning(f"SECURITY: Invalid chars in run_id: '{suspicious_chars}'")
            raise ValueError("Invalid run_id: contains unsafe characters")
        # Check for known attack patterns
        for pattern in dangerous_patterns:
            if re.search(pattern, run_id, re.IGNORECASE):
                logging.error(f"SECURITY: Dangerous pattern in run_id: '{run_id}'")
                raise ValueError("Invalid run_id: contains dangerous pattern")
        # Log success, including the DAG id when a context is supplied
        if context:
            dag = context.get("dag")
            dag_id = getattr(dag, "dag_id", "unknown") if dag else "unknown"
            logging.info(f"run_id verified: '{run_id}' for DAG: '{dag_id}'")
        return run_id
    except Exception as e:
        logging.error(
            f"SECURITY: run_id verification failed: '{run_id}', Error: {str(e)}"
        )
        raise ValueError(f"run_id verification failed: {str(e)}")
def get_verified_run_id(context):
    """
    Extract the run_id from an Airflow context (ti.run_id preferred, then
    the 'run_id' key) and verify it.

    Args:
        context (dict): Airflow context.

    Returns:
        str: Verified run_id.

    Raises:
        ValueError: if no run_id can be extracted or verification fails.
    """
    try:
        run_id = None
        if context:
            if "ti" in context:
                run_id = context["ti"].run_id
            elif "run_id" in context:
                run_id = context["run_id"]
        if not run_id:
            raise ValueError("Could not extract run_id from context")
        return verify_run_id(run_id, context)
    except Exception as e:
        logging.error(f"Failed to get verified run_id: {str(e)}")
        raise

View File

@@ -0,0 +1,68 @@
# Registry of named SQL statements used by manage_runs/manage_files.
# All statements are executed with positional binding (a plain list), so the
# bind names are documentation only - but they should still match the columns.
sql_statements = {}
#
# Workflows
#
# register_workflow: Register a new workflow run.
# Fixed: column name was misspelled WORKFLOW_SSUCCESSFUL.
sql_statements[
    "register_workflow"
] = """INSERT INTO CT_MRDS.A_WORKFLOW_HISTORY
(A_WORKFLOW_HISTORY_KEY, WORKFLOW_RUN_ID,
WORKFLOW_NAME, WORKFLOW_START, WORKFLOW_SUCCESSFUL)
VALUES (:a_workflow_history_key, :workflow_run_id, :workflow_name, SYSTIMESTAMP, :running_status)
"""
# get_a_workflow_history_key: get new key from sequence
sql_statements["get_a_workflow_history_key"] = (
    "SELECT CT_MRDS.A_WORKFLOW_HISTORY_KEY_SEQ.NEXTVAL FROM DUAL"
)
# finalise_workflow: close out the workflow row after completion
sql_statements[
    "finalise_workflow"
] = """UPDATE CT_MRDS.A_WORKFLOW_HISTORY
SET WORKFLOW_END = SYSTIMESTAMP, WORKFLOW_SUCCESSFUL = :workflow_status
WHERE A_WORKFLOW_HISTORY_KEY = :a_workflow_history_key
"""
#
# Tasks
#
# register_task: Register a new task run.
# Fixed: the VALUES clause was a copy of register_workflow's (5 workflow
# binds for 6 task columns); binds now match the column list.
sql_statements[
    "register_task"
] = """INSERT INTO CT_MRDS.A_TASK_HISTORY (A_TASK_HISTORY_KEY,
A_WORKFLOW_HISTORY_KEY, TASK_RUN_ID,
TASK_NAME, TASK_START, TASK_SUCCESSFUL)
VALUES (:a_task_history_key, :a_workflow_history_key, :task_run_id, :task_name, SYSTIMESTAMP, :running_status)
"""
# get_a_task_history_key: get new key from sequence
sql_statements["get_a_task_history_key"] = (
    "SELECT CT_MRDS.A_TASK_HISTORY_KEY_SEQ.NEXTVAL FROM DUAL"
)
# finalise_task: close out the task row after completion.
# Bind names renamed from workflow terms to task terms for clarity; callers
# bind positionally, so behavior is unchanged.
sql_statements[
    "finalise_task"
] = """UPDATE CT_MRDS.A_TASK_HISTORY
SET TASK_END = SYSTIMESTAMP, TASK_SUCCESSFUL = :task_status
WHERE A_TASK_HISTORY_KEY = :a_task_history_key
"""
#
# Files
#
sql_statements["get_file_prefix"] = (
    "SELECT CT_MRDS.FILE_MANAGER.GET_BUCKET_PATH(:source_key, :source_file_id, :table_id) FROM DUAL"
)
def get_sql(stmt_id: str):
if stmt_id in sql_statements:
return sql_statements[stmt_id]
else:
return

View File

@@ -0,0 +1,6 @@
#
# Task management variables
#
# Status values bound into the *_SUCCESSFUL columns of the CT_MRDS history
# tables (see the register_*/finalise_* SQL statements, which bind
# :running_status / :workflow_status).
# NOTE(review): failed/success are single-char flags ("N"/"Y") while running
# is the word "RUNNING" -- confirm the target column accepts both widths.
status_running: str = "RUNNING"
status_failed: str = "N"
status_success: str = "Y"

View File

@@ -0,0 +1,83 @@
import re
def parse_uri_with_regex(uri):
    """
    Parse an Oracle Object Storage URI of the form
    '/n/{namespace}/b/{bucketname}/o/{object_path}'.

    Parameters:
        uri (str): The URI string to parse.

    Returns:
        tuple: (namespace, bucket_name, prefix, object_name) where prefix is
        everything up to and including the last '/' of the object path, or
        '' when the path has no directory part.

    Raises:
        ValueError: if the URI does not match the expected layout.
    """
    match = re.match(r"^/n/(?P<ns>[^/]+)/b/(?P<bucket>[^/]+)/o/(?P<path>.*)$", uri)
    if match is None:
        raise ValueError("Invalid URI format")
    object_path = match.group("path")
    # rpartition splits at the LAST '/'; when no '/' is present the separator
    # comes back empty and the whole path is the object name.
    head, sep, object_name = object_path.rpartition("/")
    prefix = head + "/" if sep else ""
    return match.group("ns"), match.group("bucket"), prefix, object_name
def parse_output_columns(output_columns):
    """
    Partition output-column definitions by their "type" field.

    Each entry is a dict with at least "type" and "column_header"; most types
    also carry "value", and "xpath" entries carry "is_key". Entries with an
    unrecognised type contribute only to the column order.

    Returns a 7-tuple of lists:
    (xpath, csv_header, static, a_key, workflow_key, xml_position, column_order).
    """
    xpath_entries = []
    csv_entries = []
    static_entries = []
    a_key_entries = []
    workflow_key_entries = []
    xml_position_entries = []
    column_order = []
    for entry in output_columns:
        kind = entry["type"]
        header = entry["column_header"]
        # Every entry is recorded in the overall column order, even if its
        # type is not one of the recognised kinds below.
        column_order.append(header)
        if kind == "xpath":
            xpath_entries.append((entry["value"], header, entry["is_key"]))
        elif kind == "csv_header":
            csv_entries.append((header, entry["value"]))
        elif kind == "static":
            static_entries.append((header, entry["value"]))
        elif kind == "a_key":
            a_key_entries.append(header)
        elif kind == "workflow_key":
            workflow_key_entries.append(header)
        elif kind == "xpath_element_id":  # TODO - update all xml_position namings to xpath_element_id
            xml_position_entries.append((entry["value"], header))
    return (
        xpath_entries,
        csv_entries,
        static_entries,
        a_key_entries,
        workflow_key_entries,
        xml_position_entries,
        column_order,
    )

View File

@@ -0,0 +1,23 @@
import oci
import ast
import base64
# Specify the OCID of the secret to retrieve
def get_password(ocid):
    """Fetch the 'password' field from an OCI Vault secret bundle.

    Parameters:
        ocid (str): OCID of the secret to retrieve.

    Returns:
        str: The value stored under the "password" key of the decoded
        secret, which is expected to be a base64-encoded Python dict literal.

    Raises:
        KeyError: if the decoded secret has no "password" key.
    """
    # FIX: original had a duplicated assignment ('signer = signer = ...').
    # Authenticate via instance principals (no local ~/.oci/config needed).
    signer = oci.auth.signers.InstancePrincipalsSecurityTokenSigner()
    secret_client = oci.secrets.SecretsClient({}, signer=signer)
    secret_bundle = secret_client.get_secret_bundle(secret_id=ocid)
    # The secret content is base64-encoded text holding a dict literal,
    # e.g. "{'password': '...'}"; decode, then parse safely with
    # ast.literal_eval (never eval) since the vault content is data.
    key_b64 = secret_bundle.data.secret_bundle_content.content
    key_text = base64.b64decode(key_b64.encode("ascii")).decode("ascii")
    secret_dict = ast.literal_eval(key_text)
    return secret_dict["password"]

View File

@@ -0,0 +1,177 @@
import xmlschema
import hashlib
from lxml import etree
from typing import Dict, List
def validate_xml(xml_file, xsd_file):
    """Validate *xml_file* against the XSD schema in *xsd_file*.

    Returns a (bool, str) tuple: (True, success message) when the document
    validates, otherwise (False, a message describing the failure).
    Never raises -- every error is folded into the returned message.
    """
    try:
        # Strict validation: the schema itself must also be well-formed.
        schema = xmlschema.XMLSchema(xsd_file, validation="strict")
        schema.validate(xml_file)
    except xmlschema.validators.exceptions.XMLSchemaValidationError as e:
        return False, f"XML validation error: {str(e)}"
    except xmlschema.validators.exceptions.XMLSchemaException as e:
        return False, f"XML schema error: {str(e)}"
    except Exception as e:
        return False, f"An error occurred during XML validation: {str(e)}"
    return True, "XML file is valid against the provided XSD schema."
def extract_data(
    filename,
    xpath_columns,  # List[(expr, header, is_key)]
    xml_position_columns,  # List[(expr, header)]
    namespaces,
    workflow_context,
    encoding_type="utf-8",
):
    """
    Parses an XML file using XPath expressions and extracts tabular data.

    Parameters:
    - filename (str): The path to the XML file to parse.
    - xpath_columns (list): A list of tuples, each containing:
        - XPath expression (str)
        - CSV column header (str)
        - Indicator if the field is a key ('Y' or 'N')
      Non-key expressions drive the row count; key expressions yield a
      single value repeated on every row.
    - xml_position_columns (list): tuples of (XPath expression, header).
      For each row, the nearest matched ancestor of the row's first
      non-key element is hashed into a run-specific position id.
    - namespaces (dict): Namespace mapping needed for lxml's xpath()
    - workflow_context (dict): must contain "a_workflow_history_key",
      used as the salt for position hashing.
    - encoding_type (str): currently unused in this body -- TODO confirm
      whether it should be passed to the parser.

    Returns:
    - dict: {"headers": [...], "rows": [[...], ...]} with extracted data.
    """
    parser = etree.XMLParser(remove_blank_text=True)
    tree = etree.parse(filename, parser)
    root = tree.getroot()
    # Separate out key vs nonkey columns
    key_cols = [ (expr, h) for expr, h, k in xpath_columns if k == "Y" ]
    nonkey_cols = [ (expr, h) for expr, h, k in xpath_columns if k == "N" ]
    # Evaluate every nonkey XPath and keep the ELEMENT nodes
    # (xpath() may also return strings for text()/attribute expressions;
    # both cases are handled when rows are built below).
    nonkey_elements = {}
    for expr, header in nonkey_cols:
        elems = root.xpath(expr, namespaces=namespaces)
        nonkey_elements[header] = elems
    # figure out how many rows total we need
    # that's the maximum length of any of the nonkey lists
    if nonkey_elements:
        row_count = max(len(lst) for lst in nonkey_elements.values())
    else:
        row_count = 0
    # pad every nonkey list up to row_count with `None`
    # so ragged columns still yield rectangular rows ("" in short columns).
    for header, lst in nonkey_elements.items():
        if len(lst) < row_count:
            lst.extend([None] * (row_count - len(lst)))
    # key columns: take only the FIRST match of each key expression;
    # missing keys become "".
    key_values = []
    for expr, header in key_cols:
        nodes = root.xpath(expr, namespaces=namespaces)
        if not nodes:
            key_values.append("")
        else:
            first = nodes[0]
            txt = (first.text if isinstance(first, etree._Element) else str(first)) or ""
            key_values.append(txt.strip())
    # xml_position columns: pre-evaluate each expression once; the results
    # act as the candidate ancestor sets searched per row below.
    xml_positions = {}
    for expr, header in xml_position_columns:
        xml_positions[header] = root.xpath(expr, namespaces=namespaces)
    # prepare headers -- order is: nonkey, then key, then position columns,
    # matching the per-row append order below.
    headers = [h for _, h in nonkey_cols] + [h for _, h in key_cols] + [h for _, h in xml_position_columns]
    # build rows
    rows = []
    for i in range(row_count):
        row = []
        # nonkey data: element text, stringified non-element result, or ""
        for expr, header in nonkey_cols:
            elem = nonkey_elements[header][i]
            text = ""
            if isinstance(elem, etree._Element):
                text = elem.text or ""
            elif elem is not None:
                text = str(elem)
            row.append(text.strip())
        # key columns (same values on every row)
        row.extend(key_values)
        # xml_position columns: anchor on this row's FIRST nonkey element
        # and climb toward the root until an element from the pre-evaluated
        # candidate list is found.
        for expr, header in xml_position_columns:
            if not nonkey_cols:
                row.append("")
                continue
            first_header = nonkey_cols[0][1]
            data_elem = nonkey_elements[first_header][i]
            if data_elem is None:
                row.append("")
                continue
            target_list = xml_positions[header]
            current = data_elem
            found = None
            while current is not None:
                # membership test is by element identity in lxml, so this
                # matches the exact node returned by the position XPath
                if current in target_list:
                    found = current
                    break
                current = current.getparent()
            if not found:
                row.append("")
            else:
                # compute fullpath with indices: each step is tag[n] where n
                # is the 1-based position among preceding same-tag siblings
                path_elems = []
                walk = found
                while walk is not None:
                    idx = 1 + sum(1 for s in walk.itersiblings(preceding=True) if s.tag == walk.tag)
                    path_elems.append(f"{walk.tag}[{idx}]")
                    walk = walk.getparent()
                full_path = "/" + "/".join(reversed(path_elems))
                # salt with the workflow key so ids are unique per run
                row.append(_xml_pos_hasher(full_path, workflow_context["a_workflow_history_key"]))
        rows.append(row)
    return {"headers": headers, "rows": rows}
def _xml_pos_hasher(input_string, salt, hash_length=15):
    """
    Hash an XML position path into a deterministic integer id.

    Parameters:
        input_string (str): The string to hash.
        salt (int): Run-specific salt mixed into the input so ids are
            deterministic within a run but differ between runs.
        hash_length (int): Maximum number of decimal digits kept from the
            hash (default 15).

    Returns:
        int: A deterministic integer derived from SHA-256 of "salt:input".

    Raises:
        ValueError: if hash_length is not a positive integer.
    """
    if hash_length <= 0:
        raise ValueError("Hash length must be a positive integer.")
    # Salt first, then hash, then take the leading decimal digits of the
    # 256-bit digest interpreted as an integer.
    digest = hashlib.sha256(f"{salt}:{input_string}".encode()).hexdigest()
    return int(str(int(digest, 16))[:hash_length])

View File

@@ -0,0 +1,50 @@
import re
from pathlib import Path
from setuptools import setup, find_packages
# extract version from mrds/__init__.py so the version lives in one place
here = Path(__file__).parent
init_py = here / "mrds" / "__init__.py"
_match = re.search(
    r'^__version__\s*=\s*["\']([^"\']+)["\']', init_py.read_text(), re.MULTILINE
)
# FIX: fail with a clear message instead of an opaque AttributeError when
# __version__ cannot be found.
if _match is None:
    raise RuntimeError(f"Unable to find __version__ in {init_py}")
_version = _match.group(1)
setup(
    name="mrds",
    version=_version,
    packages=find_packages(),
    install_requires=[
        "click>=8.0.0,<9.0.0",
        "oci>=2.129.3,<3.0.0",
        "oracledb>=2.5.1,<3.0.0",
        "PyYAML>=6.0.0,<7.0.0",
        "lxml>=5.0.0,<5.3.0",
        "xmlschema>=3.4.0,<3.4.3",
        "cryptography>=3.3.1,<42.0.0",
        "PyJWT>=2.0.0,<3.0.0",
        "requests>=2.25.0,<3.0.0",
    ],
    extras_require={
        "dev": [
            "black==24.10.0",
            "tox==4.23.2",
            "pytest==8.3.4",
        ],
    },
    entry_points={
        "console_scripts": [
            "mrds-cli=mrds.cli:cli_main",
        ],
    },
    author="",
    author_email="",
    description="MRDS module for MarS ETL POC",
    # FIX: original used open("README.md").read(), which leaked the file
    # handle and relied on the platform default encoding.
    long_description=(here / "README.md").read_text(encoding="utf-8"),
    long_description_content_type="text/markdown",
    url="",
    classifiers=[
        "Programming Language :: Python :: 3",
        "Operating System :: OS Independent",
    ],
    python_requires=">=3.11",
)

View File

@@ -0,0 +1,17 @@
# tox.ini
[tox]
envlist = py311, format
[testenv]
deps =
pytest
commands =
pytest
[testenv:format]
basepython = python3
deps =
black
commands =
black --check --diff .