This commit is contained in:
Grzegorz Michalski
2026-03-02 09:47:35 +01:00
commit 2c225d68ac
715 changed files with 130067 additions and 0 deletions

View File

View File

@@ -0,0 +1,86 @@
dev:
DEVO_USERNAME: "ap-devo_lab-mrds"
IMPALA_HOSTNAME: "impala-proxy-devo-lab21-impala01.dw-devo-lab21.om2y56.b0.cloudera.site"
HIVE_HOSTNAME: 'hs2-devo-lab21-hive01.dw-devo-lab21.om2y56.b0.cloudera.site'
RANGER_HOSTNAME: "https://devo-lab21-dl-gateway.devo-lab.om2y56.b0.cloudera.site:443/devo-lab21-dl/cdp-proxy-api/ranger"
BUCKET_PREFIX: "s3a://devo-crp-ffppyd8q/"
DEVO_SECRET: "ocid1.vaultsecret.oc1.eu-frankfurt-1.amaaaaaa2ky4jjya3tsglrzfgiyfisxchref774l5y4nrler2vn54lr3li7q"
S3_LOCATION_URI: "https://devo-crp-ffppyd8q.bucket.vpce-040b28f5818b670c1-owicl3ow.s3.eu-central-1.vpce.amazonaws.com/{0}/db/"
DEVO_USERNAME_RQSD: "ap-devo-rqsd-lab"
DEVO_SECRET_RQSD: "ocid1.vaultsecret.oc1.eu-frankfurt-1.amaaaaaa2ky4jjyap6wtzobzob7qizvk4nocszlcaxhwijgzejbvryt3uzbq"
FULL_ACCESS_LIST_RAR: "DISC-DC-RAR-R"
FULL_ACCESS_LIST_MOPDB: ""
FULL_ACCESS_LIST_RQSD: ""
tst:
DEVO_USERNAME: "ap-devo_tst-mrds"
IMPALA_HOSTNAME: "t-impala.devo.escb.eu"
HIVE_HOSTNAME: "hs2-devo-tst21-hive01.dw-devo-tst21.om2y56.b0.cloudera.site"
RANGER_HOSTNAME: "https://devo-tst21-dl-gateway.devo-tst.om2y56.b0.cloudera.site:443/devo-tst21-dl/cdp-proxy-api/ranger"
BUCKET_PREFIX: "s3a://devo-crp-sbul3ju3/"
DEVO_SECRET: "ocid1.vaultsecret.oc1.eu-frankfurt-1.amaaaaaa2ky4jjyayqqotyowhpoml3v5szkwhmtu4rq6bplpkvdruzupz3ma"
S3_LOCATION_URI: "https://devo-crp-sbul3ju3.bucket.vpce-040b28f5818b670c1-owicl3ow.s3.eu-central-1.vpce.amazonaws.com/{0}/db/"
DEVO_USERNAME_RQSD: "ap-devo-rqsd-tst"
DEVO_SECRET_RQSD: "ocid1.vaultsecret.oc1.eu-frankfurt-1.amaaaaaa2ky4jjyap6wtzobzob7qizvk4nocszlcaxhwijgzejbvryt3uzbq"
FULL_ACCESS_LIST_RAR: "DISC-TC-RAR-R"
FULL_ACCESS_LIST_MOPDB: ""
FULL_ACCESS_LIST_RQSD: ""
acc:
DEVO_USERNAME: "ap-devo_acc-mrds"
IMPALA_HOSTNAME: "impala-proxy-devo-acc21-impala01.dw-devo-acc21.inym23.b0.cloudera.site"
HIVE_HOSTNAME: "hs2-devo-acc21-hive01.dw-devo-acc21.inym23.b0.cloudera.site"
RANGER_HOSTNAME: "https://devo-acc21-dl-gateway.devo-acc.inym23.b0.cloudera.site/devo-acc21-dl/cdp-proxy-api/ranger/"
BUCKET_PREFIX: "s3a://devo-crp-sbc9vbsu/"
DEVO_SECRET: "ocid1.vaultsecret.oc1.eu-frankfurt-1.amaaaaaa2ky4jjya3x3nic3vxsnpzlfshz2ubj6kekny5tvaqsnwkuh2hw2a"
S3_LOCATION_URI: "https://devo-crp-sbc9vbsu.bucket.vpce-0bf4fa440fb60935d-6m9iqoo9.s3.eu-central-1.vpce.amazonaws.com/{0}/db/"
DEVO_USERNAME_RQSD: "ap-devo-rqsd-acc"
DEVO_SECRET_RQSD: "ocid1.vaultsecret.oc1.eu-frankfurt-1.amaaaaaa2ky4jjyamzhgatnso57mubvg3c6k4ens3orcx4dieo6efukuvm4a"
FULL_ACCESS_LIST_RAR: "DISC-AC-RAR-R"
FULL_ACCESS_LIST_MOPDB: ""
FULL_ACCESS_LIST_RQSD: ""
prd:
DEVO_USERNAME: "ap-devo_prd-mrds"
IMPALA_HOSTNAME: "impala-proxy-devo-prd21-impala01.dw-devo-prd21.inym23.b0.cloudera.site"
HIVE_HOSTNAME: "hs2-devo-prd21-hive01.dw-devo-prd21.inym23.b0.cloudera.site"
RANGER_HOSTNAME: "https://devo-prd21-dl-gateway.devo-prd.inym23.b0.cloudera.site/devo-prd21-dl/cdp-proxy-api/ranger/"
BUCKET_PREFIX: "s3a://devo-crp-2gn5maj9/"
DEVO_SECRET: "ocid1.vaultsecret.oc1.eu-frankfurt-1.amaaaaaa2ky4jjyace73o3xowa3f3jkw4diqzoiyc6skt34sqnnx4yrbykmq"
S3_LOCATION_URI: "https://devo-crp-2gn5maj9.bucket.vpce-0aa6cf4490536dfd5-qgy4w5sz.s3.eu-central-1.vpce.amazonaws.com/{0}/db/"
DEVO_USERNAME_RQSD: "ap-devo-rqsd-prd"
DEVO_SECRET_RQSD: "ocid1.vaultsecret.oc1.eu-frankfurt-1.amaaaaaa2ky4jjyacodc43tfgumkw4qyzw4s3j4jp42vp2elakkpwwrmivqa"
FULL_ACCESS_LIST_RAR: "DISC-PC-RAR-R"
FULL_ACCESS_LIST_MOPDB: ""
FULL_ACCESS_LIST_RQSD: ""
rar:
corporate_store: "crp_rar"
oracle_metadata_table: "CORR_RAR.NH_METADATA_INVENTORY"
oracle_igam_table: "CT_MRDS.A_DEVO_SOURCES_IGAM"
oracle_mgmt_table: "CT_MRDS.A_DEVO_REPLICA_MGMT_RAR"
target_s3_bucket: "rar/db"
tech_meta_data_fields: "tec_ingestion_date String, tec_execution_date String, tec_run_id String"
mopdb:
corporate_store: "crp_mopdb"
oracle_metadata_table: "CT_MOPDB.MOPDB_METADATA_INVENTORY"
oracle_igam_table: "CT_MRDS.A_DEVO_SOURCES_IGAM"
oracle_mgmt_table: "CT_MRDS.A_DEVO_REPLICA_MGMT_MOPDB"
target_s3_bucket: "mopdb/db"
tech_meta_data_fields: "tec_ingestion_date String, tec_execution_date String, tec_run_id String"
rqsd:
corporate_store: "crp_rqsd"
oracle_metadata_table: "CT_MRDS.A_DEVO_METADATA_INVENTORY"
oracle_igam_table: "CT_MRDS.A_DEVO_SOURCES_IGAM"
oracle_mgmt_table: "CT_MRDS.A_DEVO_REPLICA_MGMT_RQSD"
target_s3_bucket: "rqsd/db"
tech_meta_data_fields: "tec_ingestion_date String, tec_execution_date String, tec_run_id String"
# Helper queries (fill in OWNER and TABLE_NAME before running):
# -- target table name:
# SELECT DISTINCT TABLE_ALIAS FROM {oracle_mgmt_table}
# WHERE OWNER = ''
# AND TABLE_NAME = '';
# -- type of access:
# SELECT DISTINCT RAR3_TYPE_OF_ACCESS FROM {oracle_metadata_table}
# WHERE A_VALID_TO > SYSDATE
# AND OWNER = ''
# AND TABLE_NAME = '';

View File

@@ -0,0 +1,65 @@
#!/usr/bin/env python3
import sys, json
import boto3
from botocore.exceptions import ClientError
from botocore.config import Config
# Bucket that every probe in main() is run against.
BUCKET = "devo-crp-sbc9vbsu"
# Key prefix passed to the list_objects_v2 probes in main().
PREFIX = "mopdb/db/" # adjust if needed
def show(e):
    """Write a one-line summary of exception *e* to stderr.

    The error code, message and request id are read from the ``response``
    mapping when the exception carries one; any piece that is missing is
    printed as ``None``.
    """
    details = getattr(e, "response", {})
    error_info = details.get("Error", {})
    request_id = details.get("ResponseMetadata", {}).get("RequestId")
    summary = (
        f"{type(e).__name__}: {error_info.get('Code')} "
        f"{error_info.get('Message')} (RequestId={request_id})"
    )
    print(summary, file=sys.stderr)
def main(endpoint_url=None, region=None, force_path=False):
    """Run four connectivity probes in order and return an exit code.

    Probes: STS ``get_caller_identity``, S3 ``head_bucket``, a zero-key
    ``list_objects_v2`` and a one-key ``list_objects_v2``, all against
    ``BUCKET`` / ``PREFIX``.

    Args:
        endpoint_url: optional endpoint passed to the S3 client.
        region: optional region passed to both clients.
        force_path: use path-style addressing when true, otherwise "auto".

    Returns:
        0 when every probe succeeds; otherwise the number of the failing
        probe (1 = STS, 2 = head_bucket, 3 = zero-key list, 4 = one-key list).
    """
    boto_session = boto3.Session()
    client_config = Config(s3={"addressing_style": "path" if force_path else "auto"})
    s3_client = boto_session.client(
        "s3", region_name=region, endpoint_url=endpoint_url, config=client_config
    )
    sts_client = boto_session.client("sts", region_name=region)

    # Probe 1: which identity do the credentials resolve to?
    try:
        identity = sts_client.get_caller_identity()
        print(f"Caller: {identity['Arn']} (acct {identity['Account']})")
    except Exception as err:
        print("Could not call STS get-caller-identity — credentials not valid for STS.", file=sys.stderr)
        show(err)
        return 1

    # Probe 2: can the bucket be reached at all?
    try:
        s3_client.head_bucket(Bucket=BUCKET)
        print(f"head_bucket OK on s3://{BUCKET}")
    except ClientError as err:
        print("head_bucket failed:", file=sys.stderr)
        show(err)
        return 2

    # Probe 3: a zero-key listing exercises only the list permission.
    try:
        s3_client.list_objects_v2(Bucket=BUCKET, Prefix=PREFIX, MaxKeys=0)
        print(f"list_objects_v2 OK on prefix '{PREFIX}' (permission exists)")
    except ClientError as err:
        print("list_objects_v2 failed:", file=sys.stderr)
        show(err)
        return 3

    # Probe 4: request a single key to confirm that data comes back.
    try:
        listing = s3_client.list_objects_v2(Bucket=BUCKET, Prefix=PREFIX, MaxKeys=1)
        first_entry = listing.get("Contents", [{}])[0]
        print("First key:", first_entry.get("Key"))
    except ClientError as err:
        print("list_objects_v2 (MaxKeys=1) failed:", file=sys.stderr)
        show(err)
        return 4

    return 0
if __name__ == "__main__":
    # Optional args: --endpoint-url URL --region eu-central-1 --force-path
    #
    # argparse replaces the hand-rolled sys.argv scan, which raised IndexError
    # when --endpoint-url or --region was the last argument on the line.
    import argparse

    parser = argparse.ArgumentParser(
        description="Check STS/S3 connectivity and permissions for the configured bucket.",
        allow_abbrev=False,
    )
    parser.add_argument("--endpoint-url", default=None, help="Custom S3 endpoint URL")
    parser.add_argument("--region", default=None, help="AWS region (e.g., eu-central-1)")
    parser.add_argument("--force-path", action="store_true", help="Force path-style addressing")
    # The old loop silently ignored arguments it did not recognise; keep that.
    cli, _ignored = parser.parse_known_args()
    sys.exit(main(endpoint_url=cli.endpoint_url, region=cli.region, force_path=cli.force_path))

View File

@@ -0,0 +1,129 @@
import os
import yaml
import datetime
import pandas as pd
from mrds.utils.secrets import get_secret
import mrds.utils.manage_runs as runManager
import mrds.utils.manage_files as fileManager
import mrds.utils.sql_statements as sqls
import oci
from impala.dbapi import (
connect,
ProgrammingError,
DatabaseError,
IntegrityError,
OperationalError,
)
from impala.error import HiveServer2Error
def get_impala_connection(hostname: str, user: str, secret: str):
    """Open and return a connection to *hostname* via ``connect``.

    The connection uses port 443, HTTP transport on the ``cliservice`` path,
    SSL, and the ``PLAIN`` auth mechanism with *user* / *secret*.
    """
    connection_settings = {
        "host": hostname,
        "port": 443,
        "auth_mechanism": "PLAIN",
        "user": user,
        "password": secret,
        "use_http_transport": True,
        "http_path": "cliservice",
        "use_ssl": True,
    }
    return connect(**connection_settings)
def execute_query(query: str,user,hostname,password):
    """Open a fresh connection, run *query* on it and return the two-item result.

    The connection object is printed before the query is handed to
    ``execute_devo_query``.
    """
    connection = get_impala_connection(hostname, user, password)
    print(connection)
    header, outcome = execute_devo_query(query, connection)
    return header, outcome
def execute_devo_query(query: str, conn):
    """Execute *query* on *conn*, then close the cursor and the connection.

    No rows are fetched: the function always returns ``(None, rowcount)``.

    Args:
        query: statement to execute.
        conn: open DB-API connection; it is closed before this returns.

    Returns:
        A ``(None, cursor.rowcount)`` tuple.

    Raises:
        Exception: for any failure while executing or closing. The message
            names the failure category and the original error is chained as
            ``__cause__``.
    """
    # Built-in Exception takes no keyword arguments, so the previous
    # ``Exception(status_code=..., detail=...)`` calls raised TypeError and
    # hid the real error. The message is now passed positionally.
    cursor = None
    try:
        # Inside the try so that the finally block still closes the
        # connection when cursor creation itself fails.
        cursor = conn.cursor()
        print("executing query")
        cursor.execute(query)
        return None, cursor.rowcount
    except OperationalError as oe:
        raise Exception("Failed to connect to Impala: " + str(oe)) from oe
    except ProgrammingError as pe:
        raise Exception("Query syntax error: " + str(pe)) from pe
    except IntegrityError as ie:
        raise Exception("Insufficient permissions: " + str(ie)) from ie
    except DatabaseError as db_err:
        raise Exception("Database error: " + str(db_err)) from db_err
    except HiveServer2Error as au_err:
        raise Exception("HiveServer2Error error: " + str(au_err)) from au_err
    except Exception as e:
        raise Exception("An unexpected error occurred: " + str(e)) from e
    finally:
        try:
            if cursor is not None:
                cursor.close()
            if conn is not None:
                conn.close()
        except Exception as e:
            raise Exception("Failed to close the connection: " + str(e)) from e
def initialize_task(workflow_context, task_name):
    """Register *task_name* through ``runManager.init_task`` and return its result.

    ``workflow_context`` must provide the ``run_id`` and
    ``a_workflow_history_key`` entries, which are forwarded unchanged.
    """
    return runManager.init_task(
        task_name,
        workflow_context["run_id"],
        workflow_context["a_workflow_history_key"],
    )
def initialize_config(config_file_path):
    """Parse the YAML file at *config_file_path* and return its content.

    Raises:
        FileNotFoundError: when the path does not exist.
    """
    config_present = os.path.exists(config_file_path)
    if not config_present:
        raise FileNotFoundError(f"Configuration file {config_file_path} not found.")

    with open(config_file_path, "r") as handle:
        return yaml.safe_load(handle)
def main(env_config_path, env, table, corporate_store):
    """Run INVALIDATE METADATA and COMPUTE STATS for ``corporate_store.table``.

    Args:
        env_config_path: path of the YAML file with the per-environment settings.
        env: top-level key of that file selecting the environment.
        table: table name interpolated into both statements.
        corporate_store: database name interpolated into both statements.

    Returns:
        True when both statements ran without error.

    Raises:
        Exception: when the secret cannot be fetched or a statement fails.
            The underlying error is chained as ``__cause__``.
    """
    envs_info = initialize_config(env_config_path)
    environment_info = envs_info[env]

    # ``except Exception`` instead of a bare ``except`` so KeyboardInterrupt
    # and SystemExit are no longer converted into a generic failure, and the
    # original error is chained instead of being thrown away.
    try:
        password = get_secret(environment_info["DEVO_SECRET"])
    except Exception as exc:
        print("Failed to retrieve credentials from secrets")
        raise Exception("Failed to retrieve credentials from secrets") from exc

    qualified_table = f"{corporate_store}.{table}"
    statements = (
        f"INVALIDATE METADATA {qualified_table}",
        f"COMPUTE STATS {qualified_table}",
    )
    try:
        for statement in statements:
            execute_query(
                statement,
                environment_info['DEVO_USERNAME'],
                environment_info['IMPALA_HOSTNAME'],
                password,
            )
    except Exception as exc:
        print("Failed to retrieve DEVO data, error during connection or request")
        raise Exception(
            "Failed to retrieve DEVO data, error during connection or request"
        ) from exc
    return True

View File

@@ -0,0 +1,128 @@
#!/usr/bin/env python3
import argparse, sys
from urllib.parse import urlparse
import boto3
from botocore.config import Config
from botocore.exceptions import ClientError, EndpointConnectionError, NoCredentialsError, ReadTimeoutError, ConnectTimeoutError
def parse_s3_uri(s3_uri: str):
    """Split ``s3://bucket/key`` into a ``(bucket, key)`` tuple.

    Leading slashes are stripped from the key part.

    Raises:
        ValueError: when the scheme is not ``s3://`` or the bucket is empty.
    """
    if not s3_uri.startswith("s3://"):
        raise ValueError("S3 URI must start with 's3://'")

    pieces = urlparse(s3_uri)
    bucket_name = pieces.netloc
    if bucket_name == "":
        raise ValueError("Missing bucket in S3 URI")

    object_key = pieces.path.lstrip("/")
    return bucket_name, object_key
def parse_location(location: str):
    """Turn an ``s3://`` URI or an S3 http(s) URL into ``(bucket, prefix)``.

    Recognised URL host layouts, checked in this order:
      * ``<bucket>.bucket.<rest>``  (bucket-scoped VPC endpoint host)
      * ``<bucket>.s3.<rest>``      (virtual-hosted style)
      * ``s3.<rest>/<bucket>/...``  (path style; the bucket is the first path segment)

    Raises:
        ValueError: for any other scheme or host layout.
    """
    if location.startswith("s3://"):
        return parse_s3_uri(location)

    if location.startswith(("http://", "https://")):
        parsed = urlparse(location)
        hostname = parsed.netloc
        key_path = parsed.path.lstrip("/")

        # Bucket-scoped VPCe host: everything before ".bucket." is the bucket.
        bucket, marker, _rest = hostname.partition(".bucket.")
        if marker:
            return bucket, key_path

        is_path_style = hostname.startswith("s3.")
        # Virtual-hosted: everything before ".s3." is the bucket.
        if ".s3." in hostname and not is_path_style:
            return hostname.partition(".s3.")[0], key_path

        # Path-style: first path segment is the bucket, the rest is the prefix.
        if is_path_style:
            bucket, _sep, remainder = key_path.partition("/")
            return bucket, remainder

    raise ValueError(f"Unsupported location: {location}")
def iter_keys(s3, bucket: str, prefix: str, page_size: int, max_items: int, verbose: bool):
    """Yield every object key under ``bucket``/``prefix``, page by page.

    Args:
        s3: client object providing ``get_paginator("list_objects_v2")``.
        bucket: bucket name.
        prefix: key prefix to list.
        page_size: value passed as the paginator ``PageSize``.
        max_items: passed as ``MaxItems`` when greater than 0; otherwise no limit is set.
        verbose: when true, write one progress line per page to stderr.

    Yields:
        The ``Key`` of each listed object, in the order the pages return them.
    """
    # A leftover debug ``print('here')`` used to be emitted on stdout, the
    # stream callers print the keys to; it has been removed.
    paginator = s3.get_paginator("list_objects_v2")
    pagination = {"PageSize": page_size}
    if max_items > 0:
        pagination["MaxItems"] = max_items

    total = 0
    pages = paginator.paginate(Bucket=bucket, Prefix=prefix, PaginationConfig=pagination)
    for page_num, page in enumerate(pages, start=1):
        # A page without "Contents" (or with a null value) has no keys.
        contents = page.get("Contents", []) or []
        total += len(contents)
        if verbose:
            print(f"[page {page_num}] fetched {len(contents)} keys (running total={total})",
                  file=sys.stderr, flush=True)
        for obj in contents:
            yield obj["Key"]
def main():
    """Command-line entry point: print the object keys under an S3 location.

    Parses the arguments, builds an S3 client with explicit timeouts and
    retries, runs a zero-key listing as a preflight check, then prints one
    line per key (``s3://bucket/key``, or the key relative to the prefix
    with ``--relative``).

    Exit status: 0 on success, 1 on credential/AWS/network errors, 2 on an
    unusable location or bad arguments, 130 when interrupted.
    """
    ap = argparse.ArgumentParser(description="List files under an S3 location quickly and safely.")
    ap.add_argument("location", help="s3://bucket/prefix/ OR https://<vpc-endpoint-host>/<prefix>")
    ap.add_argument("--region", default=None, help="AWS region (e.g., eu-central-1)")
    ap.add_argument("--profile", default=None, help="AWS profile to use")
    ap.add_argument("--endpoint-url", default=None,
                    help="Custom S3 endpoint (e.g., https://s3.eu-central-1.vpce.amazonaws.com)")
    ap.add_argument("--force-path-addressing", action="store_true",
                    help="Force path-style addressing (useful with bucket-scoped VPCe hostnames)")
    ap.add_argument("--page-size", type=int, default=1000, help="S3 page size (default 1000)")
    ap.add_argument("--max-items", type=int, default=0, help="Stop after N keys (0 = no limit)")
    ap.add_argument("--connect-timeout", type=float, default=10.0, help="Seconds (default 10)")
    ap.add_argument("--read-timeout", type=float, default=30.0, help="Seconds (default 30)")
    ap.add_argument("--retries", type=int, default=3, help="Max retry attempts (default 3)")
    ap.add_argument("--relative", action="store_true", help="Print keys relative to the prefix")
    ap.add_argument("--verbose", "-v", action="store_true", help="Print progress to stderr")
    args = ap.parse_args()

    # An unsupported location is a usage error: report it through argparse
    # (message + exit status 2) instead of an uncaught ValueError traceback.
    try:
        bucket, prefix = parse_location(args.location)
    except ValueError as e:
        ap.error(str(e))

    # Session & client with explicit timeouts and optional path addressing
    sess_kwargs = {}
    if args.profile:
        sess_kwargs["profile_name"] = args.profile
    session = boto3.Session(**sess_kwargs)
    cfg = Config(
        connect_timeout=args.connect_timeout,
        read_timeout=args.read_timeout,
        retries={"max_attempts": args.retries, "mode": "standard"},
        s3={"addressing_style": "path" if args.force_path_addressing else "auto"},
    )
    s3 = session.client("s3", region_name=args.region, endpoint_url=args.endpoint_url, config=cfg)

    # Quick preflight: try a 0-key list to surface auth/endpoint issues fast.
    # This is the first request sent, so missing credentials show up here;
    # without this handler NoCredentialsError escaped as a traceback and the
    # friendly message in the listing loop below was never reached.
    try:
        _ = s3.list_objects_v2(Bucket=bucket, Prefix=prefix, MaxKeys=0)
    except NoCredentialsError:
        print("No AWS credentials found. Set env vars or use --profile.", file=sys.stderr)
        sys.exit(1)
    except ClientError as e:
        print(f"Preflight failed (auth/permissions/endpoint): {e}", file=sys.stderr)
        sys.exit(1)
    except (EndpointConnectionError, ReadTimeoutError, ConnectTimeoutError) as e:
        print(f"Network/endpoint error: {e}", file=sys.stderr)
        sys.exit(1)

    try:
        for key in iter_keys(s3, bucket, prefix, args.page_size, args.max_items, args.verbose):
            if args.relative and prefix and key.startswith(prefix):
                print(key[len(prefix):].lstrip("/"))
            else:
                print(f"s3://{bucket}/{key}")
    except KeyboardInterrupt:
        print("\nInterrupted.", file=sys.stderr)
        sys.exit(130)
    except NoCredentialsError:
        print("No AWS credentials found. Set env vars or use --profile.", file=sys.stderr)
        sys.exit(1)
    except (EndpointConnectionError, ReadTimeoutError, ConnectTimeoutError) as e:
        print(f"Network/timeout listing objects: {e}", file=sys.stderr)
        sys.exit(1)
    except ClientError as e:
        print(f"AWS error: {e}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,145 @@
class Options:
    """Container for a fixed set of named options.

    Every recognised option starts as ``None``. Values supplied through the
    mapping given to the constructor overwrite the matching entries; keys
    that are not recognised are ignored. Each option can also be read as an
    attribute of the same name.
    """

    def __init__(self, args):
        self.options = dict.fromkeys(
            (
                "corporate_store",
                "service_name",
                "source_schema",
                "source_table",
                "access_type",
                "oracle_metadata_table",
                "oracle_igam_table",
                "query_metadata_access_type1",
                "query_metadata_access_type2a",
                "query_igam_roles",
                "ora_jdbc_url_dwh",
                "ora_jdbc_url_ods",
                "sql_file_path",
                "sql_filename_grants",
                "sentry_role_environment",
                "ranger_script",
                "type3_access_table",
                "type3_access_table_key_column",
                "type3_source_table_key_column",
                "target_s3_bucket",
                "ranger_s3_bucket",
                "ranger_s3_path",
                "rar_full_access_entitlement_list",
                "target_table",
                "tech_meta_data_fields",
                "full_access_entitlement_list",
            )
        )
        # Overlay the caller-supplied values on top of the None defaults.
        self.initialize_options(args)

    def initialize_options(self, args):
        """Copy into ``self.options`` the entries of mapping *args* whose key is a known option."""
        recognised = {name: args[name] for name in args.keys() if name in self.options}
        self.options.update(recognised)

    def get_option_value(self, key):
        """Return the stored value for *key*, or ``""`` when *key* is not a known option."""
        return self.options.get(key, "")

    def _option(key):
        # Class-body helper, not a method: builds the read-only accessor for
        # one option key. It is deleted again at the end of the class body.
        return property(lambda self: self.get_option_value(key))

    corporate_store = _option("corporate_store")
    source_schema = _option("source_schema")
    source_table = _option("source_table")
    access_type = _option("access_type")
    oracle_metadata_table = _option("oracle_metadata_table")
    oracle_igam_table = _option("oracle_igam_table")
    query_metadata_access_type1 = _option("query_metadata_access_type1")
    query_metadata_access_type2a = _option("query_metadata_access_type2a")
    query_igam_roles = _option("query_igam_roles")
    ora_jdbc_url_dwh = _option("ora_jdbc_url_dwh")
    ora_jdbc_url_ods = _option("ora_jdbc_url_ods")
    sql_file_path = _option("sql_file_path")
    sql_filename_grants = _option("sql_filename_grants")
    sentry_role_environment = _option("sentry_role_environment")
    ranger_script = _option("ranger_script")
    type3_access_table = _option("type3_access_table")
    type3_access_table_key_column = _option("type3_access_table_key_column")
    type3_source_table_key_column = _option("type3_source_table_key_column")
    target_s3_bucket = _option("target_s3_bucket")
    ranger_s3_bucket = _option("ranger_s3_bucket")
    ranger_s3_path = _option("ranger_s3_path")
    rar_full_access_entitlement_list = _option("rar_full_access_entitlement_list")
    target_table = _option("target_table")
    tech_meta_data_fields = _option("tech_meta_data_fields")
    full_access_entitlement_list = _option("full_access_entitlement_list")
    service_name = _option("service_name")

    del _option

View File

@@ -0,0 +1,73 @@
from impala.dbapi import (
connect,
ProgrammingError,
DatabaseError,
IntegrityError,
OperationalError,
)
from impala.error import HiveServer2Error
def get_DEVO_connection(hostname: str, user: str, secret: str):
    """Open and return a connection to *hostname* via ``connect``.

    The connection uses port 443, HTTP transport on the ``cliservice`` path,
    SSL, and the ``PLAIN`` auth mechanism with *user* / *secret*.
    """
    connection_settings = {
        "host": hostname,
        "port": 443,
        "auth_mechanism": "PLAIN",
        "user": user,
        "password": secret,
        "use_http_transport": True,
        "http_path": "cliservice",
        "use_ssl": True,
    }
    return connect(**connection_settings)
def execute_devo_query(query: str, conn):
    """Execute *query* on *conn*, then close the cursor and the connection.

    Args:
        query: statement to execute.
        conn: open DB-API connection; it is closed before this returns.

    Returns:
        ``(columns, rows)`` when the statement text starts with ``select``
        (case-insensitive, leading whitespace ignored), where ``columns`` is
        the list of column names taken from ``cursor.description``;
        ``(None, cursor.rowcount)`` for every other statement.

    Raises:
        Exception: wrapping any driver error caught below or a failure while
            closing; the original error is chained as ``__cause__``.
    """
    cursor = None
    try:
        cursor = conn.cursor()
        cursor.execute(query)
        if query.strip().lower().startswith("select"):
            rows = cursor.fetchall()
            columns = [col[0] for col in cursor.description]
            return columns, rows
        # Anything that is not a SELECT: report only the row count.
        return None, cursor.rowcount
    except OperationalError as oe:
        raise Exception("Failed to connect to DEVO: " + str(oe)) from oe
    except ProgrammingError as pe:
        raise Exception("Query syntax error: " + str(pe)) from pe
    except IntegrityError as ie:
        raise Exception("Insufficient permissions: " + str(ie)) from ie
    except DatabaseError as db_err:
        raise Exception("Database error: " + str(db_err)) from db_err
    except HiveServer2Error as au_err:
        raise Exception("HiveServer2Error error: " + str(au_err)) from au_err
    finally:
        try:
            if cursor is not None:
                cursor.close()
            # The condition used to be ``if not conn``, so a live connection
            # was never closed.
            if conn is not None:
                conn.close()
        except Exception as e:
            # Built-in Exception takes no keyword arguments; the previous
            # ``status_code=`` / ``detail=`` call raised TypeError instead.
            raise Exception(f"Failed to close the cursor or impala connection: {str(e)}") from e
def execute_query(query: str, user: str, hostname: str,password):
    """Open a fresh connection, run *query* on it and return the two-item result of ``execute_devo_query``."""
    connection = get_DEVO_connection(hostname, user, password)
    header, outcome = execute_devo_query(query, connection)
    return header, outcome
#sql="CREATE EXTERNAL TABLE IF NOT EXISTS crp_rar.testInternalTable ( iid STRING,RANDOM_DATE DATE, number int) ;"
#sql_drop="DROP TABLE IF EXISTS crp_rar.NH_PRICE"
# Manual smoke tests. Passwords are redacted: never commit credentials, and rotate any that were exposed in history.
#print( execute_query("SELECT 1","ap-informatica-ipcwt","t-impala.devo.escb.eu","<REDACTED>"))
#print( execute_query("SELECT 1","ap-devo_tst-mrds","t-impala.devo.escb.eu","<REDACTED>"))
#print( execute_query("SELECT 1","ap-devo_lab-mrds","impala-proxy-devo-lab21-impala01.dw-devo-lab21.om2y56.b0.cloudera.site","<REDACTED>"))

View File

@@ -0,0 +1,69 @@
## Step 3: Let's create a policy
from apache_ranger.model.ranger_service import *
from apache_ranger.client.ranger_client import *
from apache_ranger.model.ranger_policy import *
from mrds.utils.secrets import get_secret
## Step 1: create a client to connect to Apache Ranger admin
ranger_url ="https://devo-lab21-dl-gateway.devo-lab.om2y56.b0.cloudera.site:443/devo-lab21-dl/cdp-proxy-api/ranger"
password= get_secret("ocid1.vaultsecret.oc1.eu-frankfurt-1.amaaaaaa2ky4jjya3tsglrzfgiyfisxchref774l5y4nrler2vn54lr3li7q")
ranger_auth = ('ap-devo_lab-mrds', password)
# For Kerberos authentication
#
# from requests_kerberos import HTTPKerberosAuth
#
# ranger_auth = HTTPKerberosAuth()
ranger = RangerClient(ranger_url, ranger_auth)
# NOTE(review): this disables SSL certificate validation for every call made
# through this client (not recommended for production use!).
ranger.session.verify = False

## Step 2: build a policy definition (it is only created if the calls below are uncommented)
policy = RangerPolicy()
policy.service = "cm_hive"  # to be hard-coded
policy.name = 'cpo_crp_mopdb_sgroi_1'  # corporatestore_table_accessType
policy.resources = {
    'database': RangerPolicyResource({'values': ['crp_RQSD']}),
    'table': RangerPolicyResource({'values': ['ANNEX_1_1_ALL']}),
    'column': RangerPolicyResource({'values': ['*']}),
}  # change with correct values

allowItem1 = RangerPolicyItem()  # to try allowItem1.groups
allowItem1.groups = ["d_mopdb_mpec"]
#allowItem1.users = [] #to try for single users
allowItem1.accesses = [
    RangerPolicyItemAccess({'type': 'create'}),
    RangerPolicyItemAccess({'type': 'alter'}),
    RangerPolicyItemAccess({'type': 'select'}),
    RangerPolicyItemAccess({'type': 'drop'}),
]
# Deny-item example (disabled):
#denyItem1 = RangerPolicyItem()
#denyItem1.users = [ 'admin' ] #does it make sense to deny and not allow?
#denyItem1.accesses = [ RangerPolicyItemAccess({ 'type': 'drop' }) ]
policy.policyItems = [ allowItem1 ]
#policy.denyPolicyItems = [ denyItem1 ]

#policy2=ranger.get_policy_by_id(policyId=5086)
#print(ranger.get_policy(serviceName="cm_hive",policyName='crp_rar_testinternalTable_alcesso1'))
#print(ranger.find_policies({"service": "cm_hive", "resources": {"database": {"values": ["crp_rar"], "isExcludes": False , "isRecursive": False}, "column": {"values": ["*"], "isExcludes": False, "isRecursive": False}, "table": {"values": ["testInternalTable"], "isExcludes": False, "isRecursive": False}}}))
#print(ranger.delete_policy(serviceName="cm_hive",policyName="crp_rar_testinternalTable_alcesso1"))
#print(policy2)

## Step 3: create the policy (disabled)
#print('Creating policy: name=' + policy.name)
#created_policy = ranger.create_policy(policy)
#print(' created policy: name=' + created_policy.name + ', id=' + str(created_policy.id))

## Step 4: Delete policy and service created above
#print('Deleting policy: id=' + str(created_policy.id))
#ranger.delete_policy_by_id(created_policy.id)

## Dump every policy of the cm_hive service to output.txt
data=ranger.get_policies_in_service(serviceName="cm_hive")
# The with-block closes the file; the explicit file.close() was redundant.
# Entries are written back to back with no separator, as before.
with open("output.txt", "w") as file:
    file.writelines(str(entry) for entry in data)

View File

@@ -0,0 +1,250 @@
from typing import List, Optional
from apache_ranger.model.ranger_service import *
from apache_ranger.client.ranger_client import *
from apache_ranger.model.ranger_policy import *
import re
def add_table_permission_groups(corporate_store: str, target_table: str, access_type: str, source_table: str, igam_entitlement_list: List[str], columns_list: Optional[List[str]] = None, row_list: Optional[List[str]] = None):
    """Assemble the parameter dict describing one table permission.

    * ``igam_roles``: the entitlements lower-cased, with empty strings
      dropped; ``"public"`` is appended first when *source_table* is
      ``rar_sources_igam_sentry`` (case-insensitive).
    * ``columns``: *columns_list*, or ``["*"]`` when it is None.
    * ``rows``: the *row_list* values (default ``["*"]``), each wrapped in
      single quotes and joined with commas.

    The other three arguments are passed through unchanged.
    """
    entitlements = igam_entitlement_list
    if source_table.lower() == "rar_sources_igam_sentry":
        # Builds a new list, so the caller's list is not modified.
        entitlements = igam_entitlement_list + ["public"]

    if columns_list is None:
        columns_list = ["*"]
    if row_list is None:
        row_list = ["*"]

    quoted_rows = []
    for row in row_list:
        quoted_rows.append(f"'{row}'")

    roles = []
    for entitlement in entitlements:
        if entitlement != "":
            roles.append(entitlement.lower())

    return {
        'corporate_store': corporate_store,
        'target_table': target_table,
        'access_type': access_type,
        'columns': columns_list,
        'rows': ','.join(quoted_rows),
        'igam_roles': roles
    }
from typing import List, Optional
# --- helpers ---------------------------------------------------------------
def _policy_name_from_params(config, policy_id: Optional[str] = None) -> Optional[str]:
"""
Build the exact policy name used by your create functions.
Returns None for types where we need to match multiple (e.g., 2a without id).
"""
cs = config['corporate_store'].lower()
tbl = config['target_table'].lower()
at = config['access_type'].lower()
base = f"cpo_{cs}_{tbl}_{at}"
if at == "1":
# yaml_format_1
return base
elif at == "2a":
# yaml_format_2a -> requires policy_id to be exact
if policy_id:
return f"{base}_policy_{policy_id}"
# without policy_id, well delete all that start with this prefix
return None
elif at == "2b":
# yaml_format_2b
return f"{base}_row_level_policy"
elif at == "3":
# yaml_format_3 uses same name pattern as 2b in your script
return f"{base}_row_level_policy"
else:
raise ValueError(f"Invalid access type '{config['access_type']}'. Expected one of: 1, 2a, 2b, 3.")
def _ranger_client(env_config) -> RangerClient:
    """Build a RangerClient from the ``RANGER_HOSTNAME``, ``DEVO_USERNAME`` and ``DEVO_SECRET`` entries of *env_config*.

    NOTE(review): ``DEVO_SECRET`` is used directly as the password, so the
    caller presumably has to place the resolved secret there first; confirm.
    """
    endpoint = env_config['RANGER_HOSTNAME']
    credentials = (env_config['DEVO_USERNAME'], env_config['DEVO_SECRET'])
    ranger = RangerClient(endpoint, credentials)
    # Certificate verification is switched off on the client's session.
    ranger.session.verify = False
    return ranger
# --- main deletion API -----------------------------------------------------
def delete_policy(config,env_config, policy_id: Optional[str] = None) -> List[str]:
    """Delete every ``cm_hive`` policy whose name belongs to the configured table.

    All policies of the service are fetched page by page and filtered
    client-side. A policy is deleted when its full name is
    ``cpo_<corporate_store>_<target_table>_`` (lower-cased) followed by an
    optional access-type token and the optional ``_policy_<n>``,
    ``_row_level_policy`` and ``full_access`` parts.

    Args:
        config: mapping with ``corporate_store`` and ``target_table``.
        env_config: environment settings used to build the Ranger client.
        policy_id: accepted for interface compatibility; not used for matching.

    Returns:
        Names of the policies that were deleted.

    Raises:
        RuntimeError: when nothing was deleted, either because no policy
            matched or because every matching deletion failed.
    """
    ranger = _ranger_client(env_config)
    service_name = "cm_hive"

    cs = config['corporate_store'].lower()
    tbl = config['target_table'].lower()
    prefix = f"cpo_{cs}_{tbl}_"
    print(prefix)

    # The prefix is escaped so store/table names are matched literally even if
    # they contain regex metacharacters. Compiled once, outside the loop.
    name_pattern = re.compile(
        re.escape(prefix)
        + r"([0-9]?[a-z]?)(_policy_)?([0-9]*)?(_row_level_policy)?(full_access)?$"
    )

    # Fetch all policies of the service; a short page marks the end.
    page_size = 1000
    start = 0
    candidates = []
    while True:
        params = {"pageSize": page_size, "startIndex": start}
        page = ranger.get_policies_in_service(service_name, params=params) or []
        candidates.extend(page)
        if len(page) < page_size:
            break
        start += len(page)

    deleted: List[str] = []
    failed: List[str] = []
    for p in candidates:
        name = p["name"]
        print(f"analizing policy:{name}")
        if name_pattern.fullmatch(name) is None:
            continue
        try:
            ranger.delete_policy_by_id(p["id"])
        except Exception as exc:
            # Best effort: keep going, but record the failure instead of
            # silently discarding it.
            failed.append(name)
            print(f"Failed to delete policy {name}: {exc}")
            continue
        deleted.append(name)

    if not deleted:
        if failed:
            raise RuntimeError(
                f"Matching policies found for prefix '{prefix}' but none could be deleted: {failed}"
            )
        raise RuntimeError(
            f"No matching policies found for deletion with prefix '{prefix}'. "
        )
    return deleted
def generate_policy(params,env_config, policy_id: Optional[str] = None, *, filter_string: Optional[str] = None, full_access_list: Optional[List[str]] = None):
    """Create the Ranger policy that corresponds to ``params['access_type']``.

    Dispatch (access type compared case-insensitively):

    * ``1``  -> ``yaml_format_1``
    * ``2a`` -> ``yaml_format_2a`` (uses *policy_id*)
    * ``2b`` -> ``yaml_format_1``
    * ``3``  -> ``yaml_format_3`` (needs *filter_string*)

    Args:
        params: policy parameters; must contain ``access_type``.
        env_config: environment settings forwarded to the create function.
        policy_id: optional id, only used for access type ``2a``.
        filter_string: row-filter expression, required for access type ``3``.
        full_access_list: groups given unfiltered access for type ``3``;
            None is treated as an empty list.

    Returns:
        Whatever the selected create function returns.

    Raises:
        TypeError: access type ``3`` without *filter_string*.
        Exception: unknown access type.
    """
    access_type = params['access_type'].lower()
    if access_type == "1":
        return yaml_format_1(params,env_config)
    elif access_type == "2a":
        return yaml_format_2a(params, env_config, policy_id)
    elif access_type == "2b":
        # NOTE(review): 2b goes through yaml_format_1, not yaml_format_2b.
        # Kept as found; confirm whether this is intended.
        return yaml_format_1(params,env_config)
    elif access_type == "3":
        # This branch used to call yaml_format_3(params) without its other
        # three required arguments, so it always failed with TypeError. The
        # values now come from the optional keyword arguments; a missing
        # filter is still reported as TypeError so existing handlers keep working.
        if filter_string is None:
            raise TypeError("generate_policy: access type '3' requires filter_string")
        groups = [] if full_access_list is None else full_access_list
        return yaml_format_3(params, env_config, filter_string, groups)
    else:
        raise Exception(f"Invalid access type {params['access_type']}. Please check the input param")
def yaml_format_1(params,env_config) -> "RangerPolicy":
    """Create a column-level ``select`` policy in Ranger for one table.

    Despite the name, no YAML is produced: the policy is built and sent to
    Ranger through ``create_policy``.

    Args:
        params: mapping with ``corporate_store``, ``target_table``,
            ``access_type``, ``columns`` and ``igam_roles``.
        env_config: environment settings used to build the Ranger client.

    Returns:
        The locally built policy object that was submitted (not the object
        returned by Ranger). The annotation previously said ``str``.
    """
    ranger = _ranger_client(env_config)
    store = params['corporate_store'].lower()

    policy = RangerPolicy()
    policy.service = "cm_hive"  # hard-coded
    # Name layout: cpo_<corporate store>_<table>_<access type>
    policy.name = f"cpo_{store}_{params['target_table'].lower()}_{params['access_type'].lower()}"
    policy.resources = {
        'database': RangerPolicyResource({'values': [store]}),
        'table': RangerPolicyResource({'values': [params['target_table']]}),
        'column': RangerPolicyResource({'values': params['columns']}),
    }

    allow_item = RangerPolicyItem()
    allow_item.groups = params['igam_roles']
    allow_item.accesses = [RangerPolicyItemAccess({'type': 'select'})]
    policy.policyItems = [allow_item]

    created_policy = ranger.create_policy(policy)
    print('Created policy: name=' + created_policy.name + ', id=' + str(created_policy.id))
    return policy
def yaml_format_2a(params, env_config,policy_id: Optional[str]) -> "RangerPolicy":
    """Create a column-level ``select`` policy whose name carries a policy id.

    Despite the name, no YAML is produced: the policy is built and sent to
    Ranger through ``create_policy``.

    Args:
        params: mapping with ``corporate_store``, ``target_table``,
            ``access_type``, ``columns`` and ``igam_roles``.
        env_config: environment settings used to build the Ranger client.
        policy_id: suffix for the policy name; ``"0"`` is used when None.

    Returns:
        The locally built policy object that was submitted (not the object
        returned by Ranger). The annotation previously said ``str``.
    """
    name_suffix = "0" if policy_id is None else policy_id
    ranger = _ranger_client(env_config)
    store = params['corporate_store'].lower()

    policy = RangerPolicy()
    policy.service = "cm_hive"  # hard-coded
    # Name layout: cpo_<corporate store>_<table>_<access type>_policy_<id>
    policy.name = f"cpo_{store}_{params['target_table'].lower()}_{params['access_type'].lower()}_policy_{name_suffix}"
    policy.resources = {
        'database': RangerPolicyResource({'values': [store]}),
        'table': RangerPolicyResource({'values': [params['target_table']]}),
        'column': RangerPolicyResource({'values': params['columns']}),
    }

    allow_item = RangerPolicyItem()
    allow_item.groups = params['igam_roles']
    allow_item.accesses = [RangerPolicyItemAccess({'type': 'select'})]
    policy.policyItems = [allow_item]

    created_policy = ranger.create_policy(policy)
    print(' created policy: name=' + created_policy.name + ', id=' + str(created_policy.id))
    return policy
def yaml_format_2b(params,env_config, full_access_list: Optional[List]) -> "RangerPolicy":
    """Create a row-filter policy in Ranger for one table.

    Two row-filter items are built:

    1. For ``params['igam_roles']``: rows are limited through the
       ``t_ref_rar_sources_igam_sentry`` / ``active_directory_user_groups``
       lookup in the corporate store.
    2. For the groups in *full_access_list*: filter ``1=1`` (all rows).
       This item is only added when at least one non-empty group is given.

    Args:
        params: mapping with ``corporate_store``, ``target_table``,
            ``access_type`` and ``igam_roles``.
        env_config: environment settings used to build the Ranger client.
        full_access_list: group names for unfiltered access; None or empty
            strings are tolerated.

    Returns:
        The locally built policy object that was submitted (not the object
        returned by Ranger). The annotation previously said ``str``.
    """
    ranger = _ranger_client(env_config)
    store = params['corporate_store'].lower()

    policy = RangerPolicy()
    policy.service = "cm_hive"  # hard-coded
    # Name layout: cpo_<corporate store>_<table>_<access type>_row_level_policy
    policy.name = f"cpo_{store}_{params['target_table'].lower()}_{params['access_type'].lower()}_row_level_policy"
    policy.isEnabled = True
    policy.resources = {
        'database': RangerPolicyResource({'values': [store]}),
        'table': RangerPolicyResource({'values': [params['target_table']]}),
    }

    restricted_item = RangerRowFilterPolicyItem()
    restricted_item.groups = params['igam_roles']
    restricted_item.accesses = [RangerPolicyItemAccess({'type': 'select'})]
    restricted_item.rowFilterInfo = RangerPolicyItemRowFilterInfo({ 'filterExpr': f"lower(source) IN (select lower(rar_subsource_id) from {store}.t_ref_rar_sources_igam_sentry where lower(rar_igam_entitlement) IN (select ad_group from {store}.active_directory_user_groups where username = lower(regexp_extract(current_user(),'[^@]*',0))))" })
    row_filter_items = [restricted_item]

    # The parameter is Optional, so None must not crash the comprehension.
    # Empty names are dropped, as add_table_permission_groups does for roles.
    full_access_groups = [group.lower() for group in (full_access_list or []) if group]
    if full_access_groups:
        full_access_item = RangerRowFilterPolicyItem()
        full_access_item.groups = full_access_groups
        full_access_item.accesses = [RangerPolicyItemAccess({'type': 'select'})]
        full_access_item.rowFilterInfo = RangerPolicyItemRowFilterInfo({'filterExpr': "1=1"})
        row_filter_items.append(full_access_item)
    policy.rowFilterPolicyItems = row_filter_items

    created_policy = ranger.create_policy(policy)
    print(' created policy: name=' + created_policy.name + ', id=' + str(created_policy.id))
    return policy
def yaml_format_3(params, env_config, filterString, full_access_list: Optional[List]) -> "RangerPolicy":
    """Create the row-level filter policy for an access-type 3 table.

    Args:
        params: dict with the keys ``corporate_store``, ``target_table``,
            ``access_type`` and ``igam_roles``.
        env_config: environment mapping handed to ``_ranger_client``.
        filterString: row-filter expression applied to ``params['igam_roles']``.
        full_access_list: groups that get the always-true filter ``1=1``.
            ``None`` is treated as "no groups"; a plain string is split on
            commas instead of being iterated character by character.

    Returns:
        The ``RangerPolicy`` request object that was submitted.
    """
    ranger = _ranger_client(env_config)
    store = params["corporate_store"].lower()

    # Normalise the optional full-access list before lower-casing it.
    if full_access_list is None:
        full_access_list = []
    elif isinstance(full_access_list, str):
        full_access_list = [group.strip() for group in full_access_list.split(",")]
    full_access_groups = [group.lower() for group in full_access_list if group]

    policy = RangerPolicy()
    policy.service = "cm_hive"  # hardcoded
    policy.name = (
        f"cpo_{store}_"
        f"{params['target_table'].lower()}_"
        f"{params['access_type'].lower()}_row_level_policy"
    )
    policy.isEnabled = True
    policy.resources = {
        "database": RangerPolicyResource({"values": [store]}),
        "table": RangerPolicyResource({"values": [params["target_table"]]}),
    }

    # Row filter item for the restricted groups
    restricted_item = RangerRowFilterPolicyItem()
    restricted_item.groups = params["igam_roles"]
    restricted_item.accesses = [RangerPolicyItemAccess({"type": "select"})]
    restricted_item.rowFilterInfo = RangerPolicyItemRowFilterInfo({"filterExpr": filterString})

    # Row filter item for the groups that may read every row
    full_access_item = RangerRowFilterPolicyItem()
    full_access_item.groups = full_access_groups
    full_access_item.accesses = [RangerPolicyItemAccess({"type": "select"})]
    full_access_item.rowFilterInfo = RangerPolicyItemRowFilterInfo({"filterExpr": "1=1"})

    policy.rowFilterPolicyItems = [restricted_item, full_access_item]

    # Create policy in Ranger
    created_policy = ranger.create_policy(policy)
    print(f" created policy: name={created_policy.name}, id={created_policy.id}")
    return policy

View File

@@ -0,0 +1,345 @@
from typing import List, Optional
from apache_ranger.model.ranger_service import *
from apache_ranger.client.ranger_client import *
from apache_ranger.model.ranger_policy import *
import re
def add_table_permission_groups(corporate_store: str, target_table: str, access_type: str, source_table: str, igam_entitlement_list: List[str], columns_list: Optional[List[str]] = None, row_list: Optional[List[str]] = None):
    """Assemble the parameter dict consumed by the policy builders.

    Args:
        corporate_store: database name, passed through unchanged.
        target_table: table name, passed through unchanged.
        access_type: access type label, passed through unchanged.
        source_table: when it equals ``rar_sources_igam_sentry``
            (case-insensitive) the ``public`` group is appended.
        igam_entitlement_list: entitlement names; empty strings are dropped
            and the rest are lower-cased.
        columns_list: columns to expose; ``["*"]`` when omitted.
        row_list: row values; ``["*"]`` when omitted. They are single-quoted
            and comma-joined into one string.

    Returns:
        dict with the keys ``corporate_store``, ``target_table``,
        ``access_type``, ``columns``, ``rows`` and ``igam_roles``.
    """
    if source_table.lower() == "rar_sources_igam_sentry":
        entitlements = igam_entitlement_list + ["public"]
    else:
        entitlements = igam_entitlement_list

    selected_columns = ["*"] if columns_list is None else columns_list
    selected_rows = ["*"] if row_list is None else row_list

    quoted_rows = ','.join(map("'{}'".format, selected_rows))

    roles = []
    for entitlement in entitlements:
        if entitlement != "":
            roles.append(entitlement.lower())

    result = {}
    result['corporate_store'] = corporate_store
    result['target_table'] = target_table
    result['access_type'] = access_type
    result['columns'] = selected_columns
    result['rows'] = quoted_rows
    result['igam_roles'] = roles
    return result
from typing import List, Optional
# --- helpers ---------------------------------------------------------------
def _policy_name_from_params(config, policy_id: Optional[str] = None) -> Optional[str]:
    """
    Return the exact policy name the create functions produce for ``config``.

    ``config`` must expose ``corporate_store``, ``target_table`` and
    ``access_type``. ``None`` is returned for access type 2a when no
    ``policy_id`` is given, because several policies share that prefix.
    Raises ValueError for an unknown access type.
    """
    access = config.access_type.lower()
    stem = "_".join(
        ("cpo", config.corporate_store.lower(), config.target_table.lower(), access)
    )

    # yaml_format_1
    if access == "1":
        return stem

    # yaml_format_2a -> an exact name needs the policy_id; without it the
    # caller has to match every policy that starts with this prefix.
    if access == "2a":
        return f"{stem}_policy_{policy_id}" if policy_id else None

    # yaml_format_2b and yaml_format_3 share the same name pattern
    if access in ("2b", "3"):
        return f"{stem}_row_level_policy"

    raise ValueError(f"Invalid access type '{config.access_type}'. Expected one of: 1, 2a, 2b, 3.")
def _ranger_client(env_config) -> RangerClient:
    """Build a RangerClient from the RANGER_HOSTNAME, DEVO_USERNAME and DEVO_SECRET entries of ``env_config``."""
    credentials = (env_config['DEVO_USERNAME'], env_config['DEVO_SECRET'])
    ranger = RangerClient(env_config['RANGER_HOSTNAME'], credentials)
    # NOTE(review): certificate verification is switched off for every request
    # made through this client - confirm this is intended outside lab setups.
    ranger.session.verify = False
    return ranger
# --- main deletion API -----------------------------------------------------
def delete_policy(config, env_config, policy_id: Optional[str] = None) -> List[str]:
    """
    Delete the Ranger policies that belong to one table.

    Every policy of the ``cm_hive`` service is fetched (paged) and filtered
    client-side by name:

    - by default, every policy named ``cpo_<store>_<table>_...`` that follows
      one of the naming patterns of the create functions is deleted,
      whatever its access type;
    - when ``policy_id`` is given and ``config.access_type`` is ``2a``, only
      the policy ``cpo_<store>_<table>_2a_policy_<policy_id>`` is deleted.

    Args:
        config: object exposing ``corporate_store``, ``target_table`` and
            ``access_type``.
        env_config: environment mapping handed to ``_ranger_client``.
        policy_id: optional, only meaningful for access type ``2a``.

    Returns:
        The names of the policies that were deleted.

    Raises:
        RuntimeError: if nothing was deleted, either because no policy
            matched or because every delete request failed.
    """
    ranger = _ranger_client(env_config)
    service_name = "cm_hive"
    page_size = 1000

    cs = config.corporate_store.lower()
    tbl = config.target_table.lower()
    at = config.access_type.lower()
    prefix = f"cpo_{cs}_{tbl}_"

    # The prefix comes from configuration, so it is escaped before being used
    # inside a regular expression.
    if policy_id and at == "2a":
        name_pattern = re.compile(re.escape(f"{prefix}2a_policy_{policy_id}"))
    else:
        name_pattern = re.compile(
            re.escape(prefix)
            + "([0-9]?[a-z]?)(_policy_)?([0-9]*)?(_row_level_policy)?(full_access)?$"
        )

    # Fetch all policies of the service and filter client-side to reduce calls.
    start = 0
    candidates = []
    while True:
        params = {"pageSize": page_size, "startIndex": start}
        page = ranger.get_policies_in_service(service_name, params=params) or []
        candidates.extend(page)
        if len(page) < page_size:
            break
        start += len(page)

    deleted: List[str] = []
    failed: List[str] = []
    for p in candidates:
        name = p["name"]
        print(f"analizing policy:{name}")
        if name_pattern.fullmatch(name) is None:
            continue
        try:
            ranger.delete_policy_by_id(p["id"])
        except Exception as exc:
            # Best effort: report the failure and keep deleting the others.
            failed.append(name)
            print(f"failed to delete policy '{name}': {exc}")
        else:
            deleted.append(name)

    if not deleted:
        if failed:
            raise RuntimeError(
                f"Policies matching prefix '{prefix}' were found but none could be deleted: {failed}"
            )
        raise RuntimeError(
            f"No matching policies found for deletion with prefix '{prefix}'. "
            f"Provide 'policy_id' to delete a specific 2a policy."
        )
    return deleted
def generate_policy(params, env_config, policy_id: Optional[str] = None,
                    filter_string: Optional[str] = None,
                    full_access_list: Optional[List] = None):
    """Dispatch policy creation on ``params['access_type']`` (case-insensitive).

    Args:
        params: dict built by ``add_table_permission_groups``.
        env_config: environment mapping forwarded to the policy builders.
        policy_id: name suffix, used for access type ``2a`` only.
        filter_string: row-filter expression, required for access type ``3``.
        full_access_list: groups forwarded to ``yaml_format_3``; ``None``
            is forwarded as an empty list.

    Returns:
        Whatever the selected builder returns.

    Raises:
        ValueError: access type ``3`` was requested without ``filter_string``.
        Exception: the access type is not one of 1, 2a, 2b, 3.
    """
    access_type = params['access_type'].lower()
    if access_type == "1":
        return yaml_format_1(params, env_config)
    if access_type == "2a":
        return yaml_format_2a(params, env_config, policy_id)
    if access_type == "2b":
        # Same column-level builder as type 1.
        # NOTE(review): the row-level 2b policy is presumably created by a
        # separate call to yaml_format_2b - confirm against the callers.
        return yaml_format_1(params, env_config)
    if access_type == "3":
        # yaml_format_3 needs the environment and a filter expression; the
        # previous single-argument call could only fail with a TypeError.
        if filter_string is None:
            raise ValueError(
                "Access type 3 requires 'filter_string' (the row filter expression)."
            )
        return yaml_format_3(params, env_config, filter_string, full_access_list or [])
    raise Exception(f"Invalid access type {params['access_type']}. Please check the input param")
def yaml_format_1(params, env_config) -> Optional["RangerPolicy"]:
    """Create the column-level ``select`` policy for one table in Ranger.

    Args:
        params: dict with the keys ``corporate_store``, ``target_table``,
            ``access_type``, ``columns`` and ``igam_roles``.
        env_config: environment mapping handed to ``_ranger_client``.

    Returns:
        The policy returned by Ranger, or ``None`` when creation failed.
        Creation stays best effort: a failure is printed, not raised.
    """
    ranger = _ranger_client(env_config)
    # For Kerberos authentication
    #
    # from requests_kerberos import HTTPKerberosAuth
    #
    # ranger_auth = HTTPKerberosAuth()
    store = params['corporate_store'].lower()

    policy = RangerPolicy()
    policy.service = "cm_hive"  # hardcoded
    # <corporate store>_<table>_<access type>
    policy.name = f"cpo_{store}_{params['target_table'].lower()}_{params['access_type'].lower()}"
    policy.resources = {
        'database': RangerPolicyResource({'values': [store]}),
        'table': RangerPolicyResource({'values': [params['target_table']]}),
        'column': RangerPolicyResource({'values': params['columns']}),
    }

    allow_item = RangerPolicyItem()
    allow_item.groups = params['igam_roles']
    allow_item.accesses = [RangerPolicyItemAccess({'type': 'select'})]
    policy.policyItems = [allow_item]
    print(policy)

    try:
        created_policy = ranger.create_policy(policy)
        print('Created policy: name=' + created_policy.name + ', id=' + str(created_policy.id))
    except Exception as exc:
        # Failures used to be swallowed without a trace; keep going but say why.
        print(f"Failed to create policy '{policy.name}': {exc}")
        return None
    return created_policy
def yaml_format_2a(params, env_config, policy_id: Optional[str]) -> str:
    """Build and print the column-level policy for access type 2a.

    The policy is named ``cpo_<store>_<table>_<access type>_policy_<id>``,
    where ``<id>`` is ``policy_id`` or ``"0"`` when it is ``None``.

    NOTE(review): the ``ranger.create_policy`` call is commented out in this
    function, so the policy is only printed and nothing is returned - confirm
    whether that is intentional.
    """
    suffix = "0" if policy_id is None else policy_id
    # For Kerberos authentication
    #
    # from requests_kerberos import HTTPKerberosAuth
    #
    # ranger_auth = HTTPKerberosAuth()
    ranger = _ranger_client(env_config)

    store = params['corporate_store'].lower()
    name_parts = (
        "cpo",
        store,
        params['target_table'].lower(),
        params['access_type'].lower(),
        "policy",
        "{}".format(suffix),
    )

    policy = RangerPolicy()
    policy.service = "cm_hive"  # hardcoded
    policy.name = "_".join(name_parts)

    resources = {}
    resources['database'] = RangerPolicyResource({'values': [store]})
    resources['table'] = RangerPolicyResource({'values': [params['target_table']]})
    resources['column'] = RangerPolicyResource({'values': params['columns']})
    policy.resources = resources

    select_item = RangerPolicyItem()
    select_item.groups = params['igam_roles']
    select_item.accesses = [RangerPolicyItemAccess({'type': 'select'})]
    policy.policyItems = [select_item]

    print(policy)
    print("\n\n")
    # created_policy = ranger.create_policy(policy)
    # print(' created policy: name=' + created_policy.name + ', id=' + str(created_policy.id))
def yaml_format_2b(params, env_config, full_access_list: Optional[List]) -> "RangerPolicy":
    """Create the row-level filter policy for an access-type 2b table.

    Two row-filter items are attached to the policy:
      * the groups in ``params['igam_roles']`` only see rows whose ``source``
        is listed for one of the caller's AD groups (sub-select below);
      * the full-access groups get the always-true filter ``1=1``.

    Args:
        params: dict with the keys ``corporate_store``, ``target_table``,
            ``access_type`` and ``igam_roles``.
        env_config: environment mapping handed to ``_ranger_client``.
        full_access_list: groups that must see every row. ``None`` is treated
            as "no groups"; a plain string is split on commas instead of
            being iterated character by character.

    Returns:
        The policy object returned by Ranger.
    """
    # For Kerberos authentication
    #
    # from requests_kerberos import HTTPKerberosAuth
    #
    # ranger_auth = HTTPKerberosAuth()
    ranger = _ranger_client(env_config)
    store = params['corporate_store'].lower()

    # Normalise the optional full-access list before lower-casing it.
    if full_access_list is None:
        full_access_list = []
    elif isinstance(full_access_list, str):
        full_access_list = [group.strip() for group in full_access_list.split(",")]
    full_access_groups = [group.lower() for group in full_access_list if group]

    policy = RangerPolicy()
    policy.service = "cm_hive"  # hardcoded
    # <corporate store>_<table>_<access type>
    policy.name = f"cpo_{store}_{params['target_table'].lower()}_{params['access_type'].lower()}_row_level_policy"
    policy.isEnabled = True
    policy.resources = {
        'database': RangerPolicyResource({'values': [store]}),
        'table': RangerPolicyResource({'values': [params['target_table']]}),
    }

    restricted_item = RangerRowFilterPolicyItem()
    restricted_item.groups = params['igam_roles']
    restricted_item.accesses = [RangerPolicyItemAccess({'type': 'select'})]
    restricted_item.rowFilterInfo = RangerPolicyItemRowFilterInfo({
        'filterExpr': (
            f"lower(source) IN (select lower(rar_subsource_id) from {store}.t_ref_rar_sources_igam_sentry "
            f"where lower(rar_igam_entitlement) IN (select ad_group from {store}.active_directory_user_groups "
            "where username = lower(regexp_extract(current_user(),'[^@]*',0))))"
        )
    })

    full_access_item = RangerRowFilterPolicyItem()
    full_access_item.groups = full_access_groups
    full_access_item.accesses = [RangerPolicyItemAccess({'type': 'select'})]
    full_access_item.rowFilterInfo = RangerPolicyItemRowFilterInfo({'filterExpr': "1=1"})

    policy.rowFilterPolicyItems = [restricted_item, full_access_item]
    print(policy)
    created_policy = ranger.create_policy(policy)
    print(' created policy: name=' + created_policy.name + ', id=' + str(created_policy.id))
    return created_policy
def yaml_format_3(params, env_config, filterString, full_access_list: Optional[List]) -> "RangerPolicy":
    """Create the row-level filter policy for an access-type 3 table.

    Args:
        params: dict with the keys ``corporate_store``, ``target_table``,
            ``access_type`` and ``igam_roles``.
        env_config: environment mapping handed to ``_ranger_client``.
        filterString: row-filter expression applied to ``params['igam_roles']``.
        full_access_list: groups that get the always-true filter ``1=1``.
            ``None`` is treated as "no groups"; a plain string is split on
            commas instead of being iterated character by character.

    Returns:
        The policy object returned by Ranger.
    """
    ranger = _ranger_client(env_config)
    store = params["corporate_store"].lower()

    # Normalise the optional full-access list before lower-casing it.
    if full_access_list is None:
        full_access_list = []
    elif isinstance(full_access_list, str):
        full_access_list = [group.strip() for group in full_access_list.split(",")]
    full_access_groups = [group.lower() for group in full_access_list if group]

    policy = RangerPolicy()
    policy.service = "cm_hive"  # hardcoded
    policy.name = (
        f"cpo_{store}_"
        f"{params['target_table'].lower()}_"
        f"{params['access_type'].lower()}_row_level_policy"
    )
    policy.isEnabled = True
    policy.resources = {
        "database": RangerPolicyResource({"values": [store]}),
        "table": RangerPolicyResource({"values": [params["target_table"]]}),
    }

    # Row filter item for the restricted groups
    restricted_item = RangerRowFilterPolicyItem()
    restricted_item.groups = params["igam_roles"]
    restricted_item.accesses = [RangerPolicyItemAccess({"type": "select"})]
    restricted_item.rowFilterInfo = RangerPolicyItemRowFilterInfo({"filterExpr": filterString})

    # Row filter item for the groups that may read every row
    full_access_item = RangerRowFilterPolicyItem()
    full_access_item.groups = full_access_groups
    full_access_item.accesses = [RangerPolicyItemAccess({"type": "select"})]
    full_access_item.rowFilterInfo = RangerPolicyItemRowFilterInfo({"filterExpr": "1=1"})

    policy.rowFilterPolicyItems = [restricted_item, full_access_item]
    print(policy)

    # Create policy in Ranger
    created_policy = ranger.create_policy(policy)
    print(f" created policy: name={created_policy.name}, id={created_policy.id}")
    return created_policy

View File

@@ -0,0 +1,793 @@
import pandasql as ps
import pandas as pd
import mrds.utils.manage_files as fileManager
import logging
import tableBuilderQueries as tbq
from devo_query import execute_query
import ranger_updater_old as ranger
import os
import yaml
import FlowOptions as fo
import numpy as np
from mrds.utils.secrets import get_secret
import traceback
from mrds.utils import oraconn
# Set up basic configuration for logging
logging.basicConfig(level=logging.INFO)
# Create a logger object
logger = logging.getLogger(__name__)
import re
#0 utilities
def initialize_config(config_file_path):
    """Load a YAML configuration file and return the parsed content.

    Raises FileNotFoundError when ``config_file_path`` does not exist.
    """
    path_is_present = os.path.exists(config_file_path)
    if path_is_present:
        with open(config_file_path, "r") as handle:
            return yaml.safe_load(handle)
    raise FileNotFoundError(f"Configuration file {config_file_path} not found.")
def fix_impala_sql(sql: str) -> str:
    """Back-tick quote problematic column names in a CREATE TABLE statement.

    Only the text between the first ``(`` and ``ROW FORMAT SERDE`` is
    rewritten. A column name is quoted when it is one of the reserved words
    below or is not a plain identifier. Finally every backslash-quote pair is
    removed and doubled backslashes are collapsed.

    Raises:
        ValueError: the statement has no ``(`` or no ``ROW FORMAT SERDE``.
    """
    # Reserved keywords in Impala that need backticks when used as column names
    reserved_words = {
        'date', 'value', 'source', 'comment', 'partition', 'row', 'select', 'insert',
        'table', 'external', 'format', 'location', 'stored', 'inputformat', 'outputformat',
        'scenario', 'string', 'int', 'decimal', 'timestamp', 'float', 'double','procedure', 'floor'
    }
    # <name> <type>[(args)] [comment '<text>']
    column_def = re.compile(
        r'(?P<col>`?\w+`?)\s+(?P<type>[A-Za-z]+\s*(?:\([^)]+\))?)\s*(?P<comment>comment\s*\'[^\']*\'|)?',
        re.IGNORECASE
    )

    def _requote(found):
        name = found.group('col').strip('`')
        is_reserved = name.lower() in reserved_words
        if is_reserved or not re.match(r'^[A-Za-z_][A-Za-z0-9_]*$', name):
            name = f'`{name}`'
        trailing_comment = found.group('comment') or ''
        return f"{name} {found.group('type')} {trailing_comment}".strip()

    open_paren = sql.find('(')
    serde_at = sql.find('ROW FORMAT SERDE', open_paren)
    if -1 in (open_paren, serde_at):
        raise ValueError("Invalid SQL format: Missing column definition parentheses.")

    cut = open_paren + 1
    head, column_block, tail = sql[:cut], sql[cut:serde_at], sql[serde_at:]

    rebuilt = head + column_def.sub(_requote, column_block) + tail
    return rebuilt.replace("\\'", "").replace('\\\\', '\\')
def applyQueryParameters(query: str, parameters: str) -> str:
    """
    Substitute the ``$$$<n>`` placeholders of ``query``.

    Parameters:
    - query: query text containing placeholders such as $$$1, $$$2, ...
    - parameters: semicolon-separated values; the first value replaces $$$1.
    Returns:
    - The query with every placeholder replaced. The query is returned
      unchanged when ``parameters`` is empty.
    """
    if not parameters:
        return query

    values = parameters.split(';')
    result = query
    # Walk from the highest index down so that $$$10 is handled before $$$1.
    for position in range(len(values), 0, -1):
        result = result.replace(f"$$${position}", values[position - 1])
    return result
def format_column_definition(row):
    """Render one metadata row as ``<column_name> <data_type_string>[ comment '<text>']``.

    The comment clause is emitted only when ``data_description`` is not null;
    single quotes in the description are escaped with a backslash.
    """
    description = row['data_description']
    if not pd.isnull(description):
        escaped = str(description).replace("'", "\\'")
        return f"{row['column_name']} {row['data_type_string']} comment '{escaped}'"
    return f"{row['column_name']} {row['data_type_string']}"
#1 receive table name and check for target table and access type
def execute_oracle_query(sql):
    """Run ``sql`` on the ``MRDS_LOADER_MOPDB`` connection and return the rows.

    Args:
        sql: statement to execute; its result set is fetched completely.

    Returns:
        pandas DataFrame whose column names are the lower-cased names from
        the cursor description.

    The cursor and the connection are closed even when the statement fails.
    """
    oracle_conn = oraconn.connect('MRDS_LOADER_MOPDB')
    try:
        cursor = oracle_conn.cursor()
        try:
            rows = cursor.execute(sql).fetchall()
            oracle_conn.commit()
            column_names = [col[0].lower() for col in cursor.description]
        finally:
            cursor.close()
    finally:
        oracle_conn.close()
    return pd.DataFrame(rows, columns=column_names)
def get_target_table(oracle_mgmt_table,source_schema,source_table, env):
    """Return the distinct TABLE_ALIAS values registered for a source table.

    ``env`` is accepted but not used here.
    NOTE(review): the values are interpolated straight into the SQL text -
    confirm they never come from untrusted input.
    """
    where_clause = f"OWNER = '{source_schema}' AND TABLE_NAME = '{source_table}'"
    statement = f"SELECT DISTINCT TABLE_ALIAS FROM {oracle_mgmt_table} WHERE " + where_clause
    return execute_oracle_query(statement)
def get_type_ofAccess(oracle_metadata_table,source_schema,source_table,env):
    """Return the distinct RAR3_TYPE_OF_ACCESS values of the still-valid metadata rows of a source table.

    ``env`` is accepted but not used here.
    NOTE(review): the values are interpolated straight into the SQL text -
    confirm they never come from untrusted input.
    """
    statement = (
        f"SELECT DISTINCT RAR3_TYPE_OF_ACCESS FROM {oracle_metadata_table} "
        "WHERE A_VALID_TO > SYSDATE "
        f"AND OWNER = '{source_schema}'"
        f"AND TABLE_NAME = '{source_table}'"
    )
    return execute_oracle_query(statement)
#2 load metadata
def readIGAMRoles( config ,env):
    """Query the IGAM role table and return the result as a DataFrame.

    The query comes from ``tbq.get_query_igam_roles`` and its placeholder is
    filled with the quoted ``config.sentry_role_environment``. ``env`` is
    accepted but not used here.
    """
    quoted_environment = "'{}'".format(config.sentry_role_environment)
    query_template = tbq.get_query_igam_roles(config.oracle_igam_table,config.service_name)
    logger.info("Querying the IGAM Table")
    final_query = applyQueryParameters(query_template, quoted_environment)
    logger.info("Replaced params to IGAM Table:")
    return execute_oracle_query(final_query)
def loadMetadataTable( config,env ):
    """Return the comma-separated column definitions of the source table.

    The metadata rows are fetched with the query from
    ``tbq.get_query_metadata`` and each row is rendered by
    ``format_column_definition``. ``env`` is accepted but not used here.
    """
    metadata_sql = tbq.get_query_metadata(config.oracle_metadata_table, config.source_schema, config.source_table)
    logger.info("Map Oracle metadata (data types) to Hive query: ")
    metadata_frame = execute_oracle_query(metadata_sql)
    logger.info("Fetch all fields for table and concatenate them separated by ','")
    column_definitions = metadata_frame.apply(format_column_definition, axis=1).tolist()
    return ",".join(column_definitions)
#3 drop table and policies
def deleteExternalTable(config,env_config):
    """Remove the Ranger policies of the target table, then drop the table.

    Policy deletion is best effort: a failure (including "no matching
    policies") is logged and the table is dropped anyway.

    Args:
        config: object exposing ``corporate_store`` and ``target_table``
            (plus whatever ``ranger.delete_policy`` reads).
        env_config: mapping with ``DEVO_USERNAME``, ``IMPALA_HOSTNAME`` and
            ``DEVO_SECRET``.
    """
    try:
        deleted = ranger.delete_policy(config, env_config)
    except Exception as exc:
        # Previously swallowed silently; keep going but leave a trace.
        logger.warning(
            "Could not delete Ranger policies for %s.%s: %s",
            config.corporate_store, config.target_table, exc,
        )
    else:
        logger.info("Deleted Ranger policies: %s", deleted)

    sql_drop = f"DROP TABLE IF EXISTS {config.corporate_store}.{config.target_table}"
    execute_query(
        sql_drop,
        env_config['DEVO_USERNAME'], env_config['IMPALA_HOSTNAME'], env_config['DEVO_SECRET'],
    )
#4 create external table and policies
def createExternalTables( config, tableFields,env_config ):
    """Create the external Parquet table for ``config.target_table``.

    The DDL lists ``tableFields`` followed by ``config.tech_meta_data_fields``,
    is passed through ``fix_impala_sql`` and is then executed against
    ``env_config['HIVE_HOSTNAME']``.
    """
    ddl_parts = [
        f"CREATE EXTERNAL TABLE {config.corporate_store}.{config.target_table} ",
        f"({tableFields}, {config.tech_meta_data_fields}) ",
        "ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' ",
        "STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' ",
        "OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' ",
        f"LOCATION '{config.target_s3_bucket}/{config.target_table}' ",
        "TBLPROPERTIES (",
        "'external.table.purge'='true', ",
        "'parquet.compression'='snappy')",
    ]
    ddl = fix_impala_sql("".join(ddl_parts))
    execute_query(
        ddl,
        env_config['DEVO_USERNAME'],
        env_config['HIVE_HOSTNAME'],
        env_config['DEVO_SECRET'],
    )
def createTableFromExternal( config, tableFields,env_config ):
    """Create ``<store>.<table>`` as a copy of ``<store>.<table>_EXT``.

    ``tableFields`` is accepted but not used here. The statement is executed
    against ``env_config['HIVE_HOSTNAME']``.
    """
    qualified_name = f"{config.corporate_store}.{config.target_table}"
    statement = "".join([
        f"CREATE EXTERNAL TABLE {qualified_name} AS ",
        f"SELECT * FROM {qualified_name}_EXT",
    ])
    execute_query(
        statement,
        env_config['DEVO_USERNAME'],
        env_config['HIVE_HOSTNAME'],
        env_config['DEVO_SECRET'],
    )
def accessTypeMapper(config, env_config, igamRoleDF):
    """Run the access handler that matches ``config.access_type`` (case-insensitive).

    Recognised values are 1, 2a, 2b and 3. Any other value is only logged;
    nothing is raised and nothing is returned.
    """
    requested = config.access_type.lower()
    if requested == '1':
        accessType_1(config, env_config, igamRoleDF)
        return
    if requested == '2a':
        accessType_2A(config, env_config, igamRoleDF)
        return
    if requested == '2b':
        accessType_2B(config, env_config, igamRoleDF)
        return
    if requested == '3':
        accessType_3(config, env_config, igamRoleDF)
        return
    logger.info(f"Invalid access type {config.access_type}. Please check the input param")
def accessType_1(config, env_config, igamRoleDF):
    """Grant table access for access type 1.

    Metadata rows with ``rar3_type_of_access == "1"`` are combined with the
    IGAM roles in two ways:
      * rows whose source differs from ``config.service_name`` are joined to
        the roles on ``source == datasource``;
      * rows whose source equals ``config.service_name`` are cross joined
        with every role.
    The resulting entitlements, plus the optional full-access entitlements,
    are handed to the Ranger policy generator.

    Args:
        config: object exposing the table, schema and Oracle settings read below.
        env_config: environment mapping forwarded to the policy generator.
        igamRoleDF: DataFrame with at least ``datasource``, ``subsource_id``,
            ``igam_entitlement`` and ``environment``. It is not modified.

    Returns:
        Whatever ``ranger.generate_policy`` returns.
    """
    logger.info("Grant privileges for access type 1")
    logger.info("Fetch metadata from Oracle for access type 1")

    # ---- Construct query and fetch from Oracle ----
    queryParams = f"'{config.source_schema}.{config.source_table}'"
    queryMetadataAccessType1 = tbq.get_query_metadata_access_type1(config.oracle_metadata_table)
    queryWithParamsAccessType1 = applyQueryParameters(queryMetadataAccessType1, queryParams)
    logger.info("Metadata table query: ")
    df = execute_oracle_query(queryWithParamsAccessType1).copy()

    # ---- Normalize columns ----
    df["rar3_type_of_access"] = df["rar3_type_of_access"].astype(str).str.strip()
    df["source"] = df["source"].astype(str).str.strip().str.upper()
    # Work on a copy so the caller's DataFrame is left untouched.
    roles = igamRoleDF.copy()
    roles["datasource"] = roles["datasource"].astype(str).str.strip().str.upper()
    # "source" is upper-cased above, so compare against the upper-cased
    # service name (same as accessType_2A).
    service_name = config.service_name.upper()

    result_columns = ["table_name", "source", "subsource_id", "igam_entitlement", "environment"]
    is_type_one = df["rar3_type_of_access"] == "1"
    is_service_source = df["source"] == service_name

    # ---- Branch A: source != service name -> join on datasource ----
    left_a = df.loc[is_type_one & ~is_service_source, ["table_name", "source"]].drop_duplicates()
    branch_a = (
        left_a.merge(roles, left_on="source", right_on="datasource", how="inner")
        [result_columns]
        .drop_duplicates()
    )

    # ---- Branch B: source == service name -> CROSS JOIN with the roles ----
    left_b = df.loc[is_type_one & is_service_source, ["table_name", "source"]].drop_duplicates()
    if not left_b.empty:
        branch_b = left_b.merge(roles, how="cross")[result_columns].drop_duplicates()
    else:
        branch_b = pd.DataFrame(columns=result_columns)

    # ---- UNION (distinct) ----
    typeOneDF = (
        pd.concat([branch_a, branch_b], ignore_index=True)
        .drop_duplicates()
        .reset_index(drop=True)
    )

    # ---- Collect IGAM entitlements ----
    igam_entitlements = (
        typeOneDF["igam_entitlement"]
        .dropna()
        .astype(str)
        .str.strip()
        .tolist()
    )

    # ---- Merge with the optional full access list ----
    full_access = config.full_access_entitlement_list
    if full_access is None:
        combined_entitlements = igam_entitlements
    else:
        # A comma-separated string is accepted as well as a list
        # (same handling as accessType_2A).
        if not isinstance(full_access, list):
            full_access = [s.strip() for s in str(full_access).split(",") if s.strip()]
        combined_entitlements = igam_entitlements + full_access

    # ---- Build the policy parameters and create the policy ----
    params = ranger.add_table_permission_groups(
        config.corporate_store,
        config.target_table,
        config.access_type,
        config.source_table,
        combined_entitlements
    )
    formattedYaml = ranger.generate_policy(params, env_config, None)
    logger.info("Final YAML format")
    return formattedYaml
# accessType_2A: grant column-level access for access type 2a.
#
# Steps visible below:
#   1. fetch the 2a metadata rows (table_name, column_name, source) from Oracle;
#   2. attach IGAM entitlements to each column: inner join on
#      source == subsource_id when the source differs from config.service_name,
#      cross join with every role when it equals config.service_name;
#   3. per (table, entitlement) build the sorted, comma-joined column list,
#      then group the entitlements that share an identical column list;
#   4. number those groups 1..n (ordered by column list) and call
#      ranger.generate_policy once per group with that number as policy_id;
#   5. if config.full_access_entitlement_list is set, generate one more policy
#      for those entitlements with policy_id "full_access".
#
# Parameters:
#   config      - object exposing source_schema, source_table,
#                 oracle_metadata_table, service_name, corporate_store,
#                 target_table, access_type and, optionally,
#                 full_access_entitlement_list
#   env_config  - environment mapping forwarded to ranger.generate_policy
#   igamRoleDF  - DataFrame with at least subsource_id, igam_entitlement and
#                 environment; a copy is normalised, the argument is not changed
#                 (except transiently in the pandas < 1.2 fallback, on the copy)
# Returns nothing.
def accessType_2A(config, env_config, igamRoleDF):
logger.info("Grant privileges for access type 2a")
logger.info("Fetch the metadata in Oracle for access type 2a")
# ---- Construct query and fetch from Oracle ----
queryParams = f"'{config.source_schema}.{config.source_table}'"
queryMetadataAccessType2a = tbq.get_query_metadata_access_type2a(config.oracle_metadata_table)
queryWithParamsAccessType2a = applyQueryParameters(queryMetadataAccessType2a, queryParams)
logger.info(f"Meta data table query: {queryWithParamsAccessType2a} ")
jdbcMetaDataAccessType2aDF = execute_oracle_query(queryWithParamsAccessType2a)
# ---- Normalize columns ----
# access type is compared lower-case, source upper-case
df = jdbcMetaDataAccessType2aDF.copy()
df["rar3_type_of_access"] = df["rar3_type_of_access"].astype(str).str.strip().str.lower()
df["source"] = df["source"].astype(str).str.strip().str.upper()
# NOTE(review): looks like a leftover debug print of the whole metadata frame
print(df)
roles = igamRoleDF.copy()
# columns of igamRoleDF used below: subsource_id, igam_entitlement, environment
roles["subsource_id"] = roles["subsource_id"].astype(str).str.strip().str.upper()
roles["igam_entitlement"] = roles["igam_entitlement"].astype(str).str.strip()
# ---- Branch A: source != service_name -> INNER JOIN on source == subsource_id ----
left_a = (
df.loc[
(df["rar3_type_of_access"] == "2a")
& (df["source"] != config.service_name.upper()),
["table_name", "column_name", "source"]
]
)
branch_a = (
left_a.merge(
roles,
left_on="source",
right_on="subsource_id",
how="inner"
)
.drop(columns=["subsource_id", "source"], errors="ignore")
[["table_name", "column_name", "igam_entitlement", "environment"]]
)
# ---- Branch B: source == service_name -> CROSS JOIN with the roles ----
left_b = (
df.loc[
(df["rar3_type_of_access"] == "2a")
& (df["source"] == config.service_name.upper()),
["table_name", "column_name", "source"]
]
)
# NOTE(review): with the indentation lost, the "else" further down is read as
# belonging to this "if" (empty frame when there is nothing to cross join),
# not to the "try" - confirm against the original file.
if not left_b.empty:
try:
branch_b = (
left_b.merge(roles, how="cross")
.drop(columns=["subsource_id", "source"], errors="ignore")
[["table_name", "column_name", "igam_entitlement", "environment"]]
)
except TypeError:
# pandas < 1.2 fallback: emulate the cross join with a constant key
# NOTE(review): left_b is a .loc selection of df, so this assignment may
# emit pandas' SettingWithCopyWarning - confirm it is harmless here.
left_b["_cj"] = 1
roles["_cj"] = 1
branch_b = (
left_b.merge(roles, on="_cj")
.drop(columns=["_cj", "subsource_id", "source"], errors="ignore")
[["table_name", "column_name", "igam_entitlement", "environment"]]
)
# remove the helper key again in case roles is used later
roles.drop(columns=["_cj"], inplace=True, errors="ignore")
else:
branch_b = pd.DataFrame(columns=["table_name", "column_name", "igam_entitlement", "environment"])
# ---- UNION of both branches (no drop_duplicates is applied here) ----
one_df = (
pd.concat([branch_a, branch_b], ignore_index=True)
.reset_index(drop=True)
)
# ---- Group 1: (table_name, igam_entitlement) -> sorted, comma-joined column_list ----
# mergesort is a stable sort, so rows with equal keys keep their order
tmp = one_df.sort_values(["table_name", "igam_entitlement", "column_name"], kind="mergesort")
new_df = (
tmp.groupby(["table_name", "igam_entitlement"], as_index=False)["column_name"]
.apply(lambda s: ",".join(s.dropna().astype(str).tolist()))
.rename(columns={"column_name": "column_list"})
)
# Columns: table_name, igam_entitlement, column_list
# ---- Group 2: (table_name, column_list) -> comma-joined igam_entitlement ----
grouped = (
new_df.groupby(["table_name", "column_list"], as_index=False)["igam_entitlement"]
.apply(lambda s: ",".join(s.dropna().astype(str).tolist()))
)
# Columns: table_name, column_list, igam_entitlement
# ---- ROW_NUMBER() OVER (ORDER BY column_list) -> policy_id ----
grouped = grouped.sort_values(["column_list"], kind="mergesort")
grouped["policy_id"] = np.arange(1, len(grouped) + 1).astype(int)
# ---- Emit policies: one per (table_name, column_list) row ----
for _, row in grouped.iterrows():
# split the comma-joined strings back into lists, dropping empty entries
entitlements_list = [e.strip() for e in str(row["igam_entitlement"]).split(",") if e.strip()]
columns_list = [c.strip() for c in str(row["column_list"]).split(",") if c.strip()]
policy_id = str(int(row["policy_id"]))
params = ranger.add_table_permission_groups(
config.corporate_store,
config.target_table,
config.access_type, # "2a"
config.source_table,
entitlements_list,
columns_list=columns_list
)
ranger.generate_policy(params, env_config, policy_id)
# ---- Optional: extra policy for the full-access entitlements on config ----
if getattr(config, "full_access_entitlement_list", None):
# A list is used as is; anything else is treated as a comma-separated string.
if isinstance(config.full_access_entitlement_list, list):
full_access_list = config.full_access_entitlement_list
else:
full_access_list = [s.strip() for s in str(config.full_access_entitlement_list).split(",") if s.strip()]
# no columns_list is passed, so add_table_permission_groups falls back to its default
params_full = ranger.add_table_permission_groups(
config.corporate_store,
config.target_table,
config.access_type, # keep same access type per your pattern
config.source_table,
full_access_list
)
ranger.generate_policy(params_full, env_config, "full_access")
# accessType_2B: grant access for access type 2b.
#
# Steps visible below:
#   1. validate and normalise the IGAM role frame (on a copy);
#   2. collect the unique, non-empty entitlements;
#   3. when config.full_access_entitlement_list is set, create a policy via
#      ranger.generate_policy for the entitlements plus the full-access ones;
#   4. create the row-level policy via ranger.yaml_format_2b.
#
# Parameters:
#   config      - object exposing corporate_store, target_table, access_type,
#                 source_table and full_access_entitlement_list
#   env_config  - environment mapping forwarded to the ranger helpers
#   igamRoleDF  - DataFrame that must contain environment, igam_entitlement
#                 and subsource_id
# Raises KeyError when one of those columns is missing. Returns nothing.
def accessType_2B(config, env_config,igamRoleDF):
logger.info(f"Grant privileges for access type {config.access_type}")
logger.info("Fetch the metadata in Oracle for access type 2b")
# --- Validate required columns ---
required = {"environment", "igam_entitlement", "subsource_id"}
missing = required - set(igamRoleDF.columns)
if missing:
raise KeyError(f"igamRoleDF missing required column(s): {sorted(missing)}")
# --- Normalize to stripped strings, working on a copy of the argument ---
igamRoleDF = igamRoleDF.copy()
igamRoleDF["environment"] = igamRoleDF["environment"].astype(str).str.strip()
igamRoleDF["igam_entitlement"] = igamRoleDF["igam_entitlement"].astype(str).str.strip()
igamRoleDF["subsource_id"] = igamRoleDF["subsource_id"].astype(str).str.strip()
# --- Aggregation: per (environment, igam_entitlement) collect unique subsource_id list ---
# Rows with an empty subsource_id are excluded; sorting keeps the order stable.
# NOTE(review): agg_df only feeds accessType2bValidList below, which is not
# used again in this function - confirm whether this is leftover code.
agg_df = (
igamRoleDF.loc[igamRoleDF["subsource_id"].ne(""), ["environment", "igam_entitlement", "subsource_id"]]
.drop_duplicates()
.sort_values(["environment", "igam_entitlement", "subsource_id"], kind="mergesort")
.groupby(["environment", "igam_entitlement"], as_index=False)["subsource_id"]
.agg(lambda s: ",".join(s.unique()))
.rename(columns={"subsource_id": "subsource_id_list"})
)
# List of tuples (igam_entitlement, subsource_id_list)
accessType2bValidList = list(zip(
agg_df["igam_entitlement"].astype(str),
agg_df["subsource_id_list"].astype(str)
))
# --- Entitlements for policy generation (unique, non-empty) ---
# NOTE(review): the column was already converted with astype(str) above, so
# missing values are presumably strings by now and dropna() likely removes
# nothing - confirm.
igam_entitlements = (
igamRoleDF["igam_entitlement"]
.dropna()
.map(str)
.str.strip()
.loc[lambda s: s.ne("")]
.drop_duplicates()
.tolist()
)
# --- Parameters for the row-level policy ---
params_row_level = ranger.add_table_permission_groups(
config.corporate_store,
config.target_table,
config.access_type,
config.source_table,
igam_entitlements
)
# --- Entitlements for the table-level policy: add the full-access ones if provided ---
if getattr(config, "full_access_entitlement_list", None):
combined_entitlements = igam_entitlements + config.full_access_entitlement_list
else:
combined_entitlements = igam_entitlements
# --- Create the policies through the ranger helpers ---
# NOTE(review): "params" is only assigned inside this "if". With the
# indentation lost it cannot be told whether generate_policy is inside it
# too; either no policy is generated or "params" is undefined when the
# full-access list is empty - confirm against the original file.
if getattr(config, "full_access_entitlement_list", None):
params = ranger.add_table_permission_groups(
config.corporate_store,
config.target_table,
config.access_type,
config.source_table,
combined_entitlements
)
ranger.generate_policy(params, env_config)
# NOTE(review): the attribute is read directly here, without the getattr
# guard used above, and is forwarded even when it is None or empty.
ranger.yaml_format_2b(params_row_level,env_config, config.full_access_entitlement_list) # row-level policy
logger.info("Final YAML format emitted for 2B.")
def _type3_filter_expression(config):
    """Return the row-filter SQL predicate used for ``config.source_table`` (access type 3)."""
    corp = str(config.corporate_store).lower()
    src_tbl = str(config.source_table).lower()
    # Matches user_id against the leading part of current_user() up to the first '@'.
    user_match = "lower(user_id) LIKE concat('%', lower(regexp_extract(current_user(),'[^@]*',0)), '%')"

    if src_tbl == "nh_portfoliotree":
        # Both ends of the tree edge must be portfolios the user may currently see.
        return (
            "(parent_fk in ( "
            f"select portfolio_fk from {corp}.nh_portfolio_access "
            f"where {user_match} "
            "AND to_date(a_valid_to) > current_timestamp() "
            ")) AND (child_fk in ( "
            f"select portfolio_fk from {corp}.nh_portfolio_access "
            f"where {user_match} "
            "AND to_date(a_valid_to) > current_timestamp() "
            "))"
        )
    if src_tbl == "nh_position":
        return (
            "position_key in ( "
            f"select position_key from {corp}.nh_portfolio_access a "
            f"inner join {corp}.nh_position b on ( "
            "(b.portfolio_fk = a.portfolio_fk and b.portfolio_fk is not NULL) or "
            "(b.portfolio_compare_fk = a.portfolio_fk and b.portfolio_compare_fk is not NULL) "
            ") "
            f"where {user_match} "
            "AND to_date(a_valid_to) > current_timestamp() "
            ")"
        )
    if src_tbl in ("nh_portfolio_access", "nh_limit_access"):
        # The access tables filter on themselves: only the user's own rows.
        return user_match

    # Standard case: join the configured key column against the configured access table.
    # NOTE(review): the fallback names below are placeholders taken over from the
    # original code; confirm every type-3 table outside the special cases configures
    # these three attributes.
    key_col = getattr(config, "type3SourceTableKeyColumn", None) or "source_key_col"
    acc_col = getattr(config, "type3AccessTableKeyColumn", None) or "access_key_col"
    acc_table = getattr(config, "type3AccessTable", None) or "type3_access_table"
    return (
        f"{key_col} in (select {acc_col} from {corp}.{acc_table} "
        f"where {user_match} "
        "AND to_date(a_valid_to) > current_timestamp())"
    )


def accessType_3(config, env_config, igamRoleDF):
    """
    Emit the Ranger policies for access type 3 (row-filtered access).

    Python/pandas translation of the Scala accessType_3.

    Steps:
      1. Collect the entitlements whose subsource_id is 'TMS'.
      2. Build the row-level parameters for those entitlements.
      3. Pick the row-filter expression that matches config.source_table.
      4. Build the table-level parameters for the TMS entitlements plus any
         full-access entitlements.
      5. Hand both to the `ranger` helpers:
         - yaml_format_3(params_row_level, env_config, filter_expr, full_access_list)
         - yaml_format_1(params_table_level, env_config)

    Args:
        config: options object exposing corporate_store, target_table, access_type,
            source_table, and optionally full_access_entitlement_list,
            type3SourceTableKeyColumn, type3AccessTableKeyColumn, type3AccessTable.
        env_config: environment settings, passed through unchanged to the ranger helpers.
        igamRoleDF: pandas DataFrame with at least the columns
            'igam_entitlement' and 'subsource_id'.

    Raises:
        KeyError: if igamRoleDF lacks one of the required columns.
    """
    # --- 1) Filter entitlements where subsource_id = 'TMS' ---
    required = {"igam_entitlement", "subsource_id"}
    if not required.issubset(igamRoleDF.columns):
        missing = required - set(igamRoleDF.columns)
        raise KeyError(f"igamRoleDF missing required column(s): {sorted(missing)}")

    is_tms = igamRoleDF["subsource_id"].astype(str).str.strip().str.upper() == "TMS"
    # Strip BEFORE de-duplicating so 'X' and 'X ' collapse into one group, and drop
    # missing/blank entitlements instead of emitting 'nan' or '' as group names.
    tms_entitlements = (
        igamRoleDF.loc[is_tms, "igam_entitlement"].dropna().astype(str).str.strip()
    )
    accessType3ValidList = (
        tms_entitlements[tms_entitlements.ne("")].drop_duplicates().tolist()
    )

    # --- 2) Build params for row-level groups (type 3) ---
    params_row_level = ranger.add_table_permission_groups(
        config.corporate_store,
        config.target_table,
        config.access_type,
        config.source_table,
        accessType3ValidList,
    )

    # --- 3) Choose the filter by source table (matches Scala match/case) ---
    sqlCreateViewType3Filter = _type3_filter_expression(config)

    # --- 4) Optional full-access entitlements ---
    # An empty setting split on ',' arrives as [''], which is truthy; blank entries
    # are ignored so no empty group ends up in the table-level policy.
    raw_full_access = getattr(config, "full_access_entitlement_list", None) or []
    if isinstance(raw_full_access, str):
        raw_full_access = raw_full_access.split(",")
    full_access = [str(e).strip() for e in raw_full_access if str(e).strip()]

    params_table_level = ranger.add_table_permission_groups(
        config.corporate_store,
        config.target_table,
        config.access_type,
        config.source_table,
        accessType3ValidList + full_access,
    )

    # --- 5) Render YAML and merge like Scala ---
    ranger.yaml_format_3(params_row_level, env_config, sqlCreateViewType3Filter, full_access)  # base type 3 yaml
    ranger.yaml_format_1(params_table_level, env_config)  # table-level yaml
#5 create extra policies for super-users
#6 refresh metadata
def run_process(env_file, env, service_name, source_schema, source_table, sentry_role_environment):
    """
    Rebuild one replicated table and its access policies for the given environment.

    Steps: load the environment/service configuration, resolve the service
    credentials, look up the target table alias and the access type in Oracle,
    drop the existing table/policies, recreate the table and generate the policies
    for the resolved access type.

    Args:
        env_file: path of the YAML configuration read by initialize_config.
        env: environment key inside that configuration (e.g. 'dev', 'tst').
        service_name: service key inside that configuration (e.g. 'rar', 'mopdb', 'rqsd').
        source_schema: owner of the source table.
        source_table: name of the source table.
        sentry_role_environment: stored on the options object as given.

    Raises:
        Exception: whatever the secret lookup or the Oracle lookups raise; the
            original exception is logged and re-raised unchanged.
    """
    #1 receive table name and check for target table and access type
    env_dict = initialize_config(env_file)
    env_config = env_dict[env]
    if service_name.lower() == 'rqsd':
        # RQSD uses its own technical user and vault secret.
        env_config["DEVO_SECRET"] = env_config["DEVO_SECRET_RQSD"]
        env_config["DEVO_USERNAME"] = env_config["DEVO_USERNAME_RQSD"]
    try:
        # The config holds the vault secret id; replace it with the secret value.
        devo_secret_name = env_config["DEVO_SECRET"]
        env_config["DEVO_SECRET"] = get_secret(devo_secret_name)
    except Exception:
        logger.error("Failed to retrieve credentials from secrets")
        # Re-raise the real error instead of a bare, message-less Exception.
        raise
    db_config = env_dict[service_name]
    try:
        target_table = get_target_table(db_config['oracle_mgmt_table'], source_schema, source_table, env)['table_alias'][0]
    except Exception as e:
        logger.error("Table not found in oracle management table")
        logger.error("Exception: %s", e)
        logger.error("Traceback:\n%s", traceback.format_exc())
        raise
    try:
        access_type = get_type_ofAccess(db_config['oracle_metadata_table'], source_schema, source_table, env)['rar3_type_of_access'][0].strip()
    except Exception as e:
        logger.error("Table not found in oracle metadata inventory")
        logger.error("Exception: %s", e)
        logger.error("Traceback:\n%s", traceback.format_exc())
        raise

    # "".split(',') returns [''] (a truthy list holding one blank entitlement), so
    # blank entries are dropped and surrounding whitespace is removed.
    raw_full_access = env_config[f"FULL_ACCESS_LIST_{service_name.upper()}"] or ""
    full_access_entitlements = [
        entry.strip() for entry in raw_full_access.split(',') if entry.strip()
    ]

    args = {
        'corporate_store': db_config['corporate_store'],
        'service_name': service_name,
        'source_schema': source_schema,
        'source_table': source_table,
        'oracle_metadata_table': db_config['oracle_metadata_table'],
        'oracle_igam_table': db_config['oracle_igam_table'],
        'oracle_mgmt_table': db_config['oracle_mgmt_table'],
        'target_table': target_table,
        'sentry_role_environment': sentry_role_environment,
        'target_s3_bucket': env_config["BUCKET_PREFIX"] + db_config['target_s3_bucket'],
        'tech_meta_data_fields': db_config['tech_meta_data_fields'],
        'full_access_entitlement_list': full_access_entitlements,
        'access_type': access_type,
    }
    config = fo.Options(args)
    #2 load metadata
    tableFields = loadMetadataTable(config, env)
    igamRoles = readIGAMRoles(config, env)
    #3 drop table and policies
    deleteExternalTable(config, env_config)
    #4 create external table and policies
    if config.target_table[-4:].upper() == '_EXT':
        createExternalTables(config, tableFields, env_config)
    else:
        createTableFromExternal(config, tableFields, env_config)
    accessTypeMapper(config, env_config, igamRoles)
    #5 refresh metadata
    #execute_query(f"INVALIDATE METADATA {config.corporate_store}.{config.target_table}",env_config["DEVO_USERNAME"],env_config['IMPALA_HOSTNAME'],env_config['DEVO_SECRET'])
    #execute_query(f"COMPUTE STATS {config.corporate_store}.{config.target_table}",env_config["DEVO_USERNAME"],env_config['IMPALA_HOSTNAME'],env_config['DEVO_SECRET'])
# Manual invocations kept for reference. Only the uncommented run_process call
# below executes, and it does so at module level.
# NOTE(review): because the call is not behind an `if __name__ == "__main__":`
# guard, importing this module also triggers it, with a hard-coded,
# user-specific config path — confirm this is intended.
#run_process("/home/dbt/Marco/mrds_elt/python/devo_replicator/env_config.yaml",'tst','mopdb','MPEC','T_MPEC','TEST/INTEGRATION')
#run_process("/home/dbt/Marco/mrds_elt/python/devo_replicator/config/env_config.yaml",'tst','rar','CORR_RAR','NH_ASSET','TEST/INTEGRATION')
#run_process("/home/dbt/Marco/mrds_elt/python/devo_replicator/config/env_config.yaml",'dev','rar','CORR_RAR','NH_LIMIT','TEST/INTEGRATION')
run_process("/home/dbt/Marco/mrds_elt/python/devo_replicator/config/env_config.yaml",'dev','rar','CORR_RAR','NH_Asset_transactial_data'.upper(),'TEST/INTEGRATION')
# Disabled batch driver: the text below sits in a bare string literal, so it is
# never executed. It would run run_process for every table listed in
# CT_MRDS.A_DEVO_REPLICA_MGMT_RQSD and collect the names of the tables that failed.
"""
df=execute_oracle_query('select owner, table_name from CT_MRDS.A_DEVO_REPLICA_MGMT_RQSD')
listfail=[]
for index, row in df.iterrows():
try:
print("running table: ",row["table_name"])
run_process("/home/dbt/Marco/mrds_elt/python/devo_replicator/config/env_config.yaml",'tst','rqsd',row['owner'],row['table_name'].upper(),'TEST/INTEGRATION')
except:
print("failed")
listfail.append(row["table_name"])
print("succeded")
print(listfail)
"""
'''{"id": 48754, "guid": "d75f1491-538d-402a-a8ac-e7e21ac0be53", "isEnabled": true, "version": 1, "service": "cm_hive", "name": "cpo_crp_rar_mu_asset_code_map_2a_policy_1", "policyType": 0, "policyPriority": 0, "description": "created-ranger_client-v0.0.6-2025-10-17T14:28:24.135108", "isAuditEnabled": true, "resources": {"database": {"values": ["crp_rar"], "isExcludes": false, "isRecursive": false},
"column": {"values": ["ASSET_FK", "ASSET_FK", "A_DWH_LOAD_SET_FK", "A_DWH_LOAD_SET_FK", "A_VALID_FROM", "A_VALID_FROM", "A_VALID_TO", "A_VALID_TO", "CODE_TYPE_NO_ID", "CODE_TYPE_NO_ID", "CODE_VALUE", "CODE_VALUE", "INDEP_SUBPROCESS_FK", "INDEP_SUBPROCESS_FK", "TEC_EXECUTION_DATE", "TEC_EXECUTION_DATE", "TEC_INGESTION_DATE", "TEC_INGESTION_DATE", "TEC_RUN_ID", "TEC_RUN_ID"], "isExcludes": false, "isRecursive": false},
"table": {"values": ["MU_ASSET_CODE_MAP"], "isExcludes": false, "isRecursive": false}}, "policyItems": [{"accesses": [{"type": "select", "isAllowed": true}],
"groups": ["a_mopdb_ea", "disc-au-bda"], "delegateAdmin": false}], "serviceType": "hive", "isDenyAllElse": false}
{"id": 48755, "guid": "5ff857c2-3683-4178-98ce-5932c0677cd4", "isEnabled": true, "version": 1, "service": "cm_hive", "name": "cpo_crp_rar_mu_asset_code_map_2a_policy_2", "policyType": 0, "policyPriority": 0, "description": "created-ranger_client-v0.0.6-2025-10-17T14:28:24.135108", "isAuditEnabled": true, "resources": {"database": {"values": ["crp_rar"], "isExcludes": false, "isRecursive": false},
"column": {"values": ["ASSET_FK", "A_DWH_LOAD_SET_FK", "A_VALID_FROM", "A_VALID_TO", "CODE_TYPE_NO_ID", "CODE_VALUE", "INDEP_SUBPROCESS_FK", "TEC_EXECUTION_DATE", "TEC_INGESTION_DATE", "TEC_RUN_ID"], "isExcludes": false, "isRecursive": false},
"table": {"values": ["MU_ASSET_CODE_MAP"], "isExcludes": false, "isRecursive": false}}, "policyItems": [{"accesses": [{"type": "select", "isAllowed": true}], "
groups": ["su-omd-reuters-users", "a_mopdb_excess_liquidity", "a-mora-lba-exp-a", "a_rar_csdb_reference_data", "a_mopdb_uc", "a_rar_csdb_ratings_data", "a_mopdb_credit_operations", "a_rar_fxcd_data", "a_rar_mdp_bbg_data", "disc-ac-riad_cnf_n-r", "a-mora-lba-ana-a", "a_mopdb_tms_data", "disc-ac-riad_core-r", "a_mopdb_mpec", "a-led-ana-a", "a-led-exp-a", "a_mopdb_ela_all"], "delegateAdmin": false}], "serviceType": "hive", "isDenyAllElse": false}
{"id": 48756, "guid": "1071767f-8ef6-47be-bb9b-7077ed9e9a90", "isEnabled": true, "version": 1, "service": "cm_hive", "name": "cpo_crp_rar_mu_asset_code_map_2a_policy_full_access", "policyType": 0, "policyPriority": 0, "description": "created-ranger_client-v0.0.6-2025-10-17T14:28:24.135108", "isAuditEnabled": true, "resources": {"database": {"values": ["crp_rar"], "isExcludes": false, "isRecursive": false}, "column": {"values": ["*"], "isExcludes": false, "isRecursive": false}, "table": {"values": ["MU_ASSET_CODE_MAP"], "isExcludes": false, "isRecursive": false}},
"policyItems": [{"accesses": [{"type": "select", "isAllowed": true}], "groups": ["disc-ac-rar-r"], "delegateAdmin": false}], "serviceType": "hive", "isDenyAllElse": false}'''
'''
{"isEnabled": true, "isDenyAllElse": false, "service": "cm_hive", "name": "cpo_crp_rar_mu_asset_code_map_ext_2a_policy_1", "resources": {"database": {"values": ["crp_rar"], "isExcludes": false, "isRecursive": false}, "table": {"values": ["MU_ASSET_CODE_MAP_EXT"], "isExcludes": false, "isRecursive": false},
"column": {"values": ["ASSET_FK", "ASSET_FK", "A_DWH_LOAD_SET_FK", "A_DWH_LOAD_SET_FK", "A_VALID_FROM", "A_VALID_FROM", "A_VALID_TO", "A_VALID_TO", "CODE_TYPE_NO_ID", "CODE_TYPE_NO_ID", "CODE_VALUE", "CODE_VALUE", "INDEP_SUBPROCESS_FK", "INDEP_SUBPROCESS_FK", "TEC_EXECUTION_DATE", "TEC_EXECUTION_DATE", "TEC_INGESTION_DATE", "TEC_INGESTION_DATE", "TEC_RUN_ID", "TEC_RUN_ID"], "isExcludes": false, "isRecursive": false}}, "policyItems": [{"delegateAdmin": false,
"groups": ["disc-tu-bda", "t_mopdb_ea"], "accesses": [{"type": "select", "isAllowed": true}]}]}
{"isEnabled": true, "isDenyAllElse": false, "service": "cm_hive", "name": "cpo_crp_rar_mu_asset_code_map_ext_2a_policy_2", "resources": {"database": {"values": ["crp_rar"], "isExcludes": false, "isRecursive": false},
"table": {"values": ["MU_ASSET_CODE_MAP_EXT"], "isExcludes": false, "isRecursive": false},
"column": {"values": ["ASSET_FK", "A_DWH_LOAD_SET_FK", "A_VALID_FROM", "A_VALID_TO", "CODE_TYPE_NO_ID", "CODE_VALUE", "INDEP_SUBPROCESS_FK", "TEC_EXECUTION_DATE", "TEC_INGESTION_DATE", "TEC_RUN_ID"], "isExcludes": false, "isRecursive": false}}, "policyItems": [{"delegateAdmin": false,
"groups": ["a-led-ana-t", "a-led-exp-t", "a-mora-lba-ana-t", "a-mora-lba-exp-t", "disc-tc-riad_cnf_n-r", "disc-tc-riad_core-r", "su-omd-reuters-users", "t_mopdb_credit_operations", "t_mopdb_ela_all", "t_mopdb_excess_liquidity", "t_mopdb_mpec", "t_mopdb_tms_data", "t_mopdb_uc", "t_rar_csdb_ratings_data", "t_rar_csdb_reference_data", "t_rar_fxcd_data", "t_rar_mdp_bbg_data"],
"accesses": [{"type": "select", "isAllowed": true}]}]}
{"isEnabled": true, "isDenyAllElse": false, "service": "cm_hive", "name": "cpo_crp_rar_mu_asset_code_map_ext_2a_policy_full_access", "resources": {"database": {"values": ["crp_rar"], "isExcludes": false, "isRecursive": false},
"table": {"values": ["MU_ASSET_CODE_MAP_EXT"], "isExcludes": false, "isRecursive": false}, "column": {"values": ["*"], "isExcludes": false, "isRecursive": false}}, "policyItems": [{"delegateAdmin": false,
"groups": ["disc-dc-rar-r"], "accesses": [{"type": "select", "isAllowed": true}]}]}
'''

View File

@@ -0,0 +1,130 @@
# Module-level name of the Oracle metadata inventory table.
# NOTE(review): the query builders in this module take the table name as a
# parameter of the same name and do not read this constant — confirm whether
# any external caller still imports it.
metadata_table = "DW_RAR.NH_METADATA_INVENTORY"
def get_query_metadata(metadata_table, owner, table_name):
    """
    Build the Oracle query that lists a table's columns with derived Hive types.

    For every currently valid row (a_valid_to > sysdate) of ``owner.table_name``
    in ``metadata_table`` the query returns the Oracle column attributes plus:
      - data_precision_hive / data_scale_hive: precision and scale normalised by
        the CASE rules in the query,
      - data_type_hive: 'Decimal' for NUMBER columns with a usable precision/scale,
        otherwise 'String',
      - data_type_string: 'Decimal(p,s)' for decimals, otherwise data_type_hive,
      - data_description with single quotes backslash-escaped.
    Rows are ordered by column_id.

    Args:
        metadata_table: qualified name of the metadata inventory table; inserted
            verbatim as an identifier.
        owner: schema of the table; matched case-insensitively.
        table_name: name of the table; matched case-insensitively.

    Returns:
        The SQL text as a single string.
    """
    # owner/table_name end up inside single-quoted SQL literals: double any embedded
    # quote so the value cannot terminate the literal and corrupt the statement.
    owner_literal = str(owner).replace("'", "''")
    table_literal = str(table_name).replace("'", "''")
    query_metadata = (
        "WITH metaDF AS ( "
        "SELECT owner, table_name, column_id, column_name, data_type, data_precision, data_scale, "
        "CASE WHEN data_precision IS NULL AND data_scale IS NULL THEN NULL "
        "WHEN data_precision IS NOT NULL AND data_scale IS NULL THEN data_precision "
        "WHEN CAST(data_precision AS INT) >= CAST(data_scale AS INT) AND CAST(data_scale AS INT) >= 0 THEN data_precision "
        "WHEN CAST(data_precision AS INT) < CAST(data_scale AS INT) AND CAST(data_scale AS INT) <= 38 AND CAST(data_scale AS INT) > 0 THEN data_scale "
        "WHEN CAST(data_precision AS INT) < CAST(data_scale AS INT) AND (CAST(data_scale AS INT) > 38 OR CAST(data_scale AS INT) < 0) THEN NULL "
        "ELSE NULL END AS data_precision_hive, "
        "CASE WHEN data_precision IS NULL AND data_scale IS NULL THEN NULL "
        "WHEN data_precision IS NOT NULL AND data_scale IS NULL THEN CAST(0 AS INT) "
        "WHEN CAST(data_precision AS INT) >= CAST(data_scale AS INT) AND CAST(data_scale AS INT) >= 0 THEN data_scale "
        "WHEN CAST(data_precision AS INT) < CAST(data_scale AS INT) AND CAST(data_scale AS INT) <= 38 AND CAST(data_scale AS INT) > 0 THEN data_scale "
        "WHEN CAST(data_precision AS INT) < CAST(data_scale AS INT) AND (CAST(data_scale AS INT) > 38 OR CAST(data_scale AS INT) < 0) THEN NULL "
        "ELSE NULL END AS data_scale_hive, "
        "CASE WHEN data_type LIKE '%NUMBER%' AND data_precision IS NULL AND data_scale IS NULL THEN 'String' "
        "WHEN data_type LIKE '%NUMBER%' AND data_precision IS NOT NULL AND data_scale IS NULL THEN 'Decimal' "
        "WHEN data_type LIKE '%NUMBER%' AND CAST(data_precision AS INT) >= CAST(data_scale AS INT) AND CAST(data_scale AS INT) >= 0 THEN 'Decimal' "
        "WHEN data_type LIKE '%NUMBER%' AND CAST(data_precision AS INT) < CAST(data_scale AS INT) AND CAST(data_scale AS INT) <= 38 AND CAST(data_scale AS INT) > 0 THEN 'Decimal' "
        "WHEN data_type LIKE '%NUMBER%' AND CAST(data_precision AS INT) < CAST(data_scale AS INT) AND (CAST(data_scale AS INT) > 38 OR CAST(data_scale AS INT) < 0) THEN 'String' "
        "WHEN data_type LIKE '%CHAR%' THEN 'String' "
        "WHEN data_type LIKE '%VARCHAR2%' THEN 'String' "
        "WHEN data_type LIKE '%TIMESTAMP%' THEN 'String' "
        "WHEN data_type LIKE '%DATE%' THEN 'String' "
        "ELSE 'String' END AS data_type_hive, "
        "REGEXP_REPLACE(data_description, '''', '\\''') AS data_description "
        "FROM {0} "
        "WHERE lower(owner||'.'||table_name) = lower('{1}'||'.'||'{2}') "
        "AND a_valid_to > sysdate) "
        "SELECT owner, table_name, column_id, column_name, data_type, data_precision, data_scale, "
        "data_precision_hive, data_scale_hive, data_type_hive, "
        "CASE WHEN data_type_hive = 'Decimal' THEN 'Decimal(' || COALESCE(CAST(data_precision_hive AS VARCHAR2(30)), '') || ',' || COALESCE(CAST(data_scale_hive AS VARCHAR2(30)), '') || ')' "
        "ELSE data_type_hive END AS data_type_string, data_description "
        "FROM metaDF "
        "ORDER BY CAST(column_id AS INT) "
    ).format(metadata_table, owner_literal, table_literal)
    return query_metadata
def get_query_metadata_access_type1(metadata_table):
    """
    Build the Oracle query selecting a table's access-type-1 metadata rows.

    The statement is the UNION of two selections over ``metadata_table``, both
    limited to currently valid rows with rar3_type_of_access = '1' for the table
    given by the ``$$$1`` placeholder:
      - rows whose list_of_sources is not 'RAR',
      - rows owned by CORR_REF_MAIN.

    The ``$$$1`` placeholder is returned as-is; presumably the caller substitutes
    the 'owner.table' value before execution — verify against the caller.

    Args:
        metadata_table: qualified name of the metadata inventory table.

    Returns:
        The SQL text as a single string.
    """
    def _select_where(extra_predicate):
        # One UNION branch; the branches differ only in this extra predicate.
        return (
            "SELECT owner, table_name, list_of_sources as SOURCE, rar3_type_of_access "
            f"FROM {metadata_table} "
            "WHERE a_valid_to > sysdate "
            "AND rar3_type_of_access = '1' "
            f"AND {extra_predicate} "
            "AND lower(owner||'.'||table_name) = lower($$$1) "
        )

    non_rar_sources = _select_where("list_of_sources NOT IN 'RAR'")
    ref_main_owner = _select_where("owner = 'CORR_REF_MAIN'")
    return non_rar_sources + "UNION " + ref_main_owner
def get_query_metadata_access_type2a(metadata_table):
    """
    Build the Oracle query listing (column, source) pairs for an access-type-2a table.

    The statement has three CTEs:
      - rar_columns: currently valid '2a' rows of the table given by ``$$$1``
        with a single-valued list_of_sources (and excluding the column named
        DATABASE), unpivoted over the per-source flag columns,
      - dummy_entry: the first rar_columns row with source forced to 'RAR',
      - disc_tec_fields: that row repeated for the technical columns
        TEC_INGESTION_DATE, TEC_EXECUTION_DATE and TEC_RUN_ID.
    The final result is disc_tec_fields UNION rar_columns.

    The ``$$$1`` placeholder is returned as-is; presumably the caller substitutes
    the 'owner.table' value before execution — verify against the caller.

    Args:
        metadata_table: qualified name of the metadata inventory table.

    Returns:
        The SQL text as a single string.
    """
    projection = "SELECT owner, table_name, column_name, source, rar3_type_of_access "
    # Per-source flag columns; each is unpivoted under its upper-cased name.
    source_flags = (
        "tms", "c2d_ea", "c2d_ela", "c2d_mpec", "c2d_uc", "ceph", "lm",
        "csdb_reference", "csdb_ratings", "fxcd", "mdp_bbg", "mdp_reu",
        "riad_cl", "riad_ou", "sdw_estr", "sdw_fx", "top", "rar", "rtm",
        "led", "mdp_cma",
    )
    flag_columns = ", ".join(source_flags)
    unpivot_pairs = ", ".join(f"{flag} AS '{flag.upper()}'" for flag in source_flags)

    def _tec_field(column_name):
        # Selection that relabels the dummy row as one technical column.
        return (
            f"SELECT owner, table_name, '{column_name}' AS column_name, source, rar3_type_of_access "
            "FROM dummy_entry "
        )

    fragments = [
        "WITH rar_columns AS ( ",
        projection,
        "FROM ( ",
        "SELECT owner, table_name, column_name, rar3_type_of_access, list_of_sources, ",
        flag_columns + " ",
        f"FROM {metadata_table} ",
        "WHERE a_valid_to > sysdate ",
        "AND rar3_type_of_access = '2a' ",
        "AND lower(owner || '.' || table_name) = lower($$$1) ",
        "AND list_of_sources NOT LIKE '%,%' ",
        "AND upper(column_name) NOT IN ('DATABASE') ",
        ") a ",
        "UNPIVOT ( ",
        "val FOR (source) IN ( ",
        unpivot_pairs + ") ",
        ") ",
        "ORDER BY owner, table_name, column_name ",
        "), ",
        "dummy_entry AS ( ",
        "SELECT owner, table_name, 'RAR' as SOURCE, rar3_type_of_access ",
        "FROM rar_columns ",
        "FETCH FIRST ROW ONLY ",
        "), ",
        "disc_tec_fields AS ( ",
        _tec_field("TEC_INGESTION_DATE"),
        "UNION ",
        "( ",
        _tec_field("TEC_EXECUTION_DATE"),
        ") ",
        "UNION ",
        "( ",
        _tec_field("TEC_RUN_ID"),
        ") ",
        ") ",
        projection,
        "FROM disc_tec_fields ",
        "UNION ",
        projection,
        "FROM rar_columns ",
    ]
    return "".join(fragments)
def get_query_igam_roles(igam_table, service):
    """
    Build the Oracle query that reads the IGAM entitlements of one service.

    The query returns MRDS_subsource_id (as both Datasource and subsource_id),
    MRDS_entitlement (as IGAM_Entitlement) and environment for the rows of
    ``igam_table`` whose SERVICE_NAME equals the upper-cased ``service``.

    The ``$$$1`` placeholder is returned as-is; presumably the caller substitutes
    the environment value before execution — verify against the caller.

    Args:
        igam_table: qualified name of the IGAM table; inserted verbatim.
        service: service name (e.g. 'rar', 'mopdb'); upper-cased for the match.

    Returns:
        The SQL text as a single string.
    """
    # The former if/elif/else assigned the same unused value in every branch and
    # was removed. Embedded single quotes are doubled so the service name stays
    # inside its SQL string literal.
    service_literal = service.upper().replace("'", "''")
    return (
        "SELECT MRDS_subsource_id as Datasource, "
        "MRDS_subsource_id as subsource_id, "
        "MRDS_entitlement as IGAM_Entitlement, "
        "environment "
        "FROM {0} where lower(environment) = lower($$$1) and SERVICE_NAME='{1}'"
    ).format(igam_table, service_literal)

View File

@@ -0,0 +1,54 @@
import os
import mrds_elt.python.devo_replicator.FlowOptions as ro
import tableBuilderProcessor_2 as tbp
# Ad-hoc driver: builds an options object from 'key=value' strings and runs the
# table builder steps (load metadata, create external tables, read IGAM roles,
# map access types) for one table. Everything below runs at module level.
# setting variables
# Example configuration (disabled): crp_rar / NH_F_RATING with access type 1.
# args = [
# 'corporate_store=crp_rar',
# 'source_schema=CORR_RAR',
# 'source_table=NH_F_RATING',
# 'target_table=NH_F_RATING',
# 'access_type=1',
# 'oracle_metadata_table=CORR_RAR.NH_METADATA_INVENTORY',
# 'oracle_igam_table=CT_REF.RAR_SOURCES_IGAM_SENTRY',
# 'sentry_role_environment=production',
# 'target_s3_bucket=s3a://devo-crp-ffppyd8q',
# 'tech_meta_data_fields=tec_ingestion_date String, tec_execution_date String, tec_run_id String',
# 'full_access_entitlement_list=DISC-PC-RAR-R'
# ]
# Example configuration (disabled): crp_rar / NH_ASSET with access type 2a.
# args = [
# 'corporate_store=crp_rar',
# 'source_schema=CORR_RAR',
# 'source_table=NH_ASSET',
# 'target_table=NH_ASSET',
# 'access_type=2a',
# 'oracle_metadata_table=CORR_RAR.NH_METADATA_INVENTORY',
# 'oracle_igam_table=CT_REF.RAR_SOURCES_IGAM_SENTRY',
# 'sentry_role_environment=production',
# 'target_s3_bucket=s3a://devo-crp-ffppyd8q',
# 'tech_meta_data_fields=tec_ingestion_date String, tec_execution_date String, tec_run_id String',
# 'full_access_entitlement_list=DISC-PC-RAR-R'
# ]
# Active configuration: crp_mopdb / MPEC.T_MPEC.
# NOTE(review): unlike the disabled examples above, this list sets neither
# 'target_table' nor 'access_type', and 'full_access_entitlement_list' is empty —
# confirm that Options and the tbp functions cope with the missing keys.
args = [
'corporate_store=crp_mopdb',
'source_schema=MPEC',
'source_table=T_MPEC',
'oracle_metadata_table=CT_MOPDB.MOPDB_METADATA_INVENTORY',
'oracle_igam_table=CT_MOPDB.MOPDB_SOURCES_IGAM_SENTRY',
'sentry_role_environment=production',
'target_s3_bucket=s3a://devo-crp-sbul3ju3/mopdb/db',
'tech_meta_data_fields=tec_ingestion_date String, tec_execution_date String, tec_run_id String',
'full_access_entitlement_list='
]
# The variable is named rar_options although the active configuration targets crp_mopdb.
rar_options = ro.Options(args)
# Load the column metadata, then create the external table(s) from it.
tableFields = tbp.loadMetadataTable(rar_options)
tbp.createExternalTables_CRP_RAR(rar_options,tableFields )
# Read the IGAM roles and generate the access policies for them.
igamRoleDF = tbp.readIGAMRoles(rar_options)
tbp.accessTypeMapper(rar_options, igamRoleDF)