init
This commit is contained in:
0
python/devo_replicator/config/.gitkeep
Normal file
0
python/devo_replicator/config/.gitkeep
Normal file
86
python/devo_replicator/config/env_config.yaml
Normal file
86
python/devo_replicator/config/env_config.yaml
Normal file
@@ -0,0 +1,86 @@
|
||||
dev:
|
||||
DEVO_USERNAME: "ap-devo_lab-mrds"
|
||||
IMPALA_HOSTNAME: "impala-proxy-devo-lab21-impala01.dw-devo-lab21.om2y56.b0.cloudera.site"
|
||||
HIVE_HOSTNAME: 'hs2-devo-lab21-hive01.dw-devo-lab21.om2y56.b0.cloudera.site'
|
||||
RANGER_HOSTNAME: "https://devo-lab21-dl-gateway.devo-lab.om2y56.b0.cloudera.site:443/devo-lab21-dl/cdp-proxy-api/ranger"
|
||||
BUCKET_PREFIX: "s3a://devo-crp-ffppyd8q/"
|
||||
DEVO_SECRET: "ocid1.vaultsecret.oc1.eu-frankfurt-1.amaaaaaa2ky4jjya3tsglrzfgiyfisxchref774l5y4nrler2vn54lr3li7q"
|
||||
S3_LOCATION_URI: "https://devo-crp-ffppyd8q.bucket.vpce-040b28f5818b670c1-owicl3ow.s3.eu-central-1.vpce.amazonaws.com/{0}/db/"
|
||||
DEVO_USERNAME_RQSD: "ap-devo-rqsd-lab"
|
||||
DEVO_SECRET_RQSD: "ocid1.vaultsecret.oc1.eu-frankfurt-1.amaaaaaa2ky4jjyap6wtzobzob7qizvk4nocszlcaxhwijgzejbvryt3uzbq"
|
||||
FULL_ACCESS_LIST_RAR: "DISC-DC-RAR-R"
|
||||
FULL_ACCESS_LIST_MOPDB: ""
|
||||
FULL_ACCESS_LIST_RQSD: ""
|
||||
tst:
|
||||
DEVO_USERNAME: "ap-devo_tst-mrds"
|
||||
IMPALA_HOSTNAME: "t-impala.devo.escb.eu"
|
||||
HIVE_HOSTNAME: "hs2-devo-tst21-hive01.dw-devo-tst21.om2y56.b0.cloudera.site"
|
||||
RANGER_HOSTNAME: "https://devo-tst21-dl-gateway.devo-tst.om2y56.b0.cloudera.site:443/devo-tst21-dl/cdp-proxy-api/ranger"
|
||||
BUCKET_PREFIX: "s3a://devo-crp-sbul3ju3/"
|
||||
DEVO_SECRET: "ocid1.vaultsecret.oc1.eu-frankfurt-1.amaaaaaa2ky4jjyayqqotyowhpoml3v5szkwhmtu4rq6bplpkvdruzupz3ma"
|
||||
S3_LOCATION_URI: "https://devo-crp-sbul3ju3.bucket.vpce-040b28f5818b670c1-owicl3ow.s3.eu-central-1.vpce.amazonaws.com/{0}/db/"
|
||||
DEVO_USERNAME_RQSD: "ap-devo-rqsd-tst"
|
||||
DEVO_SECRET_RQSD: "ocid1.vaultsecret.oc1.eu-frankfurt-1.amaaaaaa2ky4jjyap6wtzobzob7qizvk4nocszlcaxhwijgzejbvryt3uzbq"
|
||||
FULL_ACCESS_LIST_RAR: "DISC-TC-RAR-R"
|
||||
FULL_ACCESS_LIST_MOPDB: ""
|
||||
FULL_ACCESS_LIST_RQSD: ""
|
||||
acc:
|
||||
DEVO_USERNAME: "ap-devo_acc-mrds"
|
||||
IMPALA_HOSTNAME: "impala-proxy-devo-acc21-impala01.dw-devo-acc21.inym23.b0.cloudera.site"
|
||||
HIVE_HOSTNAME: "hs2-devo-acc21-hive01.dw-devo-acc21.inym23.b0.cloudera.site"
|
||||
RANGER_HOSTNAME: "https://devo-acc21-dl-gateway.devo-acc.inym23.b0.cloudera.site/devo-acc21-dl/cdp-proxy-api/ranger/"
|
||||
BUCKET_PREFIX: "s3a://devo-crp-sbc9vbsu/"
|
||||
DEVO_SECRET: "ocid1.vaultsecret.oc1.eu-frankfurt-1.amaaaaaa2ky4jjya3x3nic3vxsnpzlfshz2ubj6kekny5tvaqsnwkuh2hw2a"
|
||||
S3_LOCATION_URI: "https://devo-crp-sbc9vbsu.bucket.vpce-0bf4fa440fb60935d-6m9iqoo9.s3.eu-central-1.vpce.amazonaws.com/{0}/db/"
|
||||
DEVO_USERNAME_RQSD: "ap-devo-rqsd-acc"
|
||||
DEVO_SECRET_RQSD: "ocid1.vaultsecret.oc1.eu-frankfurt-1.amaaaaaa2ky4jjyamzhgatnso57mubvg3c6k4ens3orcx4dieo6efukuvm4a"
|
||||
FULL_ACCESS_LIST_RAR: "DISC-AC-RAR-R"
|
||||
FULL_ACCESS_LIST_MOPDB: ""
|
||||
FULL_ACCESS_LIST_RQSD: ""
|
||||
prd:
|
||||
DEVO_USERNAME: "ap-devo_prd-mrds"
|
||||
IMPALA_HOSTNAME: "impala-proxy-devo-prd21-impala01.dw-devo-prd21.inym23.b0.cloudera.site"
|
||||
HIVE_HOSTNAME: "hs2-devo-prd21-hive01.dw-devo-prd21.inym23.b0.cloudera.site"
|
||||
RANGER_HOSTNAME: "https://devo-prd21-dl-gateway.devo-prd.inym23.b0.cloudera.site/devo-prd21-dl/cdp-proxy-api/ranger/"
|
||||
BUCKET_PREFIX: "s3a://devo-crp-2gn5maj9/"
|
||||
DEVO_SECRET: "ocid1.vaultsecret.oc1.eu-frankfurt-1.amaaaaaa2ky4jjyace73o3xowa3f3jkw4diqzoiyc6skt34sqnnx4yrbykmq"
|
||||
S3_LOCATION_URI: "https://devo-crp-2gn5maj9.bucket.vpce-0aa6cf4490536dfd5-qgy4w5sz.s3.eu-central-1.vpce.amazonaws.com/{0}/db/"
|
||||
DEVO_USERNAME_RQSD: "ap-devo-rqsd-prd"
|
||||
DEVO_SECRET_RQSD: "ocid1.vaultsecret.oc1.eu-frankfurt-1.amaaaaaa2ky4jjyacodc43tfgumkw4qyzw4s3j4jp42vp2elakkpwwrmivqa"
|
||||
FULL_ACCESS_LIST_RAR: "DISC-PC-RAR-R"
|
||||
FULL_ACCESS_LIST_MOPDB: ""
|
||||
FULL_ACCESS_LIST_RQSD: ""
|
||||
|
||||
|
||||
rar:
|
||||
corporate_store: "crp_rar"
|
||||
oracle_metadata_table: "CORR_RAR.NH_METADATA_INVENTORY"
|
||||
oracle_igam_table: "CT_MRDS.A_DEVO_SOURCES_IGAM"
|
||||
oracle_mgmt_table: "CT_MRDS.A_DEVO_REPLICA_MGMT_RAR"
|
||||
target_s3_bucket: "rar/db"
|
||||
tech_meta_data_fields: "tec_ingestion_date String, tec_execution_date String, tec_run_id String"
|
||||
mopdb:
|
||||
corporate_store: "crp_mopdb"
|
||||
oracle_metadata_table: "CT_MOPDB.MOPDB_METADATA_INVENTORY"
|
||||
oracle_igam_table: "CT_MRDS.A_DEVO_SOURCES_IGAM"
|
||||
oracle_mgmt_table: "CT_MRDS.A_DEVO_REPLICA_MGMT_MOPDB"
|
||||
target_s3_bucket: "mopdb/db"
|
||||
tech_meta_data_fields: "tec_ingestion_date String, tec_execution_date String, tec_run_id String"
|
||||
rqsd:
|
||||
corporate_store: "crp_rqsd"
|
||||
oracle_metadata_table: "CT_MRDS.A_DEVO_METADATA_INVENTORY"
|
||||
oracle_igam_table: "CT_MRDS.A_DEVO_SOURCES_IGAM"
|
||||
oracle_mgmt_table: "CT_MRDS.A_DEVO_REPLICA_MGMT_RQSD"
|
||||
target_s3_bucket: "rqsd/db"
|
||||
tech_meta_data_fields: "tec_ingestion_date String, tec_execution_date String, tec_run_id String"
|
||||
|
||||
# -- target table name as
|
||||
# SELECT DISTINCT TABLE_ALIAS FROM {oracle_mgmt_table}
|
||||
# WHERE OWNER = ''
|
||||
# AND TABLE_NAME = '';
|
||||
|
||||
# -- type of access
|
||||
# SELECT DISTINCT RAR3_TYPE_OF_ACCESS FROM {oracle_metadata_table}
|
||||
# WHERE A_VALID_TO > SYSDATE AND
|
||||
# OWNER = ''
|
||||
# AND TABLE_NAME = '';
|
||||
0
python/devo_replicator/data_replicator/.gitkeep
Normal file
0
python/devo_replicator/data_replicator/.gitkeep
Normal file
65
python/devo_replicator/data_replicator/diag_s3_access.py
Normal file
65
python/devo_replicator/data_replicator/diag_s3_access.py
Normal file
@@ -0,0 +1,65 @@
|
||||
#!/usr/bin/env python3
|
||||
import sys, json
|
||||
import boto3
|
||||
from botocore.exceptions import ClientError
|
||||
from botocore.config import Config
|
||||
|
||||
BUCKET = "devo-crp-sbc9vbsu"
|
||||
PREFIX = "mopdb/db/" # adjust if needed
|
||||
|
||||
def show(e):
    """Print a one-line summary of a botocore-style error to stderr."""
    # botocore exceptions carry a structured ``response`` dict; plain
    # exceptions fall back to an empty mapping so every lookup is safe.
    details = getattr(e, "response", {})
    error_info = details.get("Error", {})
    request_id = details.get("ResponseMetadata", {}).get("RequestId")
    summary = (
        f"{type(e).__name__}: {error_info.get('Code')} "
        f"{error_info.get('Message')} (RequestId={request_id})"
    )
    print(summary, file=sys.stderr)
||||
|
||||
def main(endpoint_url=None, region=None, force_path=False):
    """Run a four-step S3 access diagnostic against module-level BUCKET/PREFIX.

    Each step failure returns a distinct non-zero exit code:
      1 -- STS get-caller-identity (are the credentials valid at all?)
      2 -- head_bucket (is the bucket reachable/visible?)
      3 -- list_objects_v2 with MaxKeys=0 (tests only the ListBucket permission)
      4 -- list_objects_v2 with MaxKeys=1 (confirms the data path works)
    Returns 0 when every step succeeds.

    Args:
        endpoint_url: optional custom S3 endpoint (e.g. a VPC endpoint URL).
        region: optional AWS region name.
        force_path: force path-style addressing instead of "auto".
    """
    session = boto3.Session()
    cfg = Config(s3={"addressing_style": "path" if force_path else "auto"})
    s3 = session.client("s3", region_name=region, endpoint_url=endpoint_url, config=cfg)
    sts = session.client("sts", region_name=region)

    # Who am I?
    try:
        ident = sts.get_caller_identity()
        print(f"Caller: {ident['Arn']} (acct {ident['Account']})")
    except Exception as e:
        print("Could not call STS get-caller-identity — credentials not valid for STS.", file=sys.stderr)
        show(e); return 1

    # Is the bucket reachable at all?
    try:
        s3.head_bucket(Bucket=BUCKET)
        print(f"head_bucket OK on s3://{BUCKET}")
    except ClientError as e:
        print("head_bucket failed:", file=sys.stderr)
        show(e); return 2

    # List with zero keys to test just the ListBucket permission
    try:
        s3.list_objects_v2(Bucket=BUCKET, Prefix=PREFIX, MaxKeys=0)
        print(f"list_objects_v2 OK on prefix '{PREFIX}' (permission exists)")
    except ClientError as e:
        print("list_objects_v2 failed:", file=sys.stderr)
        show(e); return 3

    # Ask for 1 key to confirm data path works
    try:
        resp = s3.list_objects_v2(Bucket=BUCKET, Prefix=PREFIX, MaxKeys=1)
        # An empty listing omits "Contents"; the [{}] fallback makes this
        # print "First key: None" instead of raising.
        print("First key:", resp.get("Contents", [{}])[0].get("Key"))
    except ClientError as e:
        print("list_objects_v2 (MaxKeys=1) failed:", file=sys.stderr)
        show(e); return 4

    return 0
|
||||
|
||||
if __name__ == "__main__":
    # Minimal hand-rolled flag parsing:
    #   --endpoint-url URL   --region eu-central-1   --force-path
    url = None
    reg = None
    force = False
    argv = sys.argv
    for i, arg in enumerate(argv):
        # Guard i + 1 so a flag supplied without its value produces a clear
        # error instead of crashing with IndexError (original bug).
        if arg in ("--endpoint-url", "--region") and i + 1 >= len(argv):
            print(f"Missing value for {arg}", file=sys.stderr)
            sys.exit(2)
        if arg == "--endpoint-url":
            url = argv[i + 1]
        elif arg == "--region":
            reg = argv[i + 1]
        elif arg == "--force-path":
            force = True
    sys.exit(main(endpoint_url=url, region=reg, force_path=force))
|
||||
129
python/devo_replicator/data_replicator/impala_refresher.py
Normal file
129
python/devo_replicator/data_replicator/impala_refresher.py
Normal file
@@ -0,0 +1,129 @@
|
||||
import os
|
||||
import yaml
|
||||
import datetime
|
||||
import pandas as pd
|
||||
from mrds.utils.secrets import get_secret
|
||||
import mrds.utils.manage_runs as runManager
|
||||
import mrds.utils.manage_files as fileManager
|
||||
import mrds.utils.sql_statements as sqls
|
||||
|
||||
import oci
|
||||
|
||||
from impala.dbapi import (
|
||||
connect,
|
||||
ProgrammingError,
|
||||
DatabaseError,
|
||||
IntegrityError,
|
||||
OperationalError,
|
||||
)
|
||||
from impala.error import HiveServer2Error
|
||||
|
||||
|
||||
def get_impala_connection(hostname: str, user: str, secret: str):
    """Open a TLS HiveServer2-over-HTTP Impala connection on port 443.

    Uses PLAIN (username/password) authentication against the ``cliservice``
    HTTP path, matching the Cloudera DW proxy hosts in env_config.yaml.
    """
    connection_options = dict(
        host=hostname,
        port=443,
        auth_mechanism="PLAIN",
        user=user,
        password=secret,
        use_http_transport=True,
        http_path="cliservice",
        use_ssl=True,
    )
    return connect(**connection_options)
|
||||
|
||||
def execute_query(query: str, user, hostname, password):
    """Connect to Impala, run *query*, and return ``(columns, result)``."""
    connection = get_impala_connection(hostname, user, password)
    print(connection)  # debug: surface the live connection object
    return execute_devo_query(query, connection)
|
||||
|
||||
def execute_devo_query(query: str, conn):
    """Execute *query* on an open Impala connection.

    Returns ``(None, rowcount)``; this variant never fetches result rows —
    it is used for maintenance statements (INVALIDATE METADATA /
    COMPUTE STATS) where only the affected-row count matters.

    Raises:
        Exception: wrapping the underlying impyla error with a short
            human-readable prefix, chained to the original via ``from``.

    Bug fixed: the original raised ``Exception(status_code=..., detail=...)``
    everywhere, which itself fails with ``TypeError`` at raise time because
    ``Exception`` accepts no keyword arguments.

    Note: the connection is always closed in ``finally``, so callers must
    not reuse it after this call.
    """
    #impersonation_configuration = {"impala.doas.user": userid} # to be changed
    #impersonation_configuration = {} # to be changed
    cursor = conn.cursor()
    print("executing query")
    try:
        cursor.execute(query)
        return None, cursor.rowcount  # number of rows affected

    except OperationalError as oe:
        raise Exception("Failed to connect to Impala: " + str(oe)) from oe

    except ProgrammingError as pe:
        raise Exception("Query syntax error: " + str(pe)) from pe

    except IntegrityError as ie:
        raise Exception("Insufficient permissions: " + str(ie)) from ie

    except DatabaseError as db_err:
        raise Exception("Database error: " + str(db_err)) from db_err

    except HiveServer2Error as au_err:
        raise Exception("HiveServer2Error error: " + str(au_err)) from au_err

    except Exception as e:
        raise Exception("An unexpected error occurred: " + str(e)) from e

    finally:
        # Best-effort cleanup; a failure here is itself surfaced.
        try:
            if cursor:
                cursor.close()
            if conn:
                conn.close()
        except Exception as e:
            raise Exception("Failed to close the connection: " + str(e)) from e
|
||||
|
||||
def initialize_task(workflow_context, task_name):
    """Register a task run via runManager and return its history key."""
    run_id = workflow_context["run_id"]
    workflow_key = workflow_context["a_workflow_history_key"]
    # runManager persists the task start and hands back the key used to
    # correlate later status updates.
    return runManager.init_task(task_name, run_id, workflow_key)
|
||||
|
||||
def initialize_config(config_file_path):
    """Load and return the YAML configuration at *config_file_path*.

    Raises:
        FileNotFoundError: when the path does not exist.
    """
    if not os.path.exists(config_file_path):
        raise FileNotFoundError(f"Configuration file {config_file_path} not found.")
    with open(config_file_path, "r") as handle:
        return yaml.safe_load(handle)
|
||||
|
||||
def main(env_config_path, env, table, corporate_store):
    """Refresh Impala metadata and statistics for one replicated table.

    Loads the environment section from the YAML config, resolves the DEVO
    service-account password from the vault, then runs
    ``INVALIDATE METADATA`` followed by ``COMPUTE STATS`` on
    ``{corporate_store}.{table}``.

    Returns True on success; re-raises the underlying exception on failure
    (the original used bare ``except:`` and ``raise(Exception)``, which
    swallowed the real error and its traceback).
    """
    # init setup
    envs_info = initialize_config(env_config_path)
    environment_info = envs_info[env]

    try:
        devo_secret_name = environment_info["DEVO_SECRET"]
        password = get_secret(devo_secret_name)
    except Exception:
        print("Failed to retrieve credentials from secrets")
        raise  # preserve the original exception and traceback

    username = environment_info["DEVO_USERNAME"]
    hostname = environment_info["IMPALA_HOSTNAME"]
    # refresh DEVO metadata + stats
    try:
        execute_query(f"INVALIDATE METADATA {corporate_store}.{table}", username, hostname, password)
        execute_query(f"COMPUTE STATS {corporate_store}.{table}", username, hostname, password)
    except Exception:
        print("Failed to retrieve DEVO data, error during connection or request")
        raise
    return True
|
||||
|
||||
128
python/devo_replicator/data_replicator/list_s3_files_fast.py
Normal file
128
python/devo_replicator/data_replicator/list_s3_files_fast.py
Normal file
@@ -0,0 +1,128 @@
|
||||
#!/usr/bin/env python3
|
||||
import argparse, sys
|
||||
from urllib.parse import urlparse
|
||||
import boto3
|
||||
from botocore.config import Config
|
||||
from botocore.exceptions import ClientError, EndpointConnectionError, NoCredentialsError, ReadTimeoutError, ConnectTimeoutError
|
||||
|
||||
def parse_s3_uri(s3_uri: str):
    """Split an ``s3://bucket/key`` URI into ``(bucket, key_prefix)``."""
    if not s3_uri.startswith("s3://"):
        raise ValueError("S3 URI must start with 's3://'")
    parsed = urlparse(s3_uri)
    bucket = parsed.netloc
    if not bucket:
        raise ValueError("Missing bucket in S3 URI")
    return bucket, parsed.path.lstrip("/")
|
||||
|
||||
def parse_location(location: str):
    """Accept s3://... OR https://...amazonaws.com/... and return (bucket, prefix)."""
    if location.startswith("s3://"):
        return parse_s3_uri(location)

    if location.startswith(("http://", "https://")):
        parsed = urlparse(location)
        host = parsed.netloc
        path = parsed.path.lstrip("/")
        # Bucket-scoped VPC endpoint: <bucket>.bucket.vpce-xxxx.s3.<region>...
        if ".bucket." in host:
            return host.split(".bucket.", 1)[0], path
        # Virtual-hosted style: <bucket>.s3.<region>...
        if ".s3." in host and not host.startswith("s3."):
            return host.split(".s3.", 1)[0], path
        # Path style: s3.<region>.../<bucket>/<prefix>
        if host.startswith("s3."):
            bucket, _, prefix = path.partition("/")
            return bucket, prefix

    raise ValueError(f"Unsupported location: {location}")
|
||||
|
||||
def iter_keys(s3, bucket: str, prefix: str, page_size: int, max_items: int, verbose: bool):
    """Yield object keys under *prefix* using paginated ListObjectsV2.

    Args:
        s3: boto3 S3 client.
        bucket: bucket name.
        prefix: key prefix to list under.
        page_size: keys fetched per API call.
        max_items: stop after this many keys (0 = unlimited).
        verbose: print per-page progress to stderr.

    Fix: removed a leftover debug ``print('here')`` that polluted stdout
    (the very stream this tool's key listing is written to).
    """
    paginator = s3.get_paginator("list_objects_v2")
    kwargs = {"Bucket": bucket, "Prefix": prefix}
    pagination = {"PageSize": page_size}
    if max_items > 0:
        pagination["MaxItems"] = max_items

    total = 0
    page_num = 0
    for page in paginator.paginate(**kwargs, PaginationConfig=pagination):
        page_num += 1
        # "Contents" may be absent or None on empty pages.
        contents = page.get("Contents", []) or []
        if verbose:
            print(f"[page {page_num}] fetched {len(contents)} keys (running total={total + len(contents)})",
                  file=sys.stderr, flush=True)
        for obj in contents:
            yield obj["Key"]
            total += 1
|
||||
|
||||
def main():
    """CLI entry point: list keys under an S3 location and print them.

    Accepts either an ``s3://`` URI or an https VPC-endpoint URL, builds a
    boto3 client with explicit timeouts/retries, runs a zero-key preflight
    to surface auth/endpoint problems early, then streams keys to stdout.
    Exits non-zero on any AWS/network failure, 130 on Ctrl-C.
    """
    ap = argparse.ArgumentParser(description="List files under an S3 location quickly and safely.")
    ap.add_argument("location", help="s3://bucket/prefix/ OR https://<vpc-endpoint-host>/<prefix>")
    ap.add_argument("--region", default=None, help="AWS region (e.g., eu-central-1)")
    ap.add_argument("--profile", default=None, help="AWS profile to use")
    ap.add_argument("--endpoint-url", default=None,
                    help="Custom S3 endpoint (e.g., https://s3.eu-central-1.vpce.amazonaws.com)")
    ap.add_argument("--force-path-addressing", action="store_true",
                    help="Force path-style addressing (useful with bucket-scoped VPCe hostnames)")
    ap.add_argument("--page-size", type=int, default=1000, help="S3 page size (default 1000)")
    ap.add_argument("--max-items", type=int, default=0, help="Stop after N keys (0 = no limit)")
    ap.add_argument("--connect-timeout", type=float, default=10.0, help="Seconds (default 10)")
    ap.add_argument("--read-timeout", type=float, default=30.0, help="Seconds (default 30)")
    ap.add_argument("--retries", type=int, default=3, help="Max retry attempts (default 3)")
    ap.add_argument("--relative", action="store_true", help="Print keys relative to the prefix")
    ap.add_argument("--verbose", "-v", action="store_true", help="Print progress to stderr")
    args = ap.parse_args()

    bucket, prefix = parse_location(args.location)

    # Session & client with explicit timeouts and optional path addressing
    sess_kwargs = {}
    if args.profile:
        sess_kwargs["profile_name"] = args.profile
    session = boto3.Session(**sess_kwargs)

    cfg = Config(
        connect_timeout=args.connect_timeout,
        read_timeout=args.read_timeout,
        retries={"max_attempts": args.retries, "mode": "standard"},
        s3={"addressing_style": "path" if args.force_path_addressing else "auto"},
    )

    s3 = session.client("s3", region_name=args.region, endpoint_url=args.endpoint_url, config=cfg)

    # Quick preflight: try a 0-key list to surface auth/endpoint issues fast
    try:
        _ = s3.list_objects_v2(Bucket=bucket, Prefix=prefix, MaxKeys=0)
    except ClientError as e:
        print(f"Preflight failed (auth/permissions/endpoint): {e}", file=sys.stderr)
        sys.exit(1)
    except (EndpointConnectionError, ReadTimeoutError, ConnectTimeoutError) as e:
        print(f"Network/endpoint error: {e}", file=sys.stderr)
        sys.exit(1)

    try:
        for key in iter_keys(s3, bucket, prefix, args.page_size, args.max_items, args.verbose):
            # --relative strips the prefix; otherwise print a full s3:// URI.
            if args.relative and prefix and key.startswith(prefix):
                print(key[len(prefix):].lstrip("/"))
            else:
                print(f"s3://{bucket}/{key}")
    except KeyboardInterrupt:
        print("\nInterrupted.", file=sys.stderr)
        sys.exit(130)
    except NoCredentialsError:
        print("No AWS credentials found. Set env vars or use --profile.", file=sys.stderr)
        sys.exit(1)
    except (EndpointConnectionError, ReadTimeoutError, ConnectTimeoutError) as e:
        print(f"Network/timeout listing objects: {e}", file=sys.stderr)
        sys.exit(1)
    except ClientError as e:
        print(f"AWS error: {e}", file=sys.stderr)
        sys.exit(1)
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: parse CLI args and stream matching keys to stdout.
    main()
|
||||
0
python/devo_replicator/table_generator/.gitkeep
Normal file
0
python/devo_replicator/table_generator/.gitkeep
Normal file
145
python/devo_replicator/table_generator/FlowOptions.py
Normal file
145
python/devo_replicator/table_generator/FlowOptions.py
Normal file
@@ -0,0 +1,145 @@
|
||||
class Options:
    """Typed accessor over the flow's key/value option bag.

    Every name in ``_OPTION_KEYS`` is exposed as a read-only attribute
    (e.g. ``opts.corporate_store``) via ``__getattr__``, replacing 26
    identical hand-written one-line properties. Unknown keys in *args* are
    silently ignored; an option never supplied reads back as ``None`` (its
    initialised default), matching the original behavior.
    """

    # Order mirrors the original dict literal; used to seed self.options.
    _OPTION_KEYS = (
        "corporate_store",
        "service_name",
        "source_schema",
        "source_table",
        "access_type",
        "oracle_metadata_table",
        "oracle_igam_table",
        "query_metadata_access_type1",
        "query_metadata_access_type2a",
        "query_igam_roles",
        "ora_jdbc_url_dwh",
        "ora_jdbc_url_ods",
        "sql_file_path",
        "sql_filename_grants",
        "sentry_role_environment",
        "ranger_script",
        "type3_access_table",
        "type3_access_table_key_column",
        "type3_source_table_key_column",
        "target_s3_bucket",
        "ranger_s3_bucket",
        "ranger_s3_path",
        "rar_full_access_entitlement_list",
        "target_table",
        "tech_meta_data_fields",
        "full_access_entitlement_list",
    )

    def __init__(self, args):
        """Seed all known options with None, then overlay values from *args*."""
        self.options = {key: None for key in self._OPTION_KEYS}
        # Initialize options from arguments
        self.initialize_options(args)

    def initialize_options(self, args):
        """Copy recognised key/value pairs from the *args* mapping."""
        for key, value in args.items():
            if key in self.options:
                self.options[key] = value

    def get_option_value(self, key):
        """Return the option value, or "" for keys not present at all."""
        return self.options.get(key, "")

    def __getattr__(self, name):
        # Called only when normal attribute lookup fails, i.e. for the
        # dynamic option accessors. Guard "options" itself to avoid
        # recursion before __init__ has assigned it.
        if name != "options" and name in type(self)._OPTION_KEYS:
            return self.options.get(name, "")
        raise AttributeError(
            f"{type(self).__name__!r} object has no attribute {name!r}"
        )
|
||||
73
python/devo_replicator/table_generator/devo_query.py
Normal file
73
python/devo_replicator/table_generator/devo_query.py
Normal file
@@ -0,0 +1,73 @@
|
||||
from impala.dbapi import (
|
||||
connect,
|
||||
ProgrammingError,
|
||||
DatabaseError,
|
||||
IntegrityError,
|
||||
OperationalError,
|
||||
)
|
||||
from impala.error import HiveServer2Error
|
||||
|
||||
|
||||
def get_DEVO_connection(hostname: str, user: str, secret: str):
    """Open a TLS HiveServer2-over-HTTP connection to a DEVO Impala host.

    Authenticates with PLAIN (username/password) on port 443 against the
    ``cliservice`` HTTP path.
    """
    return connect(
        host=hostname,
        port=443,
        auth_mechanism="PLAIN",
        user=user,
        password=secret,
        use_http_transport=True,
        http_path="cliservice",
        use_ssl=True,
    )
|
||||
|
||||
|
||||
def execute_devo_query(query: str, conn):
    """Run *query* on *conn*.

    SELECT statements return ``(column_names, rows)``; any other statement
    returns ``(None, rowcount)``.

    Raises:
        Exception: wrapping the underlying impyla error, chained via ``from``.

    Bugs fixed:
      * cleanup read ``if not conn: conn.close()`` — inverted, so a live
        connection was never closed (leak) and a None one would crash;
        corrected to ``if conn:``.
      * the ``finally`` raise used ``Exception(status_code=..., detail=...)``,
        a ``TypeError`` at raise time (Exception takes no kwargs).
    """
    cursor = None
    try:
        cursor = conn.cursor()
        cursor.execute(query)
        # SELECTs fetch rows; everything else reports the affected count.
        if query.strip().lower().startswith("select"):
            rows = cursor.fetchall()
            columns = [col[0] for col in cursor.description]
            return columns, rows
        else:
            # For non-SELECT queries (e.g., INSERT, UPDATE, DELETE), just return affected rows
            return None, cursor.rowcount

    except OperationalError as oe:
        raise Exception("Failed to connect to DEVO: " + str(oe)) from oe

    except ProgrammingError as pe:
        raise Exception("Query syntax error: " + str(pe)) from pe

    except IntegrityError as ie:
        raise Exception("Insufficient permissions: " + str(ie)) from ie

    except DatabaseError as db_err:
        raise Exception("Database error: " + str(db_err)) from db_err

    except HiveServer2Error as au_err:
        raise Exception("HiveServer2Error error: " + str(au_err)) from au_err

    finally:
        try:
            if cursor:
                cursor.close()
            if conn:
                conn.close()
        except Exception as e:
            raise Exception(f"Failed to close the cursor or impala connection: {str(e)}") from e
|
||||
|
||||
def execute_query(query: str, user: str, hostname: str, password):
    """Open a DEVO connection, run *query*, and return ``(columns, result)``."""
    devo_connection = get_DEVO_connection(hostname, user, password)
    return execute_devo_query(query, devo_connection)
|
||||
|
||||
|
||||
#sql="CREATE EXTERNAL TABLE IF NOT EXISTS crp_rar.testInternalTable ( iid STRING,RANDOM_DATE DATE, number int) ;"
#sql_drop="DROP TABLE IF EXISTS crp_rar.NH_PRICE"

# NOTE(security): the ad-hoc connectivity checks that used to live below
# embedded live service-account passwords in plain text. They have been
# redacted; if those credentials were ever committed, rotate them. Resolve
# passwords via mrds.utils.secrets.get_secret instead of hard-coding them:
#print( execute_query("SELECT 1", "<devo-username>", "<impala-hostname>", get_secret("<vault-secret-ocid>")))
|
||||
69
python/devo_replicator/table_generator/ranger_test.py
Normal file
69
python/devo_replicator/table_generator/ranger_test.py
Normal file
@@ -0,0 +1,69 @@
|
||||
# Exploratory script: exercises the Apache Ranger REST client end-to-end
# (build a client, assemble a Hive table policy, dump existing policies).
# Most mutation calls are left commented out on purpose.
## Step 3: Let's create a policy

from apache_ranger.model.ranger_service import *
from apache_ranger.client.ranger_client import *
from apache_ranger.model.ranger_policy import *
from mrds.utils.secrets import get_secret

## Step 1: create a client to connect to Apache Ranger admin
# NOTE(review): environment-specific URL and vault-secret OCID are
# hard-coded here (lab values); the real flow reads them from
# config/env_config.yaml.
ranger_url ="https://devo-lab21-dl-gateway.devo-lab.om2y56.b0.cloudera.site:443/devo-lab21-dl/cdp-proxy-api/ranger"
password= get_secret("ocid1.vaultsecret.oc1.eu-frankfurt-1.amaaaaaa2ky4jjya3tsglrzfgiyfisxchref774l5y4nrler2vn54lr3li7q")
ranger_auth = ('ap-devo_lab-mrds', password)

# For Kerberos authentication
#
# from requests_kerberos import HTTPKerberosAuth
#
# ranger_auth = HTTPKerberosAuth()

ranger = RangerClient(ranger_url, ranger_auth)
# NOTE(security): TLS certificate validation is disabled below — acceptable
# for a lab probe only, never in production.
ranger.session.verify = False

# to disable SSL certificate validation (not recommended for production use!)
#
# ranger.session.verify = False


## Step 2: Let's create a service

policy = RangerPolicy()
policy.service = "cm_hive" # TODO: hard-coded service name (was: "da hardcodare")
policy.name = 'cpo_crp_mopdb_sgroi_1' #corporatestore_table_accessType
policy.resources = { 'database': RangerPolicyResource({ 'values': ['crp_RQSD'] }),
                     'table': RangerPolicyResource({ 'values': ['ANNEX_1_1_ALL'] }),
                     'column': RangerPolicyResource({ 'values': ['*'] }) } #change with correct values
allowItem1 = RangerPolicyItem() #to try allowItem1.groups
allowItem1.groups = ["d_mopdb_mpec"]
#allowItem1.users = [] #to try for single users
allowItem1.accesses = [ RangerPolicyItemAccess({ 'type': 'create' }),
                        RangerPolicyItemAccess({ 'type': 'alter' }),
                        RangerPolicyItemAccess({ 'type': 'select' }),
                        RangerPolicyItemAccess({ 'type': 'drop' }) ]

"""denyItem1 = RangerPolicyItem()
denyItem1.users = [ 'admin' ] #does it make sense to deny and not allow?
denyItem1.accesses = [ RangerPolicyItemAccess({ 'type': 'drop' }) ]"""

policy.policyItems = [ allowItem1 ]
#policy.denyPolicyItems = [ denyItem1 ]
#policy2=ranger.get_policy_by_id(policyId=5086)
#print(ranger.get_policy(serviceName="cm_hive",policyName='crp_rar_testinternalTable_alcesso1'))
#print(ranger.find_policies({"service": "cm_hive", "resources": {"database": {"values": ["crp_rar"], "isExcludes": False , "isRecursive": False}, "column": {"values": ["*"], "isExcludes": False, "isRecursive": False}, "table": {"values": ["testInternalTable"], "isExcludes": False, "isRecursive": False}}}))
#print(ranger.delete_policy(serviceName="cm_hive",policyName="crp_rar_testinternalTable_alcesso1"))
#print(policy2)
#print('Creating policy: name=' + policy.name)
#created_policy = ranger.create_policy(policy)

#print(' created policy: name=' + created_policy.name + ', id=' + str(created_policy.id))

## Step 4: Delete policy and service created above
#print('Deleting policy: id=' + str(created_policy.id))

#ranger.delete_policy_by_id(created_policy.id)

# Dump every cm_hive policy to output.txt for inspection.
data=ranger.get_policies_in_service(serviceName="cm_hive")
with open("output.txt", "w") as file:
    for string in data:
        file.write(str(string))
# redundant: the with-block above already closed the handle
file.close()
|
||||
|
||||
250
python/devo_replicator/table_generator/ranger_updater.py
Normal file
250
python/devo_replicator/table_generator/ranger_updater.py
Normal file
@@ -0,0 +1,250 @@
|
||||
from typing import List, Optional
|
||||
from apache_ranger.model.ranger_service import *
|
||||
from apache_ranger.client.ranger_client import *
|
||||
from apache_ranger.model.ranger_policy import *
|
||||
import re
|
||||
def add_table_permission_groups(corporate_store: str, target_table: str, access_type: str, source_table: str, igam_entitlement_list: List[str], columns_list: Optional[List[str]] = None, row_list: Optional[List[str]] = None):
    """Assemble the parameter dict used to build a Ranger table policy.

    Behavior (unchanged from the original):
      * the sentry IGAM source table additionally grants the "public" group;
      * missing column/row lists default to ["*"];
      * empty entitlement strings are dropped, the rest lower-cased;
      * rows are rendered as a comma-joined, single-quoted filter string.
    """
    entitlements = list(igam_entitlement_list)
    if source_table.lower() == "rar_sources_igam_sentry":
        entitlements.append("public")

    columns = columns_list if columns_list is not None else ["*"]
    rows = row_list if row_list is not None else ["*"]
    filter_condition = ",".join(f"'{row}'" for row in rows)
    igam_roles = [entry.lower() for entry in entitlements if entry != ""]

    return {
        'corporate_store': corporate_store,
        'target_table': target_table,
        'access_type': access_type,
        'columns': columns,
        'rows': filter_condition,
        'igam_roles': igam_roles,
    }
|
||||
|
||||
from typing import List, Optional
|
||||
# --- helpers ---------------------------------------------------------------
|
||||
|
||||
def _policy_name_from_params(config, policy_id: Optional[str] = None) -> Optional[str]:
|
||||
"""
|
||||
Build the exact policy name used by your create functions.
|
||||
Returns None for types where we need to match multiple (e.g., 2a without id).
|
||||
"""
|
||||
cs = config['corporate_store'].lower()
|
||||
tbl = config['target_table'].lower()
|
||||
at = config['access_type'].lower()
|
||||
base = f"cpo_{cs}_{tbl}_{at}"
|
||||
|
||||
if at == "1":
|
||||
# yaml_format_1
|
||||
return base
|
||||
elif at == "2a":
|
||||
# yaml_format_2a -> requires policy_id to be exact
|
||||
if policy_id:
|
||||
return f"{base}_policy_{policy_id}"
|
||||
# without policy_id, we’ll delete all that start with this prefix
|
||||
return None
|
||||
elif at == "2b":
|
||||
# yaml_format_2b
|
||||
return f"{base}_row_level_policy"
|
||||
elif at == "3":
|
||||
# yaml_format_3 uses same name pattern as 2b in your script
|
||||
return f"{base}_row_level_policy"
|
||||
else:
|
||||
raise ValueError(f"Invalid access type '{config['access_type']}'. Expected one of: 1, 2a, 2b, 3.")
|
||||
|
||||
|
||||
def _ranger_client(env_config) -> RangerClient:
    """Build a Ranger client authenticated with the DEVO service account.

    env_config must provide 'RANGER_HOSTNAME', 'DEVO_USERNAME', 'DEVO_SECRET'.
    """
    client = RangerClient(
        env_config['RANGER_HOSTNAME'],
        (env_config['DEVO_USERNAME'], env_config['DEVO_SECRET']),
    )
    # NOTE(review): TLS certificate verification is disabled here — confirm
    # this is acceptable outside lab/test environments.
    client.session.verify = False
    return client
|
||||
|
||||
|
||||
# --- main deletion API -----------------------------------------------------
|
||||
|
||||
def delete_policy(config, env_config, policy_id: Optional[str] = None) -> List[str]:
    """
    Delete every Ranger policy in service "cm_hive" whose name matches the
    naming scheme for config['corporate_store'] / config['target_table'].

    Parameters
    ----------
    config : mapping with 'corporate_store', 'target_table', 'access_type'.
    env_config : mapping with Ranger connection settings (see _ranger_client).
    policy_id : kept for backward compatibility; the prefix match below
        already covers '2a' policies with any numeric id.

    Returns the list of deleted policy names.
    Raises RuntimeError when no policy matched the prefix.
    """
    ranger = _ranger_client(env_config)
    service_name = "cm_hive"  # fix: was assigned twice before

    cs = config['corporate_store'].lower()
    tbl = config['target_table'].lower()
    prefix = f"cpo_{cs}_{tbl}_"
    print(prefix)

    # Fix: store/table names are data, not regex syntax — escape them before
    # embedding in the pattern; compile once outside the loop.
    pattern = re.compile(
        re.escape(prefix)
        + r"([0-9]?[a-z]?)(_policy_)?([0-9]*)?(_row_level_policy)?(full_access)?$"
    )

    # Page through all policies of the service and filter client-side to
    # keep the number of REST calls low.
    page_size = 1000
    start = 0
    candidates = []
    while True:
        params = {"pageSize": page_size, "startIndex": start}
        page = ranger.get_policies_in_service(service_name, params=params) or []
        candidates.extend(page)
        if len(page) < page_size:
            break
        start += len(page)

    deleted: List[str] = []
    for p in candidates:
        name = p["name"]
        print(f"analyzing policy:{name}")
        if pattern.fullmatch(name) is not None:
            try:
                ranger.delete_policy_by_id(p["id"])
                deleted.append(name)
            except Exception as exc:
                # Best effort: keep deleting the remaining matches, but no
                # longer hide the failure silently.
                print(f"failed to delete policy {name}: {exc}")

    if not deleted:
        raise RuntimeError(
            f"No matching policies found for deletion with prefix '{prefix}'. "
        )
    return deleted
|
||||
|
||||
|
||||
|
||||
def generate_policy(params,env_config, policy_id: Optional[str] = None):
    """Dispatch to the policy builder matching params['access_type'].

    Raises for any access type other than "1", "2a", "2b" or "3".
    """
    access_type = params['access_type'].lower()
    if access_type == "1":
        return yaml_format_1(params,env_config)
    elif access_type == "2a":
        return yaml_format_2a(params, env_config, policy_id)
    elif access_type == "2b":
        # NOTE(review): "2b" dispatches to yaml_format_1 even though
        # yaml_format_2b exists — confirm this is intentional.
        return yaml_format_1(params,env_config)
    elif access_type == "3":
        # NOTE(review): yaml_format_3's signature requires env_config,
        # filterString and full_access_list, so this call would raise
        # TypeError at runtime — confirm and fix the call site.
        return yaml_format_3(params)
    else:
        raise Exception(f"Invalid access type {params['access_type']}. Please check the input param")
|
||||
|
||||
def yaml_format_1(params,env_config) -> RangerPolicy:
    """Create a table/column-level 'select' policy (access type 1) in Ranger.

    Grants 'select' on params['columns'] of the target table to the groups
    in params['igam_roles'].  Returns the RangerPolicy object that was
    submitted (annotation fixed: the function returns a policy, not a str).
    """
    ranger=_ranger_client(env_config)

    # For Kerberos authentication
    #
    # from requests_kerberos import HTTPKerberosAuth
    #
    # ranger_auth = HTTPKerberosAuth()

    policy = RangerPolicy()
    policy.service = "cm_hive" #harcoded
    # Naming scheme: cpo_<corporatestore>_<table>_<accessType>
    policy.name = f"cpo_{params['corporate_store'].lower()}_{params['target_table'].lower()}_{params['access_type'].lower()}" #corporatestore_table_accessType
    policy.resources = { 'database': RangerPolicyResource({ 'values': [params['corporate_store'].lower()] }),
                         'table': RangerPolicyResource({ 'values': [params['target_table']] }),
                         'column': RangerPolicyResource({ 'values': params['columns'] }) }

    # Single allow item: 'select' access for all IGAM role groups.
    allowItem1 = RangerPolicyItem()
    allowItem1.groups = params['igam_roles']
    allowItem1.accesses = [RangerPolicyItemAccess({ 'type': 'select' })]
    policy.policyItems = [ allowItem1 ]
    created_policy = ranger.create_policy(policy)
    print('Created policy: name=' + created_policy.name + ', id=' + str(created_policy.id))
    return policy
|
||||
|
||||
def yaml_format_2a(params, env_config,policy_id: Optional[str]) -> RangerPolicy:
    """Create the column-level 'select' policy for access type 2a in Ranger.

    The policy name carries the numeric suffix policy_id (defaults to "0"
    when None) so that several 2a policies can coexist per table.  Returns
    the RangerPolicy object that was submitted (annotation fixed: the
    function returns a policy, not a str).
    """
    policy_ID = policy_id if policy_id is not None else "0"


    # For Kerberos authentication
    #
    # from requests_kerberos import HTTPKerberosAuth
    #
    # ranger_auth = HTTPKerberosAuth()

    ranger = _ranger_client(env_config)

    policy = RangerPolicy()
    policy.service = "cm_hive" #harcoded
    # Naming scheme: cpo_<corporatestore>_<table>_<accessType>_policy_<id>
    policy.name = f"cpo_{params['corporate_store'].lower()}_{params['target_table'].lower()}_{params['access_type'].lower()}_policy_{policy_ID}" #corporatestore_table_accessType
    policy.resources = { 'database': RangerPolicyResource({ 'values': [params['corporate_store'].lower()] }),
                         'table': RangerPolicyResource({ 'values': [params['target_table']] }),
                         'column': RangerPolicyResource({ 'values': params['columns'] }) }
    # Single allow item: 'select' access for all IGAM role groups.
    allowItem1 = RangerPolicyItem()
    allowItem1.groups = params['igam_roles']
    allowItem1.accesses = [RangerPolicyItemAccess({ 'type': 'select' })]
    policy.policyItems = [ allowItem1 ]
    created_policy = ranger.create_policy(policy)
    print(' created policy: name=' + created_policy.name + ', id=' + str(created_policy.id))
    return policy
|
||||
|
||||
def yaml_format_2b(params,env_config, full_access_list: Optional[List]) -> RangerPolicy:
    """Create the row-filter policy for access type 2b in Ranger.

    Item 1 restricts params['igam_roles'] to rows whose `source` maps to one
    of the caller's AD groups (lookup via t_ref_rar_sources_igam_sentry and
    active_directory_user_groups); item 2 gives full_access_list groups an
    unrestricted 1=1 filter.  Returns the submitted RangerPolicy (annotation
    fixed: the function returns a policy, not a str).

    NOTE(review): full_access_list is typed Optional but iterated without a
    None check — confirm callers always pass a list.
    """

    # For Kerberos authentication
    #
    # from requests_kerberos import HTTPKerberosAuth
    #
    # ranger_auth = HTTPKerberosAuth()

    ranger = _ranger_client(env_config)

    policy = RangerPolicy()
    policy.service = "cm_hive" #harcoded
    # Naming scheme: cpo_<corporatestore>_<table>_<accessType>_row_level_policy
    policy.name = f"cpo_{params['corporate_store'].lower()}_{params['target_table'].lower()}_{params['access_type'].lower()}_row_level_policy" #corporatestore_table_accessType
    policy.isEnabled = True
    policy.resources ={ 'database': RangerPolicyResource({ 'values': [params['corporate_store'].lower()] }),
                        'table': RangerPolicyResource({ 'values': [params['target_table']] })}
    # Item 1: row filter driven by the IGAM entitlement mapping tables.
    rowFilterAllowItem1= RangerRowFilterPolicyItem()
    rowFilterAllowItem1.groups = params['igam_roles']
    rowFilterAllowItem1.accesses = [RangerPolicyItemAccess({ 'type': 'select' })]
    rowFilterAllowItem1.rowFilterInfo = RangerPolicyItemRowFilterInfo({ 'filterExpr': f"lower(source) IN (select lower(rar_subsource_id) from {params['corporate_store'].lower()}.t_ref_rar_sources_igam_sentry where lower(rar_igam_entitlement) IN (select ad_group from {params['corporate_store'].lower()}.active_directory_user_groups where username = lower(regexp_extract(current_user(),'[^@]*',0))))" })
    # Item 2: unrestricted access (1=1) for the full-access groups.
    rowFilterAllowItem2= RangerRowFilterPolicyItem()
    rowFilterAllowItem2.groups = [x.lower() for x in full_access_list]
    rowFilterAllowItem2.accesses = [RangerPolicyItemAccess({ 'type': 'select' })]
    rowFilterAllowItem2.rowFilterInfo = RangerPolicyItemRowFilterInfo({ 'filterExpr': f"1=1" })
    policy.rowFilterPolicyItems= [rowFilterAllowItem1, rowFilterAllowItem2]

    created_policy = ranger.create_policy(policy)
    print(' created policy: name=' + created_policy.name + ', id=' + str(created_policy.id))
    return policy
|
||||
|
||||
|
||||
def yaml_format_3(params, env_config,filterString, full_access_list: Optional[List]) -> RangerPolicy:
    """Create a row-filter policy (access type 3) with a caller-supplied filter.

    Like yaml_format_2b, but the row filter expression for the IGAM groups
    is passed in as *filterString* instead of being derived from mapping
    tables.  Returns the submitted RangerPolicy (annotation fixed: the
    function returns a policy, not a str).

    NOTE(review): full_access_list is typed Optional but iterated without a
    None check — confirm callers always pass a list.
    """

    ranger = _ranger_client(env_config)

    policy = RangerPolicy()
    policy.service = "cm_hive" # hardcoded
    # Naming scheme shared with 2b: ..._row_level_policy
    policy.name = (
        f"cpo_{params['corporate_store'].lower()}_"
        f"{params['target_table'].lower()}_"
        f"{params['access_type'].lower()}_row_level_policy"
    )
    policy.isEnabled = True
    policy.resources = {
        "database": RangerPolicyResource({"values": [params["corporate_store"].lower()]}),
        "table": RangerPolicyResource({"values": [params["target_table"]]}),
    }

    # Row filter item
    rowFilterAllowItem = RangerRowFilterPolicyItem()
    rowFilterAllowItem.groups = params["igam_roles"]
    rowFilterAllowItem.accesses = [RangerPolicyItemAccess({"type": "select"})]
    rowFilterAllowItem.rowFilterInfo = RangerPolicyItemRowFilterInfo(
        {
            "filterExpr": filterString
        }
    )
    # Unrestricted access (1=1) for the full-access groups.
    rowFilterAllowItem2= RangerRowFilterPolicyItem()
    rowFilterAllowItem2.groups = [x.lower() for x in full_access_list]
    rowFilterAllowItem2.accesses = [RangerPolicyItemAccess({ 'type': 'select' })]
    rowFilterAllowItem2.rowFilterInfo = RangerPolicyItemRowFilterInfo({ 'filterExpr': f"1=1" })

    policy.rowFilterPolicyItems = [rowFilterAllowItem,rowFilterAllowItem2]

    # Create policy in Ranger
    created_policy = ranger.create_policy(policy)
    print(f" created policy: name={created_policy.name}, id={created_policy.id}")

    return policy
|
||||
345
python/devo_replicator/table_generator/ranger_updater_old.py
Normal file
345
python/devo_replicator/table_generator/ranger_updater_old.py
Normal file
@@ -0,0 +1,345 @@
|
||||
from typing import List, Optional
|
||||
from apache_ranger.model.ranger_service import *
|
||||
from apache_ranger.client.ranger_client import *
|
||||
from apache_ranger.model.ranger_policy import *
|
||||
import re
|
||||
def add_table_permission_groups(corporate_store: str, target_table: str, access_type: str, source_table: str, igam_entitlement_list: List[str], columns_list: Optional[List[str]] = None, row_list: Optional[List[str]] = None):
    """Collect the permission inputs for one table into a parameter dict.

    Adds the "public" group for the rar_sources_igam_sentry source table,
    defaults missing column/row lists to ["*"], quotes the row values for a
    SQL IN list and lower-cases the non-empty entitlements.
    """
    grants_public = source_table.lower() == "rar_sources_igam_sentry"
    all_entitlements = igam_entitlement_list + ["public"] if grants_public else igam_entitlement_list

    selected_columns = ["*"] if columns_list is None else columns_list
    selected_rows = ["*"] if row_list is None else row_list

    return {
        'corporate_store': corporate_store,
        'target_table': target_table,
        'access_type': access_type,
        'columns': selected_columns,
        'rows': ','.join(f"'{item}'" for item in selected_rows),
        'igam_roles': [name.lower() for name in all_entitlements if name != ""],
    }
|
||||
|
||||
from typing import List, Optional
|
||||
# --- helpers ---------------------------------------------------------------
|
||||
|
||||
def _policy_name_from_params(config, policy_id: Optional[str] = None) -> Optional[str]:
|
||||
"""
|
||||
Build the exact policy name used by your create functions.
|
||||
Returns None for types where we need to match multiple (e.g., 2a without id).
|
||||
"""
|
||||
cs = config.corporate_store.lower()
|
||||
tbl = config.target_table.lower()
|
||||
at = config.access_type.lower()
|
||||
base = f"cpo_{cs}_{tbl}_{at}"
|
||||
|
||||
if at == "1":
|
||||
# yaml_format_1
|
||||
return base
|
||||
elif at == "2a":
|
||||
# yaml_format_2a -> requires policy_id to be exact
|
||||
if policy_id:
|
||||
return f"{base}_policy_{policy_id}"
|
||||
# without policy_id, we’ll delete all that start with this prefix
|
||||
return None
|
||||
elif at == "2b":
|
||||
# yaml_format_2b
|
||||
return f"{base}_row_level_policy"
|
||||
elif at == "3":
|
||||
# yaml_format_3 uses same name pattern as 2b in your script
|
||||
return f"{base}_row_level_policy"
|
||||
else:
|
||||
raise ValueError(f"Invalid access type '{config.access_type}'. Expected one of: 1, 2a, 2b, 3.")
|
||||
|
||||
|
||||
def _ranger_client(env_config) -> RangerClient:
    """Create a RangerClient authenticated with the DEVO service account.

    env_config must provide 'RANGER_HOSTNAME', 'DEVO_USERNAME', 'DEVO_SECRET'.
    """
    ranger_url = env_config['RANGER_HOSTNAME']
    ranger_auth = ( env_config['DEVO_USERNAME'], env_config['DEVO_SECRET'])
    client = RangerClient(ranger_url, ranger_auth)
    # NOTE(review): TLS certificate verification is disabled — confirm this
    # is acceptable outside lab/test environments.
    client.session.verify = False
    return client
|
||||
|
||||
|
||||
# --- main deletion API -----------------------------------------------------
|
||||
|
||||
def delete_policy(config,env_config, policy_id: Optional[str] = None) -> List[str]:
    """
    Delete Ranger policy/policies by name based on:
    - config.corporate_store
    - config.target_table
    - typeOfAccess: "1", "2a", "2b", "3"
    - policy_id: optional (only meaningful for '2a')

    Returns a list of deleted policy names.
    Raises RuntimeError when nothing matched the prefix.
    """
    ranger = _ranger_client(env_config)
    service_name = "cm_hive"

    # Try build exact name
    deleted: List[str] = []

    # If we don’t have an exact name (e.g. type 2a without policy_id),
    # delete *all* that match the expected prefix.
    cs = config.corporate_store.lower()
    tbl = config.target_table.lower()
    at = config.access_type.lower()  # NOTE(review): unused below
    prefix = f"cpo_{cs}_{tbl}_"
    # Fetch all policies for the table and filter client-side to reduce calls.
    start = 0
    candidates = []
    page_size=1000
    service_name="cm_hive"  # NOTE(review): duplicate assignment, same value
    while True:
        # Page through the service's policies 1000 at a time.
        params = {"pageSize": page_size, "startIndex": start}
        page = ranger.get_policies_in_service(service_name, params=params) or []
        candidates.extend(page)
        if len(page) < page_size:
            break
        start += len(page)
    for p in candidates:
        name = p["name"]
        print(f"analizing policy:{name}")
        # NOTE(review): prefix is interpolated unescaped into the regex —
        # store/table names containing regex metacharacters would misbehave.
        if re.fullmatch(f"{prefix}([0-9]?[a-z]?)(_policy_)?([0-9]*)?(_row_level_policy)?(full_access)?$",name) != None:
            try:
                ranger.delete_policy_by_id(p["id"])
                deleted.append(name)
            except Exception:
                # continue attempting others
                # NOTE(review): failures are swallowed silently — consider logging.
                pass
    if not deleted:
        raise RuntimeError(
            f"No matching policies found for deletion with prefix '{prefix}'. "
            f"Provide 'policy_id' to delete a specific 2a policy."
        )
    return deleted
|
||||
|
||||
|
||||
|
||||
def generate_policy(params,env_config, policy_id: Optional[str] = None):
    """Dispatch to the policy builder matching params['access_type'].

    Raises for any access type other than "1", "2a", "2b" or "3".
    """
    access_type = params['access_type'].lower()
    if access_type == "1":
        return yaml_format_1(params,env_config)
    elif access_type == "2a":
        return yaml_format_2a(params, env_config, policy_id)
    elif access_type == "2b":
        # NOTE(review): "2b" dispatches to yaml_format_1 even though
        # yaml_format_2b exists — confirm this is intentional.
        return yaml_format_1(params,env_config)
    elif access_type == "3":
        # NOTE(review): yaml_format_3's signature requires env_config,
        # filterString and full_access_list, so this call would raise
        # TypeError at runtime — confirm and fix the call site.
        return yaml_format_3(params)
    else:
        raise Exception(f"Invalid access type {params['access_type']}. Please check the input param")
|
||||
|
||||
def yaml_format_1(params,env_config) -> str:
    """Create a table/column-level 'select' policy (access type 1) in Ranger.

    NOTE(review): create_policy failures are swallowed by a bare except, and
    the function implicitly returns None (the '-> str' annotation does not
    match) — confirm before relying on the return value.
    """
    ranger=_ranger_client(env_config)

    # For Kerberos authentication
    #
    # from requests_kerberos import HTTPKerberosAuth
    #
    # ranger_auth = HTTPKerberosAuth()

    policy = RangerPolicy()
    policy.service = "cm_hive" #harcoded
    # Naming scheme: cpo_<corporatestore>_<table>_<accessType>
    policy.name = f"cpo_{params['corporate_store'].lower()}_{params['target_table'].lower()}_{params['access_type'].lower()}" #corporatestore_table_accessType
    policy.resources = { 'database': RangerPolicyResource({ 'values': [params['corporate_store'].lower()] }),
                         'table': RangerPolicyResource({ 'values': [params['target_table']] }),
                         'column': RangerPolicyResource({ 'values': params['columns'] }) }

    # Single allow item: 'select' access for all IGAM role groups.
    allowItem1 = RangerPolicyItem()
    allowItem1.groups = params['igam_roles']
    allowItem1.accesses = [RangerPolicyItemAccess({ 'type': 'select' })]
    policy.policyItems = [ allowItem1 ]
    print(policy)
    try:
        created_policy = ranger.create_policy(policy)
        print('Created policy: name=' + created_policy.name + ', id=' + str(created_policy.id))
    except:
        # NOTE(review): bare except hides every error (even KeyboardInterrupt).
        pass
    # Dead code below: an old YAML template kept as a string-literal statement.
    '''
    yaml_format = f"""- name: "{{{{ ecb_env }}}} : {{{{ cdp_env_name }}}}: allow CRP RAR users to select core tables"
    devo_ranger_client:
    name: "cpo_{params['corporate_store'].lower()}_{params['target_table'].lower()}_{params['access_type'].lower()}"
    policy:
    service: cm_hive
    resources:
    database:
    values:
    - {params['corporate_store'].lower()}
    table:
    values:
    - {params['target_table']}
    column:
    values:
    {params['columns']}
    policyItems:
    - groups:
    {params['igam_roles'].lower()}
    accesses:
    - select
    """
    return yaml_format'
    '''
|
||||
|
||||
def yaml_format_2a(params, env_config,policy_id: Optional[str]) -> str:
    """Build (but do not create) the '2a' column policy named ..._policy_<id>.

    NOTE(review): the create_policy call is commented out, so this version
    only prints the policy and implicitly returns None (the '-> str'
    annotation does not match).
    """
    policy_ID = policy_id if policy_id is not None else "0"


    # For Kerberos authentication
    #
    # from requests_kerberos import HTTPKerberosAuth
    #
    # ranger_auth = HTTPKerberosAuth()

    ranger = _ranger_client(env_config)

    policy = RangerPolicy()
    policy.service = "cm_hive" #harcoded
    # Naming scheme: cpo_<corporatestore>_<table>_<accessType>_policy_<id>
    policy.name = f"cpo_{params['corporate_store'].lower()}_{params['target_table'].lower()}_{params['access_type'].lower()}_policy_{policy_ID}" #corporatestore_table_accessType
    policy.resources = { 'database': RangerPolicyResource({ 'values': [params['corporate_store'].lower()] }),
                         'table': RangerPolicyResource({ 'values': [params['target_table']] }),
                         'column': RangerPolicyResource({ 'values': params['columns'] }) }
    allowItem1 = RangerPolicyItem()
    allowItem1.groups = params['igam_roles']
    allowItem1.accesses = [RangerPolicyItemAccess({ 'type': 'select' })]
    policy.policyItems = [ allowItem1 ]


    print(policy)
    print("\n\n")


    #created_policy = ranger.create_policy(policy)
    #print(' created policy: name=' + created_policy.name + ', id=' + str(created_policy.id))

    # Dead code below: an old YAML template kept as a string-literal statement.
    '''
    yaml_format = f"""- name: "{{{{ ecb_env }}}} : {{{{ cdp_env_name }}}}: allow CRP RAR users to select core tables"
    devo_ranger_client:
    name: "cpo_{params['corporate_store'].lower()}_{params['target_table'].lower()}_{params['access_type'].lower()}_policy_{policy_ID}"
    policy:
    service: cm_hive
    resources:
    database:
    values:
    - {params['corporate_store'].lower()}
    table:
    values:
    - {params['target_table']}
    column:
    values:
    {params['columns']}
    policyItems:
    - groups:
    {params['igam_roles'].lower()}
    accesses:
    - select
    """
    return yaml_format'
    '''
|
||||
|
||||
def yaml_format_2b(params,env_config, full_access_list: Optional[List]) -> str:
    """Create the row-filter policy for access type 2b in Ranger.

    Item 1 restricts params['igam_roles'] via the IGAM mapping tables;
    item 2 gives full_access_list groups an unrestricted 1=1 filter.
    NOTE(review): the function implicitly returns None (the '-> str'
    annotation does not match), and full_access_list is typed Optional but
    iterated without a None check — confirm callers always pass a list.
    """

    # For Kerberos authentication
    #
    # from requests_kerberos import HTTPKerberosAuth
    #
    # ranger_auth = HTTPKerberosAuth()

    ranger = _ranger_client(env_config)

    policy = RangerPolicy()
    policy.service = "cm_hive" #harcoded
    # Naming scheme: cpo_<corporatestore>_<table>_<accessType>_row_level_policy
    policy.name = f"cpo_{params['corporate_store'].lower()}_{params['target_table'].lower()}_{params['access_type'].lower()}_row_level_policy" #corporatestore_table_accessType
    policy.isEnabled = True
    policy.resources ={ 'database': RangerPolicyResource({ 'values': [params['corporate_store'].lower()] }),
                        'table': RangerPolicyResource({ 'values': [params['target_table']] })}
    # Item 1: row filter driven by the IGAM entitlement mapping tables.
    rowFilterAllowItem1= RangerRowFilterPolicyItem()
    rowFilterAllowItem1.groups = params['igam_roles']
    rowFilterAllowItem1.accesses = [RangerPolicyItemAccess({ 'type': 'select' })]
    rowFilterAllowItem1.rowFilterInfo = RangerPolicyItemRowFilterInfo({ 'filterExpr': f"lower(source) IN (select lower(rar_subsource_id) from {params['corporate_store'].lower()}.t_ref_rar_sources_igam_sentry where lower(rar_igam_entitlement) IN (select ad_group from {params['corporate_store'].lower()}.active_directory_user_groups where username = lower(regexp_extract(current_user(),'[^@]*',0))))" })
    # Item 2: unrestricted access (1=1) for the full-access groups.
    rowFilterAllowItem2= RangerRowFilterPolicyItem()
    rowFilterAllowItem2.groups = [x.lower() for x in full_access_list]
    rowFilterAllowItem2.accesses = [RangerPolicyItemAccess({ 'type': 'select' })]
    rowFilterAllowItem2.rowFilterInfo = RangerPolicyItemRowFilterInfo({ 'filterExpr': f"1=1" })
    policy.rowFilterPolicyItems= [rowFilterAllowItem1, rowFilterAllowItem2]
    print(policy)

    created_policy = ranger.create_policy(policy)
    print(' created policy: name=' + created_policy.name + ', id=' + str(created_policy.id))
    # Dead code below: an old YAML template kept as a string-literal statement.
    '''
    yaml_format = f"""- name: "{{{{ ecb_env }}}} : {{{{ cdp_env_name }}}}: filter by confidentiality level"
    devo_ranger_client:
    name: "cpo_{params['corporate_store'].lower()}_{params['target_table'].lower()}_{params['access_type'].lower()}_row_level_policy"
    policy:
    isEnabled: "true"
    service: cm_hive
    resources:
    database:
    values:
    - {params['corporate_store'].lower()}
    table:
    values:
    - {params['target_table']}
    rowFilterPolicyItems:
    """
    return yaml_format
    '''
|
||||
|
||||
def yaml_format_3(params, env_config,filterString, full_access_list: Optional[List]) -> str:
    """Create a row-filter policy (access type 3) with a caller-supplied filter.

    Like yaml_format_2b, but the filter expression for the IGAM groups is
    passed in as *filterString*.  Returns the object from create_policy.
    NOTE(review): the '-> str' annotation does not match the returned
    created_policy, and full_access_list is typed Optional but iterated
    without a None check — confirm callers always pass a list.
    """

    ranger = _ranger_client(env_config)

    policy = RangerPolicy()
    policy.service = "cm_hive" # hardcoded
    # Naming scheme shared with 2b: ..._row_level_policy
    policy.name = (
        f"cpo_{params['corporate_store'].lower()}_"
        f"{params['target_table'].lower()}_"
        f"{params['access_type'].lower()}_row_level_policy"
    )
    policy.isEnabled = True
    policy.resources = {
        "database": RangerPolicyResource({"values": [params["corporate_store"].lower()]}),
        "table": RangerPolicyResource({"values": [params["target_table"]]}),
    }

    # Row filter item
    rowFilterAllowItem = RangerRowFilterPolicyItem()
    rowFilterAllowItem.groups = params["igam_roles"]
    rowFilterAllowItem.accesses = [RangerPolicyItemAccess({"type": "select"})]
    rowFilterAllowItem.rowFilterInfo = RangerPolicyItemRowFilterInfo(
        {
            "filterExpr": filterString
        }
    )
    # Unrestricted access (1=1) for the full-access groups.
    rowFilterAllowItem2= RangerRowFilterPolicyItem()
    rowFilterAllowItem2.groups = [x.lower() for x in full_access_list]
    rowFilterAllowItem2.accesses = [RangerPolicyItemAccess({ 'type': 'select' })]
    rowFilterAllowItem2.rowFilterInfo = RangerPolicyItemRowFilterInfo({ 'filterExpr': f"1=1" })

    policy.rowFilterPolicyItems = [rowFilterAllowItem,rowFilterAllowItem2]
    print(policy)
    # Create policy in Ranger
    created_policy = ranger.create_policy(policy)
    print(f" created policy: name={created_policy.name}, id={created_policy.id}")

    return created_policy
|
||||
|
||||
|
||||
"""
|
||||
yaml_format = f"- name: "{{{{ ecb_env }}}} : {{{{ cdp_env_name }}}}: filter by confidentiality level"
|
||||
devo_ranger_client:
|
||||
name: "cpo_{params['corporate_store'].lower()}_{params['target_table'].lower()}_{params['access_type'].lower()}_row_level_policy"
|
||||
policy:
|
||||
isEnabled: "true"
|
||||
service: cm_hive
|
||||
resources:
|
||||
database:
|
||||
values:
|
||||
- {params['corporate_store'].lower()}
|
||||
table:
|
||||
values:
|
||||
- {params['target_table']}
|
||||
rowFilterPolicyItems:
|
||||
return yaml_format
|
||||
"""
|
||||
@@ -0,0 +1,793 @@
|
||||
import pandasql as ps
|
||||
import pandas as pd
|
||||
import mrds.utils.manage_files as fileManager
|
||||
import logging
|
||||
import tableBuilderQueries as tbq
|
||||
from devo_query import execute_query
|
||||
import ranger_updater_old as ranger
|
||||
import os
|
||||
import yaml
|
||||
import FlowOptions as fo
|
||||
import numpy as np
|
||||
from mrds.utils.secrets import get_secret
|
||||
import traceback
|
||||
from mrds.utils import oraconn
|
||||
|
||||
# Set up basic configuration for logging
logging.basicConfig(level=logging.INFO)

# Create a logger object (module-level, shared by all helpers below)
logger = logging.getLogger(__name__)
import re  # NOTE(review): consider moving up into the main import block
|
||||
|
||||
#0 utilities
|
||||
def initialize_config(config_file_path):
    """Load and return the YAML configuration at *config_file_path*.

    Raises FileNotFoundError when the path does not exist.
    """
    if not os.path.exists(config_file_path):
        raise FileNotFoundError(f"Configuration file {config_file_path} not found.")

    with open(config_file_path, "r") as handle:
        return yaml.safe_load(handle)
|
||||
|
||||
def fix_impala_sql(sql: str) -> str:
    """Backtick-quote reserved/illegal column names in an Impala CREATE TABLE.

    Rewrites only the column list between the first '(' and the
    'ROW FORMAT SERDE' clause; everything else is passed through.
    NOTE(review): the final .replace() pass strips escaped quotes globally,
    i.e. also outside the column list — confirm that is intended.
    """
    # List of reserved keywords in Impala that need backticks if used as column names
    impala_reserved_keywords = {
        'date', 'value', 'source', 'comment', 'partition', 'row', 'select', 'insert',
        'table', 'external', 'format', 'location', 'stored', 'inputformat', 'outputformat',
        'scenario', 'string', 'int', 'decimal', 'timestamp', 'float', 'double','procedure', 'floor'
    }

    # Regex pattern to find column definitions
    # (name, type with optional precision, optional comment '...')
    pattern = re.compile(
        r'(?P<col>`?\w+`?)\s+(?P<type>[A-Za-z]+\s*(?:\([^)]+\))?)\s*(?P<comment>comment\s*\'[^\']*\'|)?',
        re.IGNORECASE
    )

    def replace(match):
        # Rebuild one "col type [comment]" fragment, quoting col if needed.
        col = match.group('col').strip('`')
        dtype = match.group('type')
        comment = match.group('comment') or ''
        # Add backticks only if column name is a reserved keyword or contains special chars
        if col.lower() in impala_reserved_keywords or not re.match(r'^[A-Za-z_][A-Za-z0-9_]*$', col):
            col = f'`{col}`'
        return f"{col} {dtype} {comment}".strip()

    # Only replace column list part between parentheses
    table_def_start = sql.find('(')
    table_def_end = sql.find('ROW FORMAT SERDE', table_def_start)
    if table_def_start == -1 or table_def_end == -1:
        raise ValueError("Invalid SQL format: Missing column definition parentheses.")

    before = sql[:table_def_start + 1]
    columns = sql[table_def_start + 1:table_def_end]
    after = sql[table_def_end:]

    # Replace all columns inside definition
    fixed_columns = pattern.sub(replace, columns)

    # Combine and return
    final= before + fixed_columns + after
    final=final.replace("\\'", "").replace('\\\\', '\\')
    return final
|
||||
|
||||
|
||||
def applyQueryParameters(query: str, parameters: str) -> str:
    """Substitute $$$1, $$$2, ... placeholders in *query*.

    *parameters* is a semicolon-separated value list; $$$1 receives the
    first value, $$$2 the second, and so on.  Substitution runs from the
    highest index down so that e.g. $$$12 is not clobbered by $$$1.
    An empty/None *parameters* returns the query unchanged.
    """
    if not parameters:
        return query
    values = parameters.split(';')
    result = query
    for position in range(len(values), 0, -1):
        result = result.replace(f"$$${position}", values[position - 1])
    return result
|
||||
|
||||
def format_column_definition(row):
    """Render one metadata row as a Hive column definition string.

    Produces "<name> <type>" and appends "comment '<description>'" when the
    description is present; single quotes in the description are escaped so
    the comment stays a valid SQL literal.
    """
    base = f"{row['column_name']} {row['data_type_string']}"
    if pd.isnull(row['data_description']):
        return base
    escaped = str(row['data_description']).replace("'", "\\'")
    return f"{base} comment '{escaped}'"
|
||||
#1 receive table name and check for target table and access type
|
||||
|
||||
def execute_oracle_query(sql):
    """Run *sql* against the MRDS_LOADER_MOPDB Oracle connection.

    Returns the full result set as a DataFrame whose columns are the
    lower-cased cursor description names.

    Fix: cursor and connection are now closed via try/finally even when the
    query raises (previously they leaked on any exception).
    """
    oracle_conn = oraconn.connect('MRDS_LOADER_MOPDB')
    try:
        cursor = oracle_conn.cursor()
        try:
            options = cursor.execute(sql).fetchall()
            oracle_conn.commit()  # no-op for SELECTs; kept for DML callers
            ## fetch db dtypes from the cursor description
            df = pd.DataFrame(options, columns=[row[0].lower() for row in cursor.description])
        finally:
            cursor.close()
    finally:
        oracle_conn.close()
    return df
|
||||
|
||||
|
||||
def get_target_table(oracle_mgmt_table, source_schema, source_table, env):
    """Look up the DEVO table alias for a source schema/table pair.

    NOTE(review): values are interpolated directly into SQL — confirm the
    inputs come from trusted configuration.
    """
    query = (
        f"SELECT DISTINCT TABLE_ALIAS FROM {oracle_mgmt_table} "
        f"WHERE OWNER = '{source_schema}' AND TABLE_NAME = '{source_table}'"
    )
    return execute_oracle_query(query)
|
||||
|
||||
def get_type_ofAccess(oracle_metadata_table, source_schema, source_table, env):
    """Fetch the distinct, currently valid RAR3 access types for a table.

    Fix: a space was missing between the OWNER literal and AND
    (previously: "...= '{source_schema}'AND TABLE_NAME...").
    """
    sql = (
        f"SELECT DISTINCT RAR3_TYPE_OF_ACCESS FROM {oracle_metadata_table} "
        f"WHERE A_VALID_TO > SYSDATE AND OWNER = '{source_schema}' "
        f"AND TABLE_NAME = '{source_table}'"
    )
    return execute_oracle_query(sql)
|
||||
|
||||
#2 load metadata
|
||||
def readIGAMRoles(config, env):
    """Read the IGAM role mapping for config.service_name from Oracle.

    The sentry role environment is bound into the query's $$$1 placeholder.
    """
    role_query = tbq.get_query_igam_roles(config.oracle_igam_table, config.service_name)
    logger.info("Querying the IGAM Table")

    bound_query = applyQueryParameters(role_query, f"'{config.sentry_role_environment}'")

    logger.info("Replaced params to IGAM Table:")

    return execute_oracle_query(bound_query)
|
||||
|
||||
def loadMetadataTable(config, env):
    """Return the comma-joined Hive column definitions for the source table."""
    metadata_sql = tbq.get_query_metadata(config.oracle_metadata_table, config.source_schema, config.source_table)

    logger.info("Map Oracle metadata (data types) to Hive query: ")

    metadata_df = execute_oracle_query(metadata_sql)

    logger.info("Fetch all fields for table and concatenate them separated by ','")
    column_definitions = metadata_df.apply(format_column_definition, axis=1).tolist()

    return ",".join(column_definitions)
|
||||
|
||||
|
||||
#3 drop table and policies
|
||||
def deleteExternalTable(config, env_config):
    """Drop the target table and its Ranger policies (best effort).

    Fix: policy-deletion failures (e.g. no matching policy exists yet) are
    now logged instead of being silently swallowed; the DROP TABLE still
    runs either way.
    """
    try:
        ranger.delete_policy(config, env_config)
    except Exception as exc:
        logger.info("Ranger policy cleanup skipped: %s", exc)
    sql_drop = f"DROP TABLE IF EXISTS {config.corporate_store}.{config.target_table}"
    execute_query(
        sql_drop,
        env_config['DEVO_USERNAME'], env_config['IMPALA_HOSTNAME'], env_config['DEVO_SECRET'],
    )
|
||||
|
||||
#4 create external table and policies
|
||||
def createExternalTables(config, tableFields, env_config):
    """Create the snappy-parquet external table for the configured target.

    The DDL is passed through fix_impala_sql to backtick-quote reserved
    column names before being executed via Hive.
    """
    ddl_parts = [
        f"CREATE EXTERNAL TABLE {config.corporate_store}.{config.target_table} ",
        f"({tableFields}, {config.tech_meta_data_fields}) ",
        "ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' ",
        "STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' ",
        "OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' ",
        f"LOCATION '{config.target_s3_bucket}/{config.target_table}' ",
        "TBLPROPERTIES (",
        "'external.table.purge'='true', ",
        "'parquet.compression'='snappy')",
    ]
    ddl = fix_impala_sql("".join(ddl_parts))
    execute_query(ddl, env_config['DEVO_USERNAME'], env_config['HIVE_HOSTNAME'], env_config['DEVO_SECRET'])
|
||||
|
||||
def createTableFromExternal( config, tableFields,env_config ):
    """Materialize the target table as a CTAS copy of its ``_EXT`` counterpart.

    ``tableFields`` is accepted for signature parity with
    ``createExternalTables`` but is not used here: the column set is
    inherited from the SELECT.
    """
    qualified_name = f"{config.corporate_store}.{config.target_table}"
    sql_create = (
        f"CREATE EXTERNAL TABLE {qualified_name} AS "
        f"SELECT * FROM {qualified_name}_EXT"
    )
    execute_query(sql_create, env_config['DEVO_USERNAME'], env_config['HIVE_HOSTNAME'], env_config['DEVO_SECRET'])
||||
def accessTypeMapper(config, env_config, igamRoleDF):
    """Dispatch to the policy-generation routine matching ``config.access_type``."""
    handlers = {
        '1': accessType_1,
        '2a': accessType_2A,
        '2b': accessType_2B,
        '3': accessType_3,
    }
    handler = handlers.get(config.access_type.lower())
    if handler is not None:
        handler(config, env_config, igamRoleDF)
    else:
        logger.info(f"Invalid access type {config.access_type}. Please check the input param")
||||
def accessType_1(config, env_config, igamRoleDF):
    """Generate the Ranger policy YAML for access type 1 (table-level grants).

    Joins the Oracle metadata for the source table with the IGAM role
    mapping: rows whose source differs from ``config.service_name`` are
    matched on ``datasource``; rows whose source equals the service name are
    cross-joined against every IGAM role. The resulting entitlements
    (optionally merged with ``config.full_access_entitlement_list``) become a
    single table-level policy.

    Returns:
        The formatted YAML policy produced by ``ranger.generate_policy``.
    """
    logger.info("Grant privileges for access type 1")
    logger.info("Fetch metadata from Oracle for access type 1")

    # ---- Construct query and fetch from Oracle ----
    queryParams = f"'{config.source_schema}.{config.source_table}'"
    queryMetadataAccessType1 = tbq.get_query_metadata_access_type1(config.oracle_metadata_table)
    queryWithParamsAccessType1 = applyQueryParameters(queryMetadataAccessType1, queryParams)

    # Fix: log the actual query text (the message used to be empty,
    # inconsistent with accessType_2A) and drop the stray chained
    # assignment (`= df =`) that leaked an extra name.
    logger.info(f"Metadata table query: {queryWithParamsAccessType1}")
    jdbcMetaDataAccessType1DF = execute_oracle_query(queryWithParamsAccessType1)

    # ---- Normalize columns ----
    # NOTE: igamRoleDF is normalized in place, as in the original.
    df = jdbcMetaDataAccessType1DF.copy()
    df["rar3_type_of_access"] = df["rar3_type_of_access"].astype(str).str.strip()
    df["source"] = df["source"].astype(str).str.strip().str.upper()
    igamRoleDF["datasource"] = igamRoleDF["datasource"].astype(str).str.strip().str.upper()

    # ---- Branch A: source != service name -> inner join on datasource ----
    left_a = (
        df.loc[
            (df["rar3_type_of_access"] == "1") & (df["source"] != config.service_name),
            ["table_name", "source"]
        ]
        .drop_duplicates()
    )

    branch_a = (
        left_a.merge(
            igamRoleDF,
            left_on="source",
            right_on="datasource",
            how="inner"
        )
        [["table_name", "source", "subsource_id", "igam_entitlement", "environment"]]
        .drop_duplicates()
    )

    # ---- Branch B: source == service name (CROSS JOIN with igamRoleDF) ----
    left_b = (
        df.loc[
            (df["rar3_type_of_access"] == "1") & (df["source"] == config.service_name),
            ["table_name", "source"]
        ]
        .drop_duplicates()
    )

    if not left_b.empty:
        branch_b = (
            left_b.merge(igamRoleDF, how="cross")
            [["table_name", "source", "subsource_id", "igam_entitlement", "environment"]]
            .drop_duplicates()
        )
    else:
        branch_b = pd.DataFrame(columns=["table_name", "source", "subsource_id", "igam_entitlement", "environment"])

    # ---- UNION (distinct) ----
    typeOneDF = (
        pd.concat([branch_a, branch_b], ignore_index=True)
        .drop_duplicates()
        .reset_index(drop=True)
    )

    # ---- Collect IGAM entitlements ----
    igam_entitlements = (
        typeOneDF["igam_entitlement"]
        .dropna()
        .astype(str)
        .str.strip()
        .tolist()
    )

    # Merge with optional full access list
    if config.full_access_entitlement_list is None:
        combined_entitlements = igam_entitlements
    else:
        combined_entitlements = igam_entitlements + config.full_access_entitlement_list

    # Add table permission groups using YAMLFormatter
    params = ranger.add_table_permission_groups(
        config.corporate_store,
        config.target_table,
        config.access_type,
        config.source_table,
        combined_entitlements
    )

    # Generate the final YAML policy
    formattedYaml = ranger.generate_policy(params, env_config, None)
    logger.info("Final YAML format")

    return formattedYaml
||||
def accessType_2A(config, env_config, igamRoleDF):
    """Generate column-level Ranger policies for access type 2a.

    For every column flagged '2a' in the Oracle metadata, the owning source
    is resolved to IGAM entitlements (inner join on ``subsource_id``, or a
    cross join when the source equals ``config.service_name``). Columns are
    grouped per entitlement set and one policy is emitted per distinct
    column list; an extra full-access policy is appended when
    ``config.full_access_entitlement_list`` is provided.
    """
    logger.info("Grant privileges for access type 2a")
    logger.info("Fetch the metadata in Oracle for access type 2a")

    # ---- Construct query and fetch from Oracle ----
    queryParams = f"'{config.source_schema}.{config.source_table}'"
    queryMetadataAccessType2a = tbq.get_query_metadata_access_type2a(config.oracle_metadata_table)
    queryWithParamsAccessType2a = applyQueryParameters(queryMetadataAccessType2a, queryParams)

    logger.info(f"Meta data table query: {queryWithParamsAccessType2a} ")
    jdbcMetaDataAccessType2aDF = execute_oracle_query(queryWithParamsAccessType2a)

    # ---- Normalize columns ----
    df = jdbcMetaDataAccessType2aDF.copy()
    df["rar3_type_of_access"] = df["rar3_type_of_access"].astype(str).str.strip().str.lower()
    df["source"] = df["source"].astype(str).str.strip().str.upper()
    # Fix: replaced a leftover debug `print(df)` with a debug-level log entry.
    logger.debug("Access type 2a metadata:\n%s", df)

    roles = igamRoleDF.copy()
    # expected columns in igamRoleDF: subsource_id, igam_entitlement, environment
    roles["subsource_id"] = roles["subsource_id"].astype(str).str.strip().str.upper()
    roles["igam_entitlement"] = roles["igam_entitlement"].astype(str).str.strip()

    # ---- Branch A: source != service_name -> INNER JOIN on source == subsource_id ----
    left_a = (
        df.loc[
            (df["rar3_type_of_access"] == "2a")
            & (df["source"] != config.service_name.upper()),
            ["table_name", "column_name", "source"]
        ]
    )

    branch_a = (
        left_a.merge(
            roles,
            left_on="source",
            right_on="subsource_id",
            how="inner"
        )
        .drop(columns=["subsource_id", "source"], errors="ignore")
        [["table_name", "column_name", "igam_entitlement", "environment"]]
    )

    # ---- Branch B: source == service_name -> CROSS JOIN with igamRoleDF ----
    left_b = (
        df.loc[
            (df["rar3_type_of_access"] == "2a")
            & (df["source"] == config.service_name.upper()),
            ["table_name", "column_name", "source"]
        ]
    )

    if not left_b.empty:
        try:
            branch_b = (
                left_b.merge(roles, how="cross")
                .drop(columns=["subsource_id", "source"], errors="ignore")
                [["table_name", "column_name", "igam_entitlement", "environment"]]
            )
        except TypeError:
            # pandas < 1.2 fallback: emulate the cross join with a constant key.
            left_b["_cj"] = 1
            roles["_cj"] = 1
            branch_b = (
                left_b.merge(roles, on="_cj")
                .drop(columns=["_cj", "subsource_id", "source"], errors="ignore")
                [["table_name", "column_name", "igam_entitlement", "environment"]]
            )
            # cleanup in case `roles` is reused later
            roles.drop(columns=["_cj"], inplace=True, errors="ignore")
    else:
        branch_b = pd.DataFrame(columns=["table_name", "column_name", "igam_entitlement", "environment"])

    # ---- UNION ----
    one_df = (
        pd.concat([branch_a, branch_b], ignore_index=True)
        .reset_index(drop=True)
    )

    # ---- Group 1: (table_name, igam_entitlement) -> sorted, comma-joined column_list ----
    tmp = one_df.sort_values(["table_name", "igam_entitlement", "column_name"], kind="mergesort")
    new_df = (
        tmp.groupby(["table_name", "igam_entitlement"], as_index=False)["column_name"]
        .apply(lambda s: ",".join(s.dropna().astype(str).tolist()))
        .rename(columns={"column_name": "column_list"})
    )
    # Columns: table_name, igam_entitlement, column_list

    # ---- Group 2: (table_name, column_list) -> comma-joined igam_entitlement ----
    grouped = (
        new_df.groupby(["table_name", "column_list"], as_index=False)["igam_entitlement"]
        .apply(lambda s: ",".join(s.dropna().astype(str).tolist()))
    )
    # Columns: table_name, column_list, igam_entitlement

    # ---- ROW_NUMBER() OVER (ORDER BY column_list) -> policy_id ----
    grouped = grouped.sort_values(["column_list"], kind="mergesort")
    grouped["policy_id"] = np.arange(1, len(grouped) + 1).astype(int)

    # ---- Emit policies: one per (table_name, column_list) row ----
    for _, row in grouped.iterrows():
        entitlements_list = [e.strip() for e in str(row["igam_entitlement"]).split(",") if e.strip()]
        columns_list = [c.strip() for c in str(row["column_list"]).split(",") if c.strip()]
        policy_id = str(int(row["policy_id"]))

        params = ranger.add_table_permission_groups(
            config.corporate_store,
            config.target_table,
            config.access_type,  # "2a"
            config.source_table,
            entitlements_list,
            columns_list=columns_list
        )
        ranger.generate_policy(params, env_config, policy_id)

    # ---- Optional: append full-access policy if a list is provided on config ----
    if getattr(config, "full_access_entitlement_list", None):
        # If the config already provides a list, use it directly; otherwise split the string.
        if isinstance(config.full_access_entitlement_list, list):
            full_access_list = config.full_access_entitlement_list
        else:
            full_access_list = [s.strip() for s in str(config.full_access_entitlement_list).split(",") if s.strip()]

        params_full = ranger.add_table_permission_groups(
            config.corporate_store,
            config.target_table,
            config.access_type,  # keep same access type per existing pattern
            config.source_table,
            full_access_list
        )
        ranger.generate_policy(params_full, env_config, "full_access")
||||
def accessType_2B(config, env_config,igamRoleDF):
    """Emit Ranger policies for access type 2b.

    Builds a row-level policy from the unique IGAM entitlements in
    ``igamRoleDF`` and, when ``config.full_access_entitlement_list`` is set,
    an additional table-level policy covering the combined entitlement list.
    """
    logger.info(f"Grant privileges for access type {config.access_type}")
    logger.info("Fetch the metadata in Oracle for access type 2b")

    # --- Validate required columns ---
    required = {"environment", "igam_entitlement", "subsource_id"}
    missing = required - set(igamRoleDF.columns)
    if missing:
        raise KeyError(f"igamRoleDF missing required column(s): {sorted(missing)}")

    # --- Normalize to strings (robust against None/NaN); work on a copy so
    # the caller's frame is untouched ---
    igamRoleDF = igamRoleDF.copy()
    igamRoleDF["environment"] = igamRoleDF["environment"].astype(str).str.strip()
    igamRoleDF["igam_entitlement"] = igamRoleDF["igam_entitlement"].astype(str).str.strip()
    igamRoleDF["subsource_id"] = igamRoleDF["subsource_id"].astype(str).str.strip()

    # --- Aggregation: per (environment, igam_entitlement) collect unique subsource_id list ---
    # Keep a stable order by sorting; remove empties.
    # NOTE(review): agg_df and accessType2bValidList below are computed but
    # never used afterwards — presumably kept for a log payload; confirm
    # before removing.
    agg_df = (
        igamRoleDF.loc[igamRoleDF["subsource_id"].ne(""), ["environment", "igam_entitlement", "subsource_id"]]
        .drop_duplicates()
        .sort_values(["environment", "igam_entitlement", "subsource_id"], kind="mergesort")
        .groupby(["environment", "igam_entitlement"], as_index=False)["subsource_id"]
        .agg(lambda s: ",".join(s.unique()))
        .rename(columns={"subsource_id": "subsource_id_list"})
    )

    # List of tuples (IGAM_ENTITLEMENT, subsource_id_list) — mirrors the log payload
    accessType2bValidList = list(zip(
        agg_df["igam_entitlement"].astype(str),
        agg_df["subsource_id_list"].astype(str)
    ))

    # --- Entitlements for policy generation (unique, non-empty) ---
    igam_entitlements = (
        igamRoleDF["igam_entitlement"]
        .dropna()
        .map(str)
        .str.strip()
        .loc[lambda s: s.ne("")]
        .drop_duplicates()
        .tolist()
    )

    # --- Row-level permissions (per the existing ranger API) ---
    params_row_level = ranger.add_table_permission_groups(
        config.corporate_store,
        config.target_table,
        config.access_type,
        config.source_table,
        igam_entitlements
    )

    # --- Table-level permissions, merging in full-access entitlements if provided ---
    if getattr(config, "full_access_entitlement_list", None):
        combined_entitlements = igam_entitlements + config.full_access_entitlement_list
    else:
        combined_entitlements = igam_entitlements

    # --- Emit YAML using the ranger helpers ---
    if getattr(config, "full_access_entitlement_list", None):
        params = ranger.add_table_permission_groups(
            config.corporate_store,
            config.target_table,
            config.access_type,
            config.source_table,
            combined_entitlements
        )
        # NOTE(review): generate_policy is called with 2 args here but with a
        # third policy-id argument elsewhere — relies on a default value;
        # confirm against the ranger module.
        ranger.generate_policy(params, env_config)

    ranger.yaml_format_2b(params_row_level,env_config, config.full_access_entitlement_list) # row-level policy
    logger.info("Final YAML format emitted for 2B.")
||||
def accessType_3(config,env_config, igamRoleDF):
    """
    Python/pandas translation of the Scala accessType_3.
    Expects igamRoleDF to have at least: ['igam_entitlement', 'subsource_id'].
    The `config` object should expose the attributes used below (names match your Scala/Python usage).
    Uses a YAML formatter module `ranger` with:
    - add_table_permission_groups(corporate_store, target_table, access_type, source_table, entitlements)
    - yaml_format_3(params)
    - yaml_format_1(params)
    """

    # --- 1) Filter entitlements where subsource_id = 'TMS' ---
    if not {"igam_entitlement", "subsource_id"}.issubset(igamRoleDF.columns):
        missing = {"igam_entitlement", "subsource_id"} - set(igamRoleDF.columns)
        raise KeyError(f"igamRoleDF missing required column(s): {sorted(missing)}")

    # Only TMS-backed roles qualify for type-3 row-level access.
    new_df = (
        igamRoleDF.loc[
            igamRoleDF["subsource_id"].astype(str).str.upper() == "TMS",
            ["igam_entitlement"]
        ].drop_duplicates()
    )

    accessType3ValidList = new_df["igam_entitlement"].astype(str).str.strip().tolist()

    # --- 2) Build params for row-level groups (type 3) ---
    params_row_level = ranger.add_table_permission_groups(
        config.corporate_store,
        config.target_table,
        config.access_type,
        config.source_table,
        accessType3ValidList
    )

    corp = str(config.corporate_store).lower()
    src_tbl = str(config.source_table).lower()

    # --- 3) Compose the filter expressions (match Scala strings) ---
    # Portfolio-tree filter: both parent and child portfolios must be
    # accessible to the current user via nh_portfolio_access (still valid).
    sqlCreateView3NonRestrString_Ptree = (
        "(parent_fk in ( "
        f"select portfolio_fk from {corp}.nh_portfolio_access "
        "where lower(user_id) LIKE concat('%', lower(regexp_extract(current_user(),'[^@]*',0)), '%') "
        "AND to_date(a_valid_to) > current_timestamp() "
        ")) AND (child_fk in ( "
        f"select portfolio_fk from {corp}.nh_portfolio_access "
        "where lower(user_id) LIKE concat('%', lower(regexp_extract(current_user(),'[^@]*',0)), '%') "
        "AND to_date(a_valid_to) > current_timestamp() "
        "))"
    )

    # Position filter: position reachable through an accessible portfolio,
    # either directly or via portfolio_compare_fk.
    sqlCreateView3NonRestrString_Pos = (
        "position_key in ( "
        f"select position_key from {corp}.nh_portfolio_access a "
        f"inner join {corp}.nh_position b on ( "
        "(b.portfolio_fk = a.portfolio_fk and b.portfolio_fk is not NULL) or "
        "(b.portfolio_compare_fk = a.portfolio_fk and b.portfolio_compare_fk is not NULL) "
        ") "
        "where lower(user_id) LIKE concat('%', lower(regexp_extract(current_user(),'[^@]*',0)), '%') "
        "AND to_date(a_valid_to) > current_timestamp() "
        ")"
    )

    # Access tables themselves: rows restricted to the current user's own entries.
    sqlCreateView3PortAccess = "lower(user_id) LIKE concat('%', lower(regexp_extract(current_user(),'[^@]*',0)), '%')"
    sqlCreateView3LimAccess = "lower(user_id) LIKE concat('%', lower(regexp_extract(current_user(),'[^@]*',0)), '%')"

    # Standard case uses the configured key columns/table names
    key_col = getattr(config, "type3SourceTableKeyColumn", None)
    acc_col = getattr(config, "type3AccessTableKeyColumn", None)
    acc_table= getattr(config, "type3AccessTable", None)
    if not all([key_col, acc_col, acc_table]):
        # Only needed for the default branch; keep None if your config doesn't use the default
        key_col = key_col or "source_key_col"
        acc_col = acc_col or "access_key_col"
        acc_table = acc_table or "type3_access_table"

    sqlCreateView3NonRestrString_Stdrd = (
        f"{key_col} in (select {acc_col} from {corp}.{acc_table} "
        "where lower(user_id) LIKE concat('%', lower(regexp_extract(current_user(),'[^@]*',0)), '%') "
        "AND to_date(a_valid_to) > current_timestamp())"
    )

    # --- 4) Choose the filter by source table (matches Scala match/case) ---
    if src_tbl == "nh_portfoliotree":
        sqlCreateViewType3Filter = sqlCreateView3NonRestrString_Ptree
    elif src_tbl == "nh_position":
        sqlCreateViewType3Filter = sqlCreateView3NonRestrString_Pos
    elif src_tbl == "nh_portfolio_access":
        sqlCreateViewType3Filter = sqlCreateView3PortAccess
    elif src_tbl == "nh_limit_access":
        sqlCreateViewType3Filter = sqlCreateView3LimAccess
    else:
        sqlCreateViewType3Filter = sqlCreateView3NonRestrString_Stdrd

    # --- 5) Row filter YAML block (uses groups from params_row_level) ---
    # Expecting params_row_level like {'igam_roles': '...'}; adjust key if your API differs.
    # NOTE(review): rowFilter is built here but never used below —
    # yaml_format_3 receives the raw filter expression instead. Confirm
    # whether this block can be removed.
    igam_roles_lower = str(params_row_level.get("igam_roles", "")).lower()
    rowFilter = (
        "- groups:\n"
        f"    {igam_roles_lower}\n"
        "  accesses:\n"
        "    - select\n"
        f"  filterExpr: \"{sqlCreateViewType3Filter}\"\n"
        "  "
    )

    # --- 6) Handle optional full access entitlements ---
    if config.full_access_entitlement_list:
        paramsFullAccess = ranger.add_table_permission_groups(
            config.corporate_store,
            config.target_table,
            config.access_type,
            config.source_table,
            config.full_access_entitlement_list
        )

        # NOTE(review): full_groups_lower is unused (its consumer, the
        # fullAccessFilter block below, is disabled) — confirm before removing.
        full_groups_lower = str(paramsFullAccess.get("igam_roles", "")).lower()
        """
        fullAccessFilter = (
            "- groups:\n"
            f"    {full_groups_lower}\n"
            "  accesses:\n"
            "    - select\n"
            "  filterExpr: \"1=1\"\n"
            "  "
        )
        """
        params_table_level = ranger.add_table_permission_groups(
            config.corporate_store,
            config.target_table,
            config.access_type,
            config.source_table,
            accessType3ValidList + config.full_access_entitlement_list
        )
    else:
        # NOTE(review): fullAccessFilter is assigned only in this branch and
        # never read afterwards.
        fullAccessFilter = ""
        params_table_level = ranger.add_table_permission_groups(
            config.corporate_store,
            config.target_table,
            config.access_type,
            config.source_table,
            accessType3ValidList
        )

    # --- 7) Render YAML and merge like Scala ---
    ranger.yaml_format_3(params_row_level,env_config,sqlCreateViewType3Filter,config.full_access_entitlement_list ) # base type 3 yaml
    ranger.yaml_format_1(params_table_level,env_config) # table-level yaml
||||
#5 create extra policies for super-users
|
||||
#6 refresh metadata
|
||||
|
||||
def run_process(env_file, env, service_name,source_schema,source_table,sentry_role_environment):
    """End-to-end replication of one Oracle table into the Devo corporate store.

    Steps: load environment/service config, resolve the target table name
    and access type from the Oracle management/metadata tables, (re)create
    the external table, and generate the matching Ranger policies.

    Args:
        env_file: path to the YAML environment configuration.
        env: environment key inside the YAML (e.g. 'dev', 'tst').
        service_name: data service key (e.g. 'rar', 'mopdb', 'rqsd').
        source_schema: Oracle schema of the source table.
        source_table: Oracle source table name.
        sentry_role_environment: role environment passed through to config.

    Raises:
        Exception: when credentials cannot be fetched or the table is not
            registered in the management/metadata tables (re-raised after
            logging).
    """
    #1 receive table name and check for target table and access type
    env_dict=initialize_config(env_file)
    env_config=env_dict[env]
    # RQSD uses a dedicated service account and secret.
    if service_name.lower()=='rqsd':
        env_config["DEVO_SECRET"]=env_config["DEVO_SECRET_RQSD"]
        env_config["DEVO_USERNAME"]=env_config["DEVO_USERNAME_RQSD"]
    try:
        devo_secret_name = env_config["DEVO_SECRET"]
        env_config["DEVO_SECRET"]= get_secret(devo_secret_name)
    except Exception:
        # Fix: was a bare `except:` followed by `raise(Exception)`, which
        # also caught KeyboardInterrupt/SystemExit and discarded the
        # original error; narrow the catch and re-raise the real exception.
        logger.error("Failed to retrieve credentials from secrets")
        raise
    db_config=env_dict[service_name]
    try:
        target_table=get_target_table(db_config['oracle_mgmt_table'],source_schema,source_table,env)['table_alias'][0]
    except Exception as e:
        logger.error("Table not found in oracle management table")
        logger.error("Exception: %s", e)
        logger.error("Traceback:\n%s", traceback.format_exc())
        raise
    try:
        access_type=get_type_ofAccess(db_config['oracle_metadata_table'],source_schema,source_table,env)['rar3_type_of_access'][0].strip()
    except Exception as e:
        logger.error("Table not found in oracle metadata inventory")
        logger.error("Exception: %s", e)
        logger.error("Traceback:\n%s", traceback.format_exc())
        raise
    args={
        'corporate_store':db_config['corporate_store'],
        'service_name': service_name,
        'source_schema':source_schema,
        'source_table':source_table,
        'oracle_metadata_table':db_config['oracle_metadata_table'],
        'oracle_igam_table':db_config['oracle_igam_table'],
        'oracle_mgmt_table': db_config['oracle_mgmt_table'],
        'target_table':target_table,
        'sentry_role_environment':sentry_role_environment,
        'target_s3_bucket': env_config["BUCKET_PREFIX"]+db_config['target_s3_bucket'] ,
        'tech_meta_data_fields': db_config['tech_meta_data_fields'],
        'full_access_entitlement_list':env_config[f"FULL_ACCESS_LIST_{service_name.upper()}"].split(','),
        'access_type': access_type
    }
    config=fo.Options(args)
    #2 load metadata
    tableFields=loadMetadataTable(config,env)
    igamRoles=readIGAMRoles(config,env)
    #3 drop table and policies
    deleteExternalTable(config,env_config)
    #4 create external table and policies
    # Tables whose alias ends in _EXT get a plain external table; others are
    # materialized from their _EXT counterpart via CTAS.
    if (config.target_table[-4:].upper() == '_EXT'):
        createExternalTables( config, tableFields,env_config )
    else:
        createTableFromExternal( config, tableFields,env_config)

    accessTypeMapper(config,env_config,igamRoles)
    #5 refresh metadata
    #execute_query(f"INVALIDATE METADATA {config.corporate_store}.{config.target_table}",env_config["DEVO_USERNAME"],env_config['IMPALA_HOSTNAME'],env_config['DEVO_SECRET'])
    #execute_query(f"COMPUTE STATS {config.corporate_store}.{config.target_table}",env_config["DEVO_USERNAME"],env_config['IMPALA_HOSTNAME'],env_config['DEVO_SECRET'])
||||
# Manual invocation examples kept for reference:
#run_process("/home/dbt/Marco/mrds_elt/python/devo_replicator/env_config.yaml",'tst','mopdb','MPEC','T_MPEC','TEST/INTEGRATION')

#run_process("/home/dbt/Marco/mrds_elt/python/devo_replicator/config/env_config.yaml",'tst','rar','CORR_RAR','NH_ASSET','TEST/INTEGRATION')
#run_process("/home/dbt/Marco/mrds_elt/python/devo_replicator/config/env_config.yaml",'dev','rar','CORR_RAR','NH_LIMIT','TEST/INTEGRATION')

# NOTE(review): this call runs at import time; consider guarding it with
# `if __name__ == "__main__":` so the module can be imported without side
# effects. Also note the table name literal ('NH_Asset_transactial_data')
# looks misspelled — confirm against the management table.
run_process("/home/dbt/Marco/mrds_elt/python/devo_replicator/config/env_config.yaml",'dev','rar','CORR_RAR','NH_Asset_transactial_data'.upper(),'TEST/INTEGRATION')
# Disabled batch driver: iterates the RQSD management table and replicates
# every registered table, collecting failures.
"""
df=execute_oracle_query('select owner, table_name from CT_MRDS.A_DEVO_REPLICA_MGMT_RQSD')
listfail=[]
for index, row in df.iterrows():

    try:
        print("running table: ",row["table_name"])
        run_process("/home/dbt/Marco/mrds_elt/python/devo_replicator/config/env_config.yaml",'tst','rqsd',row['owner'],row['table_name'].upper(),'TEST/INTEGRATION')
    except:
        print("failed")
        listfail.append(row["table_name"])
    print("succeded")
print(listfail)
"""
||||
'''{"id": 48754, "guid": "d75f1491-538d-402a-a8ac-e7e21ac0be53", "isEnabled": true, "version": 1, "service": "cm_hive", "name": "cpo_crp_rar_mu_asset_code_map_2a_policy_1", "policyType": 0, "policyPriority": 0, "description": "created-ranger_client-v0.0.6-2025-10-17T14:28:24.135108", "isAuditEnabled": true, "resources": {"database": {"values": ["crp_rar"], "isExcludes": false, "isRecursive": false},
|
||||
"column": {"values": ["ASSET_FK", "ASSET_FK", "A_DWH_LOAD_SET_FK", "A_DWH_LOAD_SET_FK", "A_VALID_FROM", "A_VALID_FROM", "A_VALID_TO", "A_VALID_TO", "CODE_TYPE_NO_ID", "CODE_TYPE_NO_ID", "CODE_VALUE", "CODE_VALUE", "INDEP_SUBPROCESS_FK", "INDEP_SUBPROCESS_FK", "TEC_EXECUTION_DATE", "TEC_EXECUTION_DATE", "TEC_INGESTION_DATE", "TEC_INGESTION_DATE", "TEC_RUN_ID", "TEC_RUN_ID"], "isExcludes": false, "isRecursive": false},
|
||||
"table": {"values": ["MU_ASSET_CODE_MAP"], "isExcludes": false, "isRecursive": false}}, "policyItems": [{"accesses": [{"type": "select", "isAllowed": true}],
|
||||
"groups": ["a_mopdb_ea", "disc-au-bda"], "delegateAdmin": false}], "serviceType": "hive", "isDenyAllElse": false}
|
||||
|
||||
{"id": 48755, "guid": "5ff857c2-3683-4178-98ce-5932c0677cd4", "isEnabled": true, "version": 1, "service": "cm_hive", "name": "cpo_crp_rar_mu_asset_code_map_2a_policy_2", "policyType": 0, "policyPriority": 0, "description": "created-ranger_client-v0.0.6-2025-10-17T14:28:24.135108", "isAuditEnabled": true, "resources": {"database": {"values": ["crp_rar"], "isExcludes": false, "isRecursive": false},
|
||||
"column": {"values": ["ASSET_FK", "A_DWH_LOAD_SET_FK", "A_VALID_FROM", "A_VALID_TO", "CODE_TYPE_NO_ID", "CODE_VALUE", "INDEP_SUBPROCESS_FK", "TEC_EXECUTION_DATE", "TEC_INGESTION_DATE", "TEC_RUN_ID"], "isExcludes": false, "isRecursive": false},
|
||||
"table": {"values": ["MU_ASSET_CODE_MAP"], "isExcludes": false, "isRecursive": false}}, "policyItems": [{"accesses": [{"type": "select", "isAllowed": true}], "
|
||||
groups": ["su-omd-reuters-users", "a_mopdb_excess_liquidity", "a-mora-lba-exp-a", "a_rar_csdb_reference_data", "a_mopdb_uc", "a_rar_csdb_ratings_data", "a_mopdb_credit_operations", "a_rar_fxcd_data", "a_rar_mdp_bbg_data", "disc-ac-riad_cnf_n-r", "a-mora-lba-ana-a", "a_mopdb_tms_data", "disc-ac-riad_core-r", "a_mopdb_mpec", "a-led-ana-a", "a-led-exp-a", "a_mopdb_ela_all"], "delegateAdmin": false}], "serviceType": "hive", "isDenyAllElse": false}
|
||||
|
||||
{"id": 48756, "guid": "1071767f-8ef6-47be-bb9b-7077ed9e9a90", "isEnabled": true, "version": 1, "service": "cm_hive", "name": "cpo_crp_rar_mu_asset_code_map_2a_policy_full_access", "policyType": 0, "policyPriority": 0, "description": "created-ranger_client-v0.0.6-2025-10-17T14:28:24.135108", "isAuditEnabled": true, "resources": {"database": {"values": ["crp_rar"], "isExcludes": false, "isRecursive": false}, "column": {"values": ["*"], "isExcludes": false, "isRecursive": false}, "table": {"values": ["MU_ASSET_CODE_MAP"], "isExcludes": false, "isRecursive": false}},
|
||||
"policyItems": [{"accesses": [{"type": "select", "isAllowed": true}], "groups": ["disc-ac-rar-r"], "delegateAdmin": false}], "serviceType": "hive", "isDenyAllElse": false}'''
|
||||
|
||||
|
||||
'''
|
||||
{"isEnabled": true, "isDenyAllElse": false, "service": "cm_hive", "name": "cpo_crp_rar_mu_asset_code_map_ext_2a_policy_1", "resources": {"database": {"values": ["crp_rar"], "isExcludes": false, "isRecursive": false}, "table": {"values": ["MU_ASSET_CODE_MAP_EXT"], "isExcludes": false, "isRecursive": false},
|
||||
"column": {"values": ["ASSET_FK", "ASSET_FK", "A_DWH_LOAD_SET_FK", "A_DWH_LOAD_SET_FK", "A_VALID_FROM", "A_VALID_FROM", "A_VALID_TO", "A_VALID_TO", "CODE_TYPE_NO_ID", "CODE_TYPE_NO_ID", "CODE_VALUE", "CODE_VALUE", "INDEP_SUBPROCESS_FK", "INDEP_SUBPROCESS_FK", "TEC_EXECUTION_DATE", "TEC_EXECUTION_DATE", "TEC_INGESTION_DATE", "TEC_INGESTION_DATE", "TEC_RUN_ID", "TEC_RUN_ID"], "isExcludes": false, "isRecursive": false}}, "policyItems": [{"delegateAdmin": false,
|
||||
"groups": ["disc-tu-bda", "t_mopdb_ea"], "accesses": [{"type": "select", "isAllowed": true}]}]}
|
||||
|
||||
|
||||
|
||||
{"isEnabled": true, "isDenyAllElse": false, "service": "cm_hive", "name": "cpo_crp_rar_mu_asset_code_map_ext_2a_policy_2", "resources": {"database": {"values": ["crp_rar"], "isExcludes": false, "isRecursive": false},
|
||||
"table": {"values": ["MU_ASSET_CODE_MAP_EXT"], "isExcludes": false, "isRecursive": false},
|
||||
"column": {"values": ["ASSET_FK", "A_DWH_LOAD_SET_FK", "A_VALID_FROM", "A_VALID_TO", "CODE_TYPE_NO_ID", "CODE_VALUE", "INDEP_SUBPROCESS_FK", "TEC_EXECUTION_DATE", "TEC_INGESTION_DATE", "TEC_RUN_ID"], "isExcludes": false, "isRecursive": false}}, "policyItems": [{"delegateAdmin": false,
|
||||
"groups": ["a-led-ana-t", "a-led-exp-t", "a-mora-lba-ana-t", "a-mora-lba-exp-t", "disc-tc-riad_cnf_n-r", "disc-tc-riad_core-r", "su-omd-reuters-users", "t_mopdb_credit_operations", "t_mopdb_ela_all", "t_mopdb_excess_liquidity", "t_mopdb_mpec", "t_mopdb_tms_data", "t_mopdb_uc", "t_rar_csdb_ratings_data", "t_rar_csdb_reference_data", "t_rar_fxcd_data", "t_rar_mdp_bbg_data"],
|
||||
"accesses": [{"type": "select", "isAllowed": true}]}]}
|
||||
|
||||
|
||||
|
||||
{"isEnabled": true, "isDenyAllElse": false, "service": "cm_hive", "name": "cpo_crp_rar_mu_asset_code_map_ext_2a_policy_full_access", "resources": {"database": {"values": ["crp_rar"], "isExcludes": false, "isRecursive": false},
|
||||
"table": {"values": ["MU_ASSET_CODE_MAP_EXT"], "isExcludes": false, "isRecursive": false}, "column": {"values": ["*"], "isExcludes": false, "isRecursive": false}}, "policyItems": [{"delegateAdmin": false,
|
||||
"groups": ["disc-dc-rar-r"], "accesses": [{"type": "select", "isAllowed": true}]}]}
|
||||
'''
|
||||
130
python/devo_replicator/table_generator/tableBuilderQueries.py
Normal file
130
python/devo_replicator/table_generator/tableBuilderQueries.py
Normal file
@@ -0,0 +1,130 @@
|
||||
# Default Oracle metadata inventory table; callers typically override it via
# each builder's metadata_table parameter (which shadows this name).
metadata_table = "DW_RAR.NH_METADATA_INVENTORY"
def get_query_metadata(metadata_table, owner, table_name):
    """Build the Oracle query that maps a table's column metadata to Hive types.

    The inner CTE translates each Oracle NUMBER precision/scale pair into a
    Hive-safe precision/scale (or NULL when not representable) and picks a
    target Hive type; the outer SELECT renders the final type string
    (e.g. ``Decimal(p,s)``) per column, ordered by column_id.

    Args:
        metadata_table: fully qualified Oracle metadata inventory table.
        owner: schema owner of the source table.
        table_name: source table name.

    Returns:
        str: the complete Oracle SQL text.
    """
    # Hive-compatible precision for NUMBER columns.
    precision_case = (
        "CASE WHEN data_precision IS NULL AND data_scale IS NULL THEN NULL "
        "WHEN data_precision IS NOT NULL AND data_scale IS NULL THEN data_precision "
        "WHEN CAST(data_precision AS INT) >= CAST(data_scale AS INT) AND CAST(data_scale AS INT) >= 0 THEN data_precision "
        "WHEN CAST(data_precision AS INT) < CAST(data_scale AS INT) AND CAST(data_scale AS INT) <= 38 AND CAST(data_scale AS INT) > 0 THEN data_scale "
        "WHEN CAST(data_precision AS INT) < CAST(data_scale AS INT) AND (CAST(data_scale AS INT) > 38 OR CAST(data_scale AS INT) < 0) THEN NULL "
        "ELSE NULL END AS data_precision_hive, "
    )
    # Hive-compatible scale for NUMBER columns.
    scale_case = (
        "CASE WHEN data_precision IS NULL AND data_scale IS NULL THEN NULL "
        "WHEN data_precision IS NOT NULL AND data_scale IS NULL THEN CAST(0 AS INT) "
        "WHEN CAST(data_precision AS INT) >= CAST(data_scale AS INT) AND CAST(data_scale AS INT) >= 0 THEN data_scale "
        "WHEN CAST(data_precision AS INT) < CAST(data_scale AS INT) AND CAST(data_scale AS INT) <= 38 AND CAST(data_scale AS INT) > 0 THEN data_scale "
        "WHEN CAST(data_precision AS INT) < CAST(data_scale AS INT) AND (CAST(data_scale AS INT) > 38 OR CAST(data_scale AS INT) < 0) THEN NULL "
        "ELSE NULL END AS data_scale_hive, "
    )
    # Target Hive type per Oracle type; anything unrecognized falls back to String.
    type_case = (
        "CASE WHEN data_type LIKE '%NUMBER%' AND data_precision IS NULL AND data_scale IS NULL THEN 'String' "
        "WHEN data_type LIKE '%NUMBER%' AND data_precision IS NOT NULL AND data_scale IS NULL THEN 'Decimal' "
        "WHEN data_type LIKE '%NUMBER%' AND CAST(data_precision AS INT) >= CAST(data_scale AS INT) AND CAST(data_scale AS INT) >= 0 THEN 'Decimal' "
        "WHEN data_type LIKE '%NUMBER%' AND CAST(data_precision AS INT) < CAST(data_scale AS INT) AND CAST(data_scale AS INT) <= 38 AND CAST(data_scale AS INT) > 0 THEN 'Decimal' "
        "WHEN data_type LIKE '%NUMBER%' AND CAST(data_precision AS INT) < CAST(data_scale AS INT) AND (CAST(data_scale AS INT) > 38 OR CAST(data_scale AS INT) < 0) THEN 'String' "
        "WHEN data_type LIKE '%CHAR%' THEN 'String' "
        "WHEN data_type LIKE '%VARCHAR2%' THEN 'String' "
        "WHEN data_type LIKE '%TIMESTAMP%' THEN 'String' "
        "WHEN data_type LIKE '%DATE%' THEN 'String' "
        "ELSE 'String' END AS data_type_hive, "
    )
    cte = (
        "WITH metaDF AS ( "
        "SELECT owner, table_name, column_id, column_name, data_type, data_precision, data_scale, "
        + precision_case
        + scale_case
        + type_case
        + "REGEXP_REPLACE(data_description, '''', '\\''') AS data_description "
        + f"FROM {metadata_table} "
        + f"WHERE lower(owner||'.'||table_name) = lower('{owner}'||'.'||'{table_name}') "
        + "AND a_valid_to > sysdate) "
    )
    outer_select = (
        "SELECT owner, table_name, column_id, column_name, data_type, data_precision, data_scale, "
        "data_precision_hive, data_scale_hive, data_type_hive, "
        "CASE WHEN data_type_hive = 'Decimal' THEN 'Decimal(' || COALESCE(CAST(data_precision_hive AS VARCHAR2(30)), '') || ',' || COALESCE(CAST(data_scale_hive AS VARCHAR2(30)), '') || ')' "
        "ELSE data_type_hive END AS data_type_string, data_description "
        "FROM metaDF "
        "ORDER BY CAST(column_id AS INT) "
    )
    return cte + outer_select
||||
def get_query_metadata_access_type1(metadata_table):
    """Build the Oracle SQL that lists access-type '1' entries for one table.

    The statement is two UNIONed selects over the metadata inventory:
    the first picks currently-valid type-1 rows whose source list is not
    'RAR', the second adds rows owned by CORR_REF_MAIN regardless of
    source list.

    :param metadata_table: fully qualified metadata inventory table name,
        substituted for the ``{0}`` placeholder.
    :return: SQL text.  The ``$$$1`` token is deliberately left in place
        for a later substitution of the quoted ``owner.table`` literal
        (presumably done by the caller -- confirm against usage).
    """
    # Both UNION branches share the same projection and base predicates.
    common_head = (
        "SELECT owner, table_name, list_of_sources as SOURCE, rar3_type_of_access "
        "FROM {0} "
        "WHERE a_valid_to > sysdate "
        "AND rar3_type_of_access = '1' "
    )
    # NOTE(review): `NOT IN 'RAR'` (single scalar, no parentheses) is valid
    # Oracle syntax, equivalent to `NOT IN ('RAR')`.
    fragments = [
        common_head,
        "AND list_of_sources NOT IN 'RAR' ",
        "AND lower(owner||'.'||table_name) = lower($$$1) ",
        "UNION ",
        common_head,
        "AND owner = 'CORR_REF_MAIN' ",
        "AND lower(owner||'.'||table_name) = lower($$$1) ",
    ]
    return "".join(fragments).format(metadata_table)
|
||||
|
||||
def get_query_metadata_access_type2a(metadata_table):
    """Build the Oracle SQL for access-type '2a' (column-level) metadata.

    The statement unpivots the per-source flag columns of the metadata
    inventory into one (owner, table_name, column_name, source) row per
    flagged source, then UNIONs in three technical columns
    (TEC_INGESTION_DATE, TEC_EXECUTION_DATE, TEC_RUN_ID) stamped from a
    single seed row.

    :param metadata_table: fully qualified metadata inventory table name,
        substituted for the ``{0}`` placeholder.
    :return: SQL text.  The ``$$$1`` token is deliberately left in place
        for a later substitution of the quoted ``owner.table`` literal
        (presumably done by the caller -- confirm against usage).
    """
    # CTE 1: unpivot the per-source flag columns for the requested table.
    # Rows with comma-separated source lists and the reserved DATABASE
    # column are excluded before the UNPIVOT.
    rar_columns_cte = (
        "WITH rar_columns AS ( "
        "SELECT owner, table_name, column_name, source, rar3_type_of_access "
        "FROM ( "
        "SELECT owner, table_name, column_name, rar3_type_of_access, list_of_sources, "
        "tms, c2d_ea, c2d_ela, c2d_mpec, c2d_uc, ceph, lm, csdb_reference, "
        "csdb_ratings, fxcd, mdp_bbg, mdp_reu, riad_cl, riad_ou, sdw_estr, sdw_fx, "
        "top, rar, rtm, led, mdp_cma "
        "FROM {0} "
        "WHERE a_valid_to > sysdate "
        "AND rar3_type_of_access = '2a' "
        "AND lower(owner || '.' || table_name) = lower($$$1) "
        "AND list_of_sources NOT LIKE '%,%' "
        "AND upper(column_name) NOT IN ('DATABASE') "
        ") a "
        "UNPIVOT ( "
        "val FOR (source) IN ( "
        "tms AS 'TMS', c2d_ea AS 'C2D_EA', c2d_ela AS 'C2D_ELA', c2d_mpec AS 'C2D_MPEC', "
        "c2d_uc AS 'C2D_UC', ceph AS 'CEPH', lm AS 'LM', csdb_reference AS 'CSDB_REFERENCE', "
        "csdb_ratings AS 'CSDB_RATINGS', fxcd AS 'FXCD', mdp_bbg AS 'MDP_BBG', mdp_reu AS 'MDP_REU', "
        "riad_cl AS 'RIAD_CL', riad_ou AS 'RIAD_OU', sdw_estr AS 'SDW_ESTR', sdw_fx AS 'SDW_FX', "
        "top AS 'TOP', rar AS 'RAR', rtm AS 'RTM', led AS 'LED', mdp_cma AS 'MDP_CMA') "
        ") "
        "ORDER BY owner, table_name, column_name "
        "), "
    )
    # CTE 2: a single seed row (owner/table/access type) with a dummy
    # 'RAR' source, used to stamp the technical columns below.
    dummy_entry_cte = (
        "dummy_entry AS ( "
        "SELECT owner, table_name, 'RAR' as SOURCE, rar3_type_of_access "
        "FROM rar_columns "
        "FETCH FIRST ROW ONLY "
        "), "
    )
    # CTE 3: one row per technical/housekeeping column, derived from the
    # seed row.
    tec_fields_cte = (
        "disc_tec_fields AS ( "
        "SELECT owner, table_name, 'TEC_INGESTION_DATE' AS column_name, source, rar3_type_of_access "
        "FROM dummy_entry "
        "UNION "
        "( "
        "SELECT owner, table_name, 'TEC_EXECUTION_DATE' AS column_name, source, rar3_type_of_access "
        "FROM dummy_entry "
        ") "
        "UNION "
        "( "
        "SELECT owner, table_name, 'TEC_RUN_ID' AS column_name, source, rar3_type_of_access "
        "FROM dummy_entry "
        ") "
        ") "
    )
    # Final result: technical columns plus the unpivoted source columns.
    final_select = (
        "SELECT owner, table_name, column_name, source, rar3_type_of_access "
        "FROM disc_tec_fields "
        "UNION "
        "SELECT owner, table_name, column_name, source, rar3_type_of_access "
        "FROM rar_columns "
    )
    sql = rar_columns_cte + dummy_entry_cte + tec_fields_cte + final_select
    return sql.format(metadata_table)
|
||||
|
||||
def get_query_igam_roles(igam_table, service):
    """Build the SQL that reads the IGAM/Sentry role mapping for a service.

    :param igam_table: fully qualified IGAM mapping table, substituted for
        the ``{0}`` placeholder.
    :param service: service key (e.g. ``'rar'``, ``'mopdb'``); upper-cased
        and compared against ``SERVICE_NAME``.
    :return: SQL text.  The ``$$$1`` token is deliberately left in place
        for a later substitution of the quoted environment literal
        (presumably done by the caller -- confirm against usage).
    """
    # NOTE(review): the original computed a `service_entitlement` local that
    # was 'mrds' in every if/elif/else branch and was never read; removed
    # as dead code -- the produced query is unchanged.
    # NOTE(review): `service` is interpolated directly into the SQL text;
    # acceptable only while it comes from internal configuration, never
    # from untrusted input.
    query_igam_roles = (
        "SELECT MRDS_subsource_id as Datasource, "
        "MRDS_subsource_id as subsource_id, "
        "MRDS_entitlement as IGAM_Entitlement, "
        "environment "
        "FROM {0} where lower(environment) = lower($$$1) "
        "and SERVICE_NAME='{1}'"
    ).format(igam_table, service.upper())
    return query_igam_roles
|
||||
54
python/devo_replicator/table_generator/testScript.py
Normal file
54
python/devo_replicator/table_generator/testScript.py
Normal file
@@ -0,0 +1,54 @@
|
||||
# Ad-hoc driver script: runs the table-generator pipeline end to end
# against a hand-written argument list (no scheduler involved).
import os  # NOTE(review): unused in this script -- candidate for removal

import mrds_elt.python.devo_replicator.FlowOptions as ro
import tableBuilderProcessor_2 as tbp

# setting variables
# --- kept-for-reference example 1: CORR_RAR.NH_F_RATING, access_type=1 ---
# args = [
#     'corporate_store=crp_rar',
#     'source_schema=CORR_RAR',
#     'source_table=NH_F_RATING',
#     'target_table=NH_F_RATING',
#     'access_type=1',
#     'oracle_metadata_table=CORR_RAR.NH_METADATA_INVENTORY',
#     'oracle_igam_table=CT_REF.RAR_SOURCES_IGAM_SENTRY',
#     'sentry_role_environment=production',
#     'target_s3_bucket=s3a://devo-crp-ffppyd8q',
#     'tech_meta_data_fields=tec_ingestion_date String, tec_execution_date String, tec_run_id String',
#     'full_access_entitlement_list=DISC-PC-RAR-R'
# ]

# --- kept-for-reference example 2: CORR_RAR.NH_ASSET, access_type=2a ---
# args = [
#     'corporate_store=crp_rar',
#     'source_schema=CORR_RAR',
#     'source_table=NH_ASSET',
#     'target_table=NH_ASSET',
#     'access_type=2a',
#     'oracle_metadata_table=CORR_RAR.NH_METADATA_INVENTORY',
#     'oracle_igam_table=CT_REF.RAR_SOURCES_IGAM_SENTRY',
#     'sentry_role_environment=production',
#     'target_s3_bucket=s3a://devo-crp-ffppyd8q',
#     'tech_meta_data_fields=tec_ingestion_date String, tec_execution_date String, tec_run_id String',
#     'full_access_entitlement_list=DISC-PC-RAR-R'
# ]


# Active run configuration: MPEC.T_MPEC into the crp_mopdb store.
# Each entry is a 'key=value' string consumed by FlowOptions.Options.
# NOTE(review): unlike the commented examples, this list omits
# target_table and access_type -- confirm FlowOptions defaults them.
args = [
    'corporate_store=crp_mopdb',
    'source_schema=MPEC',
    'source_table=T_MPEC',
    'oracle_metadata_table=CT_MOPDB.MOPDB_METADATA_INVENTORY',
    'oracle_igam_table=CT_MOPDB.MOPDB_SOURCES_IGAM_SENTRY',
    'sentry_role_environment=production',
    'target_s3_bucket=s3a://devo-crp-sbul3ju3/mopdb/db',
    'tech_meta_data_fields=tec_ingestion_date String, tec_execution_date String, tec_run_id String',
    'full_access_entitlement_list='
]

# Parse the raw key=value pairs into an options object.
rar_options = ro.Options(args)

# Pipeline steps, in order (semantics inferred from the names -- each is
# implemented in tableBuilderProcessor_2): load table metadata, create the
# external tables, read the IGAM role mapping, then apply the access-type
# mapping.
tableFields = tbp.loadMetadataTable(rar_options)
tbp.createExternalTables_CRP_RAR(rar_options,tableFields )
igamRoleDF = tbp.readIGAMRoles(rar_options)

tbp.accessTypeMapper(rar_options, igamRoleDF)
|
||||
Reference in New Issue
Block a user