init
This commit is contained in:
129
python/devo_replicator/data_replicator/impala_refresher.py
Normal file
129
python/devo_replicator/data_replicator/impala_refresher.py
Normal file
@@ -0,0 +1,129 @@
|
||||
import os
|
||||
import yaml
|
||||
import datetime
|
||||
import pandas as pd
|
||||
from mrds.utils.secrets import get_secret
|
||||
import mrds.utils.manage_runs as runManager
|
||||
import mrds.utils.manage_files as fileManager
|
||||
import mrds.utils.sql_statements as sqls
|
||||
|
||||
import oci
|
||||
|
||||
from impala.dbapi import (
|
||||
connect,
|
||||
ProgrammingError,
|
||||
DatabaseError,
|
||||
IntegrityError,
|
||||
OperationalError,
|
||||
)
|
||||
from impala.error import HiveServer2Error
|
||||
|
||||
|
||||
def get_impala_connection(hostname: str, user: str, secret: str):
    """Open a connection to Impala over HTTP transport with SSL.

    The connection always targets port 443, uses the ``PLAIN``
    authentication mechanism and the ``cliservice`` HTTP path.

    Args:
        hostname: Host of the Impala endpoint.
        user: User name sent to the server.
        secret: Password sent for ``user``.

    Returns:
        The connection object produced by ``impala.dbapi.connect``.
    """
    # Collect the settings first so the transport options read as one unit.
    connection_settings = {
        "host": hostname,
        "port": 443,
        "auth_mechanism": "PLAIN",
        "user": user,
        "password": secret,
        "use_http_transport": True,
        "http_path": "cliservice",
        "use_ssl": True,
    }
    return connect(**connection_settings)
|
||||
|
||||
def execute_query(query: str,user,hostname,password):
    """Run a single statement on a freshly opened Impala connection.

    Args:
        query: Statement text to execute.
        user: User name for the connection.
        hostname: Host of the Impala endpoint.
        password: Password for ``user``.

    Returns:
        The ``(columns, result)`` pair produced by ``execute_devo_query``.
    """
    connection = get_impala_connection(hostname, user, password)
    # Debug aid: shows the connection object on stdout.
    print(connection)
    outcome = execute_devo_query(query, connection)
    columns, result = outcome
    return columns, result
|
||||
|
||||
def execute_devo_query(query: str, conn):
    """Run one statement on an open Impala connection, then close it.

    The statement is executed on a new cursor. Whatever the outcome, both the
    cursor and ``conn`` are closed before this function returns or raises, so
    the caller must not reuse ``conn`` afterwards.

    Args:
        query: Statement text passed unchanged to ``cursor.execute``.
        conn: Open DB-API connection providing ``cursor()`` and ``close()``.

    Returns:
        A ``(None, rowcount)`` tuple. No column metadata is collected and no
        rows are fetched; ``rowcount`` is the cursor's ``rowcount`` attribute
        read right after execution.

    Raises:
        RuntimeError: If creating the cursor or executing the statement fails
            (the underlying error is chained as ``__cause__``), or if the
            statement succeeded but the cursor/connection could not be closed.
    """
    # NOTE: impersonation via "impala.doas.user" is not configured yet.
    cursor = None
    succeeded = False
    try:
        # Creating the cursor inside the try guarantees the connection is
        # closed by the finally clause even when cursor creation fails.
        cursor = conn.cursor()
        print("executing query")
        cursor.execute(query)
        affected_rows = cursor.rowcount
        succeeded = True
        return None, affected_rows

    except OperationalError as oe:
        # Exception classes take no keyword arguments, so the message is
        # passed positionally and the driver error is attached with "from".
        raise RuntimeError("Failed to connect to Impala: " + str(oe)) from oe

    except ProgrammingError as pe:
        raise RuntimeError("Query syntax error: " + str(pe)) from pe

    except IntegrityError as ie:
        raise RuntimeError("Insufficient permissions: " + str(ie)) from ie

    except DatabaseError as db_err:
        raise RuntimeError("Database error: " + str(db_err)) from db_err

    except HiveServer2Error as au_err:
        raise RuntimeError("HiveServer2Error error: " + str(au_err)) from au_err

    except Exception as e:
        raise RuntimeError("An unexpected error occurred: " + str(e)) from e

    finally:
        # Close each resource on its own so a failing cursor.close() cannot
        # prevent the connection from being closed.
        close_error = None
        for resource in (cursor, conn):
            if resource is None:
                continue
            try:
                resource.close()
            except Exception as exc:
                if close_error is None:
                    close_error = exc

        if close_error is not None:
            close_message = "Failed to close the connection: " + str(close_error)
            if succeeded:
                raise RuntimeError(close_message) from close_error
            # A query error is already propagating; report the close failure
            # without replacing the more relevant original error.
            print(close_message)
|
||||
|
||||
def initialize_task(workflow_context, task_name):
    """Register a task with the run manager and return its history key.

    Args:
        workflow_context: Mapping that provides the ``"run_id"`` and
            ``"a_workflow_history_key"`` entries of the current workflow.
        task_name: Name under which the task is registered.

    Returns:
        The value returned by ``runManager.init_task``.
    """
    run_id = workflow_context["run_id"]
    workflow_history_key = workflow_context["a_workflow_history_key"]
    return runManager.init_task(task_name, run_id, workflow_history_key)
|
||||
|
||||
def initialize_config(config_file_path):
    """Read a YAML configuration file and return its parsed content.

    Args:
        config_file_path: Location of the YAML file on disk.

    Returns:
        Whatever ``yaml.safe_load`` produces for the file's content.

    Raises:
        FileNotFoundError: If nothing exists at ``config_file_path``.
    """
    if os.path.exists(config_file_path):
        with open(config_file_path, "r") as handle:
            return yaml.safe_load(handle)

    # Reached only when the path does not exist.
    raise FileNotFoundError(f"Configuration file {config_file_path} not found.")
|
||||
|
||||
def main(env_config_path, env, table, corporate_store):
    """Refresh Impala metadata and statistics for a single table.

    Loads the environment configuration, fetches the DEVO password from the
    secret store, then runs ``INVALIDATE METADATA`` followed by
    ``COMPUTE STATS`` on ``{corporate_store}.{table}``.

    Args:
        env_config_path: Path of the YAML file with per-environment settings.
        env: Key of the environment section to read from that file.
        table: Table name; interpolated into the statement text as-is.
        corporate_store: Qualifier placed before the table name; interpolated
            into the statement text as-is.

    Returns:
        True when both statements ran without raising.

    Raises:
        FileNotFoundError: If the configuration file does not exist.
        KeyError: If ``env`` is not a key of the loaded configuration.
        RuntimeError: If the secret cannot be retrieved or either statement
            fails; the underlying error is chained as ``__cause__``.
    """
    # init setup
    envs_info = initialize_config(env_config_path)
    environment_info = envs_info[env]

    try:
        devo_secret_name = environment_info["DEVO_SECRET"]
        password = get_secret(devo_secret_name)
    except Exception as exc:
        # "except Exception" (not a bare except) lets KeyboardInterrupt and
        # SystemExit through; chaining keeps the real cause in the traceback.
        print("Failed to retrieve credentials from secrets")
        raise RuntimeError("Failed to retrieve credentials from secrets") from exc

    # Refresh Impala's view of the table, then recompute its statistics.
    # NOTE: identifiers are placed into the SQL text unescaped, so
    # corporate_store and table must come from a trusted source.
    qualified_table = f"{corporate_store}.{table}"
    try:
        username = environment_info['DEVO_USERNAME']
        hostname = environment_info['IMPALA_HOSTNAME']
        for statement in (
            f"INVALIDATE METADATA {qualified_table}",
            f"COMPUTE STATS {qualified_table}",
        ):
            execute_query(statement, username, hostname, password)
    except Exception as exc:
        print("Failed to retrieve DEVO data, error during connection or request")
        raise RuntimeError(
            f"Failed to refresh Impala metadata/statistics for {qualified_table}"
        ) from exc

    return True
|
||||
|
||||
Reference in New Issue
Block a user