init
This commit is contained in:
294
python/connectors/tms/TMSDBT.py
Normal file
294
python/connectors/tms/TMSDBT.py
Normal file
@@ -0,0 +1,294 @@
|
||||
|
||||
|
||||
import argparse
|
||||
from TMSQuery import XMLQuery
|
||||
|
||||
import mrds.utils.objectstore
|
||||
import tempfile
|
||||
import re
|
||||
import csv
|
||||
from io import StringIO
|
||||
|
||||
import os.path
|
||||
import os, psutil
|
||||
import sys
|
||||
|
||||
|
||||
# OCI Object Storage namespace used for uploads; overridable via env var
# for non-default tenancies.
namespace = os.getenv("BUCKET_NAMESPACE", "frcnomajoc7v")
def memory_usage():
    """Return the resident set size (RSS) of the current process in GiB.

    NOTE(review): despite the old comment mentioning a percentage "like top",
    this returns an absolute value in gibibytes.
    """
    rss_bytes = psutil.Process(os.getpid()).memory_info().rss
    return rss_bytes / (1024 * 1024 * 1024)
def protect_keyword(s):
    """Normalize a TMS column name into a safe Oracle identifier.

    Lower-cases the name, replaces spaces with underscores, and appends a
    trailing underscore to Oracle reserved words (comment/date/number) so
    the result can be used unquoted in DDL and select lists.

    :param s: raw column name as reported by TMS
    :return: sanitized identifier string
    """
    # Fixes: the original lowered the string twice and carried dead
    # commented-out alternatives; a dict lookup also removes the
    # Python 3.10+ match/case dependency.
    s = s.lower().replace(' ', '_')
    reserved = {
        'comment': 'comment_',
        'date': 'date_',
        'number': 'number_',
    }
    return reserved.get(s, s)
# Output directory for generated dbt ODS model files, relative to this script.
cModelsDir = sys.path[0] + '/../dbt/mrds/models/ods/'
# Row-key spread per dataset: a_key = dataset * cDatasetMultiplier + row_index.
cDatasetMultiplier = 10000000
# Command-line interface for the TMS connector.
parser = argparse.ArgumentParser()
# BUG FIX: the dispatch below has an 'elif args.command == "create-ods-model"'
# branch, but the original choices list only offered 'create-model', making
# that branch unreachable. Both spellings are now accepted (backward-compatible
# superset of the old choices).
parser.add_argument("command",
                    choices=['create-model', 'create-ods-model',
                             'create-oracle-table', 'retrieve'],
                    help="create-model retrieve")
parser.add_argument("-n", "--name", help="Name")
parser.add_argument("-u", "--url", required=True, help="URL of TMS service")
parser.add_argument("-U", "--user", required=True, help="TMS user")
parser.add_argument("-P", "--password", required=True, help="TMS password")
parser.add_argument("-x", "--xmlfile", help="XML file")
parser.add_argument("-l", "--layoutfile", help="layout file")
parser.add_argument("-f", "--format", help="output format")
parser.add_argument("-p", "--parameter", action="append", help="Parameter")
parser.add_argument("-c", "--column", action="append", help="Additional column")
parser.add_argument("-d", "--destination", help="destination")
parser.add_argument("-s", "--dataset", help="data set ID", type=int)
parser.add_argument("-v", "--version", help="data model version", type=int, default=1)
args = parser.parse_args()
query = XMLQuery()

# Load the report query XML; assigning to .xml triggers XMLQuery's parser
# via its custom __setattr__.
if args.xmlfile:
    with open(args.xmlfile) as f:
        xml = f.read()
    query.xml = xml

# Load the report layout definition (raw text, base64-encoded later on send).
if args.layoutfile:
    with open(args.layoutfile) as f:
        layout = f.read()
    query.layout = layout

if args.format:
    # Validated by XMLQuery.__setattr__ against the allowed format list.
    query.format = args.format

# -p name=value pairs become report parameters.
if args.parameter:
    for p in args.parameter:
        [name, value] = p.split('=', 1)
        query.parameter[name] = value

# -c "name|type=value" adds artificial output columns; the name/type
# separator may be '|', '/' or ':'. If no type is given, default to
# varchar2(255).
additional_columns = []
if args.column:
    for p in args.column:
        [name, value] = p.split('=', 1)
        t = re.split(r'(?:\|)|(?:/)|(?::)', name, maxsplit = 2)
        name = t[0]
        type = None
        if len(t) == 2:
            type = t[1]
        if not type:
            type = 'varchar2(255)'
        additional_columns.append((name, type, value))

# Force deterministic date/time formatting and disable number formatting
# in the layout so downstream parsing is stable.
query.normalize_output()

# NOTE(review): leftover debug artifact — dumps the full query XML to a
# fixed, world-readable temp path on every run; 'pprint' is unused.
# Candidate for removal.
from pathlib import Path
import pprint
p = Path('/tmp/kurt.xml')
p.write_text(str(query))
if args.command == 'create-oracle-table':

    # Ask TMS for the report's column names/types (list of (name, type)).
    d = query.describe(args.url, args.user, args.password)

    # Artificial bookkeeping columns come first, then any -c columns.
    columns = ["    a_key number(38, 0)", "a_workflow_history_key number(38, 0)"]
    for c in additional_columns:
        columns.append("%s %s"%(c[0], c[1]))

    # Map TMS column types to Oracle column types.
    # NOTE(review): there is no default case — columns with an unmapped
    # type are silently dropped from the DDL; confirm this is intended.
    for col in d:
        name = protect_keyword(col[0])
        match col[1]:
            case 'text':
                columns.append(name + " varchar2(512 char)")
            case 'int':
                columns.append(name + " number(38,0)")
            case 'money':
                columns.append(name + " number(19,4)")
            case 'floating':
                columns.append(name + " binary_double")
            case 'datetime':
                columns.append(name + " date")
            case 'integer':
                columns.append(name + " number(12, 0)")

    # Assemble the CREATE TABLE statement in the templates schema.
    sql = "create table ct_et_templates." + args.name + " (\n"
    sql = sql + ",\n    ".join(columns)
    sql = sql + "\n)\n"

    # '-' or no destination means stdout; otherwise write to a local file.
    if not args.destination or args.destination == '-':
        print(sql)
    else:
        with open(args.destination, 'w') as f:
            f.write(sql)
elif args.command == 'create-ods-model':
|
||||
|
||||
d = query.describe(args.url, args.user, args. password)
|
||||
|
||||
file_name = cModelsDir + args.name + '.yml'
|
||||
f = open(file_name, 'w') # open file in append mode
|
||||
|
||||
f.write('version: %d\n' % args.version)
|
||||
|
||||
f.write('models:' + '\n')
|
||||
f.write(' - name: ' + args.name + '_dbt\n')
|
||||
f.write(' description: "A starter dbt model"' + '\n')
|
||||
f.write(' columns:' + '\n')
|
||||
for col in d:
|
||||
f.write(' - name: ' + col[0] + '\n')
|
||||
f.write(' data_type: ' + col[1] + '\n')
|
||||
f.close()
|
||||
|
||||
|
||||
file_name = cModelsDir + args.name + '.sql'
|
||||
f = open(file_name, 'w') # open file in append mode
|
||||
|
||||
|
||||
if args.destination and args.destination != '-':
|
||||
if ':' in args.destination:
|
||||
dest = args.destination.split(':', 2)
|
||||
path = dest[1]
|
||||
else:
|
||||
path = args.destination
|
||||
prefix = os.path.dirname(path)
|
||||
else:
|
||||
prefix = 'INBOX/TMS/' + args.name.upper() + '/'
|
||||
|
||||
|
||||
|
||||
|
||||
pars = "ptablename => '%s', ptemplatetablename => 'ou_tms.%s', pprefix => '%s'" % (args.name, args.name, prefix)
|
||||
print(f"creating table {args.name}")
|
||||
f.write('{{\n config(\n post_hook = "call ct_mrds.file_manager.create_external_table(%s)"\n )\n}}\n\n' % pars)
|
||||
f.write("{{ config(materialized='table') }}" + "\n")
|
||||
f.write('with source_data as (' + "\n")
|
||||
columns = []
|
||||
columns.append("cast (1 as number(38,0)) as a_key")
|
||||
columns.append("cast (1 as number(38,0)) as a_workflow_history_key")
|
||||
for col in d:
|
||||
name = protect_keyword(col[0])
|
||||
match col[1]:
|
||||
case 'text':
|
||||
columns.append("cast ('x' as varchar2(255 char)) as " + name)
|
||||
case 'int':
|
||||
columns.append("cast (1 as number(38, 0)) as " + name)
|
||||
case 'money':
|
||||
columns.append("cast (1.0 as number(19,4)) as " + name)
|
||||
case 'floating':
|
||||
columns.append("cast (1.0 as binary_double) as " + name)
|
||||
case 'datetime':
|
||||
columns.append("cast (sysdate as date) as " + name)
|
||||
case 'integer':
|
||||
columns.append("cast (1 as number(12, 0)) as " + name)
|
||||
f.write(' select\n ' + ',\n '.join(columns) + '\n')
|
||||
f.write(')\nselect * from source_data\n ')
|
||||
f.close()
|
||||
|
||||
|
||||
|
||||
elif args.command == 'retrieve':
    # Run the report; returns the response body as text, or None on
    # non-200 HTTP status.
    # NOTE(review): a None result would crash the f.write()/print() below —
    # confirm whether upstream guarantees success or a guard is needed.
    ret = query.execute(args.url, args.user, args.password)

    # CSV post-processing only for standard-CSV formats with a dataset ID.
    if query.format in ('scsv', 'standard_csv') and args.dataset:

        # Save result to temporary spooled file for further processing.
        # We avoid doing this in memory to prevent issues with flow
        # EffectivePermissions. Spills to disk above 200 MiB.
        f = tempfile.SpooledTemporaryFile(mode = 'w+', max_size = 200*1024*1024)
        f.write(ret)
        del ret  # release the big string before re-reading
        f.seek(0)

        # Replace newlines embedded inside CSV fields with '<br/>' so every
        # record occupies exactly one physical line.
        reader = csv.reader(f)
        sio = StringIO()
        writer = csv.writer(sio)
        for l in reader:
            l_tmp = [s.replace('\n', '<br/>') for s in l]
            writer.writerow(l_tmp)
        f.close()

        # Necessary to read the data into an array of lines for further processing.
        sio.seek(0)
        lines_tmp = sio.readlines()
        del sio

        if not lines_tmp:
            ret = ""
        else:
            # Prepend artificial columns A_KEY and A_WORKFLOW_HISTORY_KEY plus
            # any -c columns; header names are sanitized via protect_keyword.
            additional_headers = [t[0] for t in additional_columns]
            additional_values = [t[2] for t in additional_columns]
            headers = ['A_KEY','A_WORKFLOW_HISTORY_KEY'] + additional_headers + [protect_keyword(h) for h in lines_tmp[0].split(',')]
            lines = [','.join(headers) ]

            # A_KEY = dataset * cDatasetMultiplier + row index (unique per row);
            # A_WORKFLOW_HISTORY_KEY = dataset ID. Data lines keep their
            # trailing '\n' from readlines(), so no separator is added here.
            i = 0
            for l in lines_tmp[1:]:
                lines.append(str(args.dataset*cDatasetMultiplier + i) + ',' + str(args.dataset) + ',' + ','.join(additional_values + [l]) )
                i += 1

            del lines_tmp

            # Spool again to a temporary file to avoid duplicating memory needs.
            f = tempfile.SpooledTemporaryFile(mode = 'w+', max_size = 200*1024*1024)
            f.writelines(lines)
            del lines
            f.seek(0)
            ret = f.read()
            f.close()

    # Destination handling: '-'/none -> stdout; plain path -> local file;
    # 'bucket:path' -> upload to object storage via a temp file.
    if not args.destination or args.destination == '-':
        print(ret, end='')
    elif ':' not in args.destination:
        with open(args.destination, 'w') as f:
            f.write(ret)
    else:
        f = tempfile.NamedTemporaryFile(delete = False, mode = 'w', prefix = 'TMSDBT-', suffix = '.csv')
        f.write(ret)
        f.close()

        dest = args.destination.split(':', 2)
        bucket = dest[0]
        dirname = os.path.dirname(dest[1])
        filename = os.path.basename(dest[1])
        client = mrds.utils.objectstore.get_client()
        # NOTE(review): this echoes the whole payload to stdout before
        # uploading — looks like leftover debug output.
        with open(f.name, "r") as file:
            print(file.read())
        mrds.utils.objectstore.upload_file(client, f.name,namespace, bucket, dirname, filename)

        os.remove(f.name)

    # Exit code contract used by callers (e.g. the Airflow DAG):
    # 0 = data retrieved, 1 = empty result.
    if ret:
        sys.exit(0)
    else:
        sys.exit(1)
197
python/connectors/tms/TMSQuery.py
Normal file
197
python/connectors/tms/TMSQuery.py
Normal file
@@ -0,0 +1,197 @@
|
||||
|
||||
import xml.etree.ElementTree as ET
|
||||
import re
|
||||
import base64
|
||||
import sys
|
||||
|
||||
|
||||
class XMLQuery:
    """Builder/parser for TMS report-generator XML requests.

    Attribute access is intercepted: public names (format, layout, parameter,
    xml) are transparently mapped to underscore-prefixed internals by
    __setattr__/__getattr__. Assigning .xml re-parses the document; assigning
    .format validates against the allowed format list; .layout is forced to
    end with a newline.
    """

    def __init__(self, xml = None):
        # Defaults; note these set the internal names directly, so the
        # format validation in __setattr__ is bypassed here.
        self._format = 'xml'
        self._layout = ''
        self._parameter = {}
        if xml:
            self._parse_xml(xml)

    def _parse_xml(self, xml):
        """Populate layout/format/parameters from a report-generator XML string."""
        self._tree = ET.fromstring(xml)

        # <layout> content is base64-encoded UTF-8 text.
        layout_b64 = self._tree.find('layout').text
        self._layout = base64.b64decode(layout_b64).decode('utf-8')

        self._format = self._tree.find('format').get('type')

        self._parameter = {}
        for p in self._tree.findall('parameters/parameter'):
            self._parameter[p.get('name')] = p.text

    def execute(self, url, user, password):
        """POST this query to the TMS report service and return the body text.

        Returns None on any non-200 status. TLS verification is disabled
        (verify=False) — NOTE(review): confirm whether this is acceptable for
        the target environment.

        Example: curl -X POST --basic -u <user>:<password> --data @query.xml <url>
        """
        # Imported lazily so the module can be used without requests installed.
        import requests
        from requests.auth import HTTPBasicAuth

        data = str(self)
        basic = HTTPBasicAuth(user, password)

        response = requests.post(url, data=data, auth=basic, verify=False)

        if response.status_code == 200:
            response.encoding = "utf-8"
            return response.text
        else:
            return None

    def describe(self, url, user, password):
        """Return [(column_name, column_type), ...] for this query's report.

        Runs the query in 'xml' format and parses the first <PlainRow>.
        NOTE(review): orig_format is saved but never restored — describe()
        permanently switches the query's format to 'xml'.
        """
        orig_format = self.format
        self.format = 'xml'

        ret = self.execute(url, user, password)

        # Truncate the (potentially huge) response after the first PlainRow
        # and close the root element so it parses as a complete document.
        m = re.match('^.*?\<PlainRow\>.*?\<\/PlainRow\>', ret, re.DOTALL)
        s = m[0] + '\n</report-generator>'

        tree = ET.fromstring(s)

        ret = []
        row = tree.find('PlainRow')
        for c in row.findall('Column'):
            #name = c.get('name')
            name = c.text
            type = c.get('type')
            # TMS reports 'unknown' for some columns; treat them as integer.
            if type == 'unknown': type = 'integer'

            ret.append((name, type))

        return ret

    # NOTE(review): defined without 'self' — usable only as
    # XMLQuery.describe_simple(url, user, password, xml), never on an instance.
    def describe_simple(url, user, password, xml):
        """Like describe(), but builds a fresh query from the given XML."""
        query = XMLQuery(xml)

        query.format='xml'

        ret = query.execute(url = url, user = user, password = password)

        tree = ET.fromstring(ret)

        ret = []
        row = tree.find('PlainRow')
        for c in row.findall('Column'):
            #name = c.get('name')
            name = c.text
            type = c.get('type')
            if type == 'unknown': type = 'integer'

            ret.append((name, type))

        return ret

    def normalize_output(self, date_format = 'dd/MM/yyyy', time_format = 'HH:mm:ss'):
        """Rewrite layout directives so output formatting is deterministic.

        Replaces any date_format=/time_format=/NoNumberFormatting= lines at
        the start of a layout line with fixed values.
        """
        lines = self.layout.splitlines()

        lines = [re.sub(r'^date_format\s*=.*', 'date_format=' + date_format, l) for l in lines]
        lines = [re.sub(r'^time_format\s*=.*', 'time_format=' + time_format, l) for l in lines]
        lines = [re.sub(r'^NoNumberFormatting\s*=.*', 'NoNumberFormatting=1', l) for l in lines]

        self.layout = '\n'.join(lines)

    def __setattr__(self, name, value):
        """Map public names to internals, with per-attribute side effects.

        - format: validated against the allowed report formats
        - layout: guaranteed to end with '\\n'
        - xml: re-parses the document instead of storing the string
        """
        if name == 'format' and value not in ('bin','xml','xml3','html','txt','csv','standard_csv', 'scsv', 'pdf'):
            raise Exception("Invalid report format '" + value + "'")

        if not name.startswith('_'):
            name = '_' + name

        if name == '_layout' and not value.endswith('\n'):
            value = value + '\n'

        if name == '_xml':
            self._parse_xml(value)
            return

        try:
            self.__dict__[name] = value
        except KeyError:
            raise AttributeError

    def __getattr__(self, name):
        """Resolve public names to their underscore-prefixed internals."""
        if not name.startswith('_'):
            name = '_' + name

        try:
            return self.__dict__[name]
        except KeyError:
            raise AttributeError(name)

    def __str__(self):
        """Serialize to the report-generator XML the TMS service expects.

        The layout is base64-encoded; parameters are emitted verbatim
        (NOTE(review): parameter values are not XML-escaped).
        """
        parameters = ''
        for k in self._parameter:
            parameters = parameters + "\n<parameter name='%s'>%s</parameter>" % (k, self._parameter[k])

        layout_b64 = base64.b64encode(self.layout.encode('utf-8')).decode('utf-8')

        return ('<?xml version="1.0" encoding="utf-8"?>\n' + \
            '<report-generator>\n' + \
            ' <format type="%s"/>\n' + \
            ' <layout>\n%s</layout>\n' + \
            ' <parameters>%s\n</parameters>' + \
            '</report-generator>') % (self._format, layout_b64, parameters)
||||
# Manual smoke test: parse a query XML from the command line, show the layout
# before/after normalization, then describe the report's columns.
# NOTE(review): hardcoded credentials and internal URL below — should be
# externalized before this leaves a test environment.
if __name__ == "__main__":

    file = sys.argv[1]
    print(file)

    with open(file) as f:
        xml = f.read()

    query = XMLQuery(xml)

    print(query.layout)
    query.normalize_output()
    print(query.layout)

    #query.format='xml'
    #ret = query.execute(url = 'https://tmsxd104.ecbt1.tadnet.net:9443/report/', user = 'schilli', password = 'chili03')
    #print(ret)

    desc = XMLQuery.describe_simple(url = 'https://tmsxd104.ecbt1.tadnet.net:9443/report/', user = 'schilli', password = 'chili03', xml = xml)

    print(str(desc))
||||
355
python/connectors/tms/sample_DAG.py
Normal file
355
python/connectors/tms/sample_DAG.py
Normal file
@@ -0,0 +1,355 @@
|
||||
"""
|
||||
DAG: w_ODS_TMS_TRANSACTION (expanded example)
|
||||
Purpose:
|
||||
- Load layout+parameter metadata from TMS-layouts/w_ODS_TMS_TRANSACTION.yml
|
||||
- Call connectors/tms/TMSDBT.py to retrieve data into CSV in object storage
|
||||
- On first run, generate Oracle DDL and create an external table
|
||||
- Process file and record status in MRDS workflow tables
|
||||
Notes:
|
||||
- This is an expanded, readable version of the factory-generated DAG.
|
||||
- Replace paths/usernames/password references as appropriate.
|
||||
"""
|
||||
|
||||
import copy
|
||||
import itertools
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
import traceback
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
|
||||
import yaml
|
||||
from airflow import DAG
|
||||
from airflow.operators.python import PythonOperator
|
||||
from pytz import timezone
|
||||
|
||||
# --- Project-specific deps (must exist in your Airflow image) ---
|
||||
from mrds.core import main # noqa: F401 # imported to mirror the factory env
|
||||
import mrds.utils.manage_files as mf
|
||||
import mrds.utils.manage_runs as mr
|
||||
|
||||
# ---------- Paths & constants ----------
# Resolved relative to this DAG file; the "./_" fallback covers exec contexts
# where __file__ is undefined.
gScriptDir = Path(globals().get("__file__", "./_")).absolute().parent
gDataDir = str(gScriptDir / "TMS-layouts") + "/"
gConfigDir = str(gScriptDir / "config")
gConnDir = "/opt/airflow/python/connectors/tms/"
gTableDir = str(gScriptDir / "TMS-tables") + "/"

DAG_NAME = "w_ODS_TMS_TRANSACTION"
ODS_TABLE = DAG_NAME
DATABASE_NAME = "ODS"
WF_NAME = DAG_NAME

default_args = {
    "owner": "ecb",
    "depends_on_past": False,
    "email_on_failure": False,
    "email_on_retry": False,
    "retries": 0,
    "execution_timeout": timedelta(minutes=60),
    "retry_delay": timedelta(minutes=5),
}

# ---------- Load YAML configs once on parse ----------
# NOTE: these run at DAG-parse time on the scheduler; missing files will
# break DAG import.
with open(gDataDir + DAG_NAME + ".yml", "r") as f:
    report_desc = yaml.safe_load(f) or {}

with open(gConfigDir + "/TMS.yml", "r") as f:
    tms_config = yaml.safe_load(f)

# TMS + storage config
tms_url = tms_config["TMS-URL"]
tms_user = tms_config["TMS-user"]
tms_pwd = tms_config["TMS-password"]
prefix = tms_config["dest-prefix"] + DAG_NAME + "/" + DAG_NAME + "/"
data_prefix = tms_config["data-prefix"] + DAG_NAME + "/"
dest = tms_config["dest-bucket"] + ":" + prefix

# Visible vs hidden params (from layout YAML): visible ones surface in the
# Airflow UI; hidden ones are merged back in inside the task.
params_visible = {}
params_hidden = {}
params_dict = report_desc.get("parameters") or {}
for p, meta in params_dict.items():
    val = meta.get("value", None)
    if not meta.get("hidden", False):
        params_visible[p] = val
    else:
        params_hidden[p] = val
|
||||
# ---------- Helpers (parameter handling) ----------
def _enum_param_combinations_recursive(params, keys):
    """
    Build all combinations of params (cartesian product), supporting
    'column(<name>)' derived lists aligned by index.

    Returns a list of rows; each row is a list of (key, value) pairs.
    Reads the module-level params_dict for per-key definitions.
    """
    k = None
    result = []
    keys = list(keys)  # safe copy

    # Advance to the first key whose value is set; an empty string counts
    # as a deliberate value.
    while keys:
        k = keys.pop(0)
        v = params[k]
        if v or v == "":
            break

    # No keys at all -> no combinations.
    # NOTE(review): if the loop exhausts with the last value unset (None),
    # k is truthy and we fall through with v=None — confirm against factory.
    if not k:
        return []

    v = v if isinstance(v, list) else [v]

    # derived columns aligned with v (same length)
    derived_columns = []
    # params_dict[k] holds the definition, not just the value
    pdef = params_dict.get(k, {})
    for c in list(pdef):
        if re.match(r"column\(.*\)$", c):
            vtmp = pdef[c]
            vtmp = vtmp if isinstance(vtmp, list) else [vtmp]
            derived_columns.append((c, vtmp))

    # Base case: this was the last usable key -> one row per value, each
    # carrying its index-aligned derived columns.
    if not keys:
        for i, value in enumerate(v):
            row = [(k, value)]
            for col_key, aligned_values in derived_columns:
                row.append((col_key, aligned_values[i]))
            result.append(row)
        return result

    # Recursive case: cross the remaining keys' combinations with each of
    # this key's values (deepcopy so rows stay independent).
    combinations = _enum_param_combinations_recursive(params, keys)
    for row in combinations:
        for i, vtmp in enumerate(v):
            new_row = copy.deepcopy(row)
            new_row.append((k, vtmp))
            for col_key, aligned_values in derived_columns:
                new_row.append((col_key, aligned_values[i]))
            result.append(new_row)

    return result
|
||||
def _enum_param_combinations(params, sequential=False):
    """Enumerate all parameter combinations for a run.

    The ``sequential`` flag is accepted for interface compatibility but is
    ignored — the factory's sequential path was buggy and unused.
    """
    all_keys = list(params)
    return _enum_param_combinations_recursive(params, all_keys)
||||
def _allowed_select(table, expression, condition="1 = 1"):
    """
    Guarded select used by eval_params(select(...)).
    Only a small whitelist of tables may be queried, to avoid arbitrary reads.
    """
    whitelist = {
        ODS_TABLE.upper(),
        "DUAL",
        "CT_MRDS.A_WORKFLOW_HISTORY",
    }
    if table.upper() not in whitelist:
        raise Exception(f"Not allowed to select from {table}")
    rows = mr.select_ods_tab(table, expression, condition)
    return rows[0]
||||
def _eval_param(v):
|
||||
"""
|
||||
Evaluate special functional values:
|
||||
- select(...) => guarded DB helper above
|
||||
- eval(...) => strongly discouraged; keep disabled or restricted
|
||||
"""
|
||||
s = str(v) if v is not None else ""
|
||||
if re.match(r"\s*select\(.*\)", s):
|
||||
# Expose only 'select' symbol to eval
|
||||
return eval(s, {"select": _allowed_select}, {})
|
||||
if re.match(r"\s*eval\(.*\)\s*$", s):
|
||||
# If you really must support eval, strictly sandbox or remove this path.
|
||||
raise ValueError("eval(...) not allowed in this hardened DAG.")
|
||||
return v
|
||||
|
||||
|
||||
def _finalize_param_list(param_list):
    """
    Apply replacements and drop virtual params according to YAML definitions
    (module-level params_dict).
    """
    merged = dict(param_list)

    # In-string substitution: a parameter's name token inside the target
    # parameter's value is replaced by that parameter's value.
    for pname, meta in params_dict.items():
        target = meta.get("replace_parameter")
        if not target:
            continue
        if target in merged and pname in merged and isinstance(merged[target], str):
            merged[target] = merged[target].replace(pname, str(merged[pname]))

    # Keep only the non-virtual parameters.
    return [
        (key, val)
        for key, val in merged.items()
        if not params_dict.get(key, {}).get("virtual", False)
    ]
|
||||
# ---------- Core work ----------
def execute_report(**context):
    """
    For each parameter combination:
      - create workflow key
      - call TMSDBT.py retrieve to land CSV
      - if first time, create Oracle table from generated DDL
      - process file, finalize workflow Y/N

    Raises on any connector/DB failure after marking the workflow 'N'.
    """
    logger = logging.getLogger("airflow.task")
    logger.setLevel(logging.DEBUG)

    run_id = context["dag_run"].run_id
    # Hidden params are merged back in here; only visible ones are exposed
    # in the Airflow UI.
    all_params = {**params_visible, **params_hidden}

    # 1) Compute combinations
    combos = _enum_param_combinations(all_params)

    # 2) Evaluate select(...) etc and finalize
    evaluated = []
    for combo in combos or [[]]:
        # first pass: special evaluations
        pair_list = []
        for k, v in combo:
            pair_list.append((k, _eval_param(v)))
        # second pass: replacements + pruning
        evaluated.append(_finalize_param_list(pair_list))

    # if no combos at all, ensure we run once
    if not evaluated:
        evaluated = [[]]

    # Timing + workflow; one timestamp shared by all combinations of this run.
    ts = "{:%Y%m%d_%H%M%S}".format(datetime.now(timezone("Europe/Berlin")))

    # NOTE(review): idx is currently unused in the loop body.
    for idx, param_list in enumerate(evaluated, start=1):
        wf_key = mr.init_workflow(DATABASE_NAME, WF_NAME, run_id)
        file_name = f"{WF_NAME}.{wf_key}.{ts}.csv"

        try:
            # Build connector command safely (no shell quoting games)
            cmd = [
                sys.executable,  # 'python'
                os.path.join(gConnDir, "TMSDBT.py"),
                "retrieve",
                "--name", WF_NAME,
                "--url", tms_url,
                "-U", tms_user,
                "--password", tms_pwd,
                "--layoutfile", gDataDir + DAG_NAME + ".fkr",
                "-f", "scsv",
                "--dataset", str(wf_key),
                "-d", dest + file_name,
            ]

            # Map params to -p or -c switches; 'column(<name>)' keys become
            # additional columns (-c), everything else a report parameter (-p).
            for k, v in param_list:
                sval = "" if v is None else str(v).rstrip()
                m = re.match(r"column\((.*)\)$", k)
                if m:
                    cmd.extend(["-c", f'{m.group(1)}={sval}'])
                else:
                    cmd.extend(["-p", f"{k}={sval}"])
                    # NOTE(review): indentation reconstructed from a mangled
                    # dump — confirm whether column params should also be
                    # recorded as workflow properties.
                    mr.set_workflow_property(wf_key, DATABASE_NAME, k, sval)

            logger.debug("Running connector: %s", json.dumps(cmd))
            res = subprocess.run(cmd, capture_output=True, check=False)
            logger.debug("stdout: %s", res.stdout.decode(errors="ignore"))
            logger.debug("stderr: %s", res.stderr.decode(errors="ignore"))

            # Connector exit-code contract: 0 = data landed, 1 = empty result.
            if res.returncode is None:
                raise RuntimeError("Connector returned no status")
            if res.returncode == 1:
                logger.info("No data returned for wf_key=%s (continuing)", wf_key)
                mr.finalise_workflow(wf_key, "Y")
                continue
            if res.returncode != 0:
                raise RuntimeError(f"Connector failed (rc={res.returncode})")

            # Data landed -> ensure source config exists, bootstrap table if needed
            # NOTE(review): ODS_TABLE is interpolated into SQL via f-string; it
            # is a module constant here, but parameterizing would be safer.
            cfg = mf.execute_query(
                "select * from CT_MRDS.A_SOURCE_FILE_CONFIG "
                f"where a_source_key = 'TMS' and table_id = '{ODS_TABLE}'"
            )

            if not cfg:
                # Generate DDL file
                ddl_cmd = [
                    sys.executable,
                    os.path.join(gConnDir, "TMSDBT.py"),
                    "create-oracle-table",
                    "--name", WF_NAME,
                    "--url", tms_url,
                    "-U", tms_user,
                    "--password", tms_pwd,
                    "--layoutfile", gDataDir + DAG_NAME + ".fkr",
                    "-d", gTableDir + WF_NAME + ".sql",
                ]
                for k, v in param_list:
                    sval = "" if v is None else str(v).rstrip()
                    m = re.match(r"column\((.*)\)$", k)
                    if m:
                        ddl_cmd.extend(["-c", f'{m.group(1)}={sval}'])
                    else:
                        ddl_cmd.extend(["-p", f"{k}={sval}"])

                logger.debug("Generating DDL: %s", json.dumps(ddl_cmd))
                ddl_res = subprocess.run(ddl_cmd, capture_output=True, check=True)
                logger.debug("DDL stdout: %s", ddl_res.stdout.decode(errors="ignore"))
                logger.debug("DDL stderr: %s", ddl_res.stderr.decode(errors="ignore"))

                # Execute DDL and create external table
                sql = Path(gTableDir + WF_NAME + ".sql").read_text()
                mf.execute(sql)
                mf.add_column_date_format(
                    f"CT_ET_TEMPLATES.{ODS_TABLE}", "DEFAULT", "DD/MM/YYYY HH24:MI:SS"
                )
                mf.create_external_table(ODS_TABLE, f"CT_ET_TEMPLATES.{ODS_TABLE}", data_prefix)
                mf.add_source_file_config(
                    "TMS",
                    "INPUT",
                    DAG_NAME,
                    DAG_NAME,
                    r".*\.csv",
                    ODS_TABLE,
                    f"CT_ET_TEMPLATES.{ODS_TABLE}",
                )

            # Process landed file (register, move, etc. as per your mf impl)
            mf.process_source_file(prefix, file_name)
            mr.finalise_workflow(wf_key, "Y")

        except BaseException as ex:
            # rich error logging, then mark workflow failed and re-raise
            ex_type, ex_value, ex_tb = sys.exc_info()
            tb = traceback.extract_tb(ex_tb)
            stack = [
                f"File: {t[0]}, Line: {t[1]}, Func: {t[2]}, Code: {t[3]}"
                for t in tb
            ]
            logging.error("Exception type: %s", ex_type.__name__)
            logging.error("Exception message: %s", ex_value)
            logging.error("Stack trace: %s", stack)
            mr.finalise_workflow(wf_key, "N")
            raise
||||
# ---------- DAG definition ----------
# Manual-trigger DAG with a single task that runs the whole retrieve/bootstrap
# pipeline (execute_report).
with DAG(
    dag_id=DAG_NAME,
    default_args=default_args,
    description=DAG_NAME,
    schedule_interval=None,  # manual trigger
    params=params_visible,  # visible-only; hidden merged inside task
    start_date=datetime(2025, 1, 1),
    catchup=False,
    tags=[DAG_NAME],
) as dag:

    retrieve_report = PythonOperator(
        task_id="retrieve_report",
        python_callable=execute_report,
        # Tighter than the 60-minute default_args timeout.
        execution_timeout=timedelta(minutes=30),
    )
|
||||
Reference in New Issue
Block a user