Files
mars-elt/python/mrds_common/mrds/utils/utils.py
Grzegorz Michalski 2c225d68ac init
2026-03-02 09:47:35 +01:00

84 lines
2.6 KiB
Python

import re
def parse_uri_with_regex(uri: str) -> tuple:
    """
    Parse an Oracle Object Storage URI into its components.

    The URI must have the form '/n/{namespace}/b/{bucketname}/o/{object_path}'.
    The object path is split at its last '/' into a prefix (which keeps the
    trailing '/') and an object name.

    Parameters:
        uri (str): The URI string to parse.

    Returns:
        tuple: (namespace, bucket_name, prefix, object_name).
            - prefix is '' when the object path contains no '/'.
            - object_name is '' when the object path is empty or ends with '/'.

    Raises:
        ValueError: If the URI does not match the expected format. The message
            includes the offending URI to make failures diagnosable.
    """
    # Namespace and bucket are single path segments (no '/'); everything after
    # '/o/' is the object path and may itself contain slashes.
    # NOTE: '$' also matches just before one trailing newline and '.' does not
    # match newlines, so a single trailing '\n' is tolerated and dropped.
    pattern = r"^/n/([^/]+)/b/([^/]+)/o/(.*)$"

    match = re.match(pattern, uri)
    if not match:
        raise ValueError(f"Invalid URI format: {uri!r}")

    namespace, bucket_name, object_path = match.groups()

    # rpartition splits at the LAST '/'. When there is no '/', both the head
    # and the separator are empty, so the prefix naturally becomes ''.
    head, separator, object_name = object_path.rpartition("/")
    prefix = head + separator

    return namespace, bucket_name, prefix, object_name
def parse_output_columns(output_columns):
    """
    Group output-column definitions by their "type" and record header order.

    Each entry is indexed by "type" and "column_header"; depending on the type,
    "value" (and for "xpath" also "is_key") is read as well.

    Parameters:
        output_columns: Iterable of mapping-like entries describing columns.

    Returns:
        tuple: A 7-tuple of lists, in this order:
            - xpath entries as (value, column_header, is_key)
            - csv_header entries as (column_header, value)
            - static entries as (column_header, value)
            - a_key column headers
            - workflow_key column headers
            - xpath_element_id entries as (value, column_header)
            - every column header in input order, whatever its type

    Raises:
        KeyError: If an entry lacks a key required for its type.
    """
    xpath_entries, csv_entries, static_entries = [], [], []
    a_key_entries, workflow_key_entries = [], []
    xml_position_entries = []
    column_order = []

    for column in output_columns:
        kind = column["type"]
        header = column["column_header"]
        # The header is recorded for every entry, including unrecognised types.
        column_order.append(header)

        if kind == "xpath":
            xpath_entries.append((column["value"], header, column["is_key"]))
        elif kind in ("csv_header", "static"):
            bucket = csv_entries if kind == "csv_header" else static_entries
            bucket.append((header, column["value"]))
        elif kind in ("a_key", "workflow_key"):
            bucket = a_key_entries if kind == "a_key" else workflow_key_entries
            bucket.append(header)
        elif kind == "xpath_element_id":
            # TODO - update all xml_position namings to xpath_element_id
            xml_position_entries.append((column["value"], header))
        # Any other type contributes only its header to column_order.

    return (
        xpath_entries,
        csv_entries,
        static_entries,
        a_key_entries,
        workflow_key_entries,
        xml_position_entries,
        column_order,
    )