init

2026-03-02 09:47:35 +01:00
commit 2c225d68ac
715 changed files with 130067 additions and 0 deletions
--- a/python/mrds_common/mrds/utils/utils.py
+++ b/python/mrds_common/mrds/utils/utils.py
@@ -0,0 +1,83 @@
+import re
+
+
+def parse_uri_with_regex(uri):
+    """
+    Parses an Oracle Object Storage URI using a regular expression to extract the
+    namespace, bucket name, prefix, and object name.
+
+    Parameters:
+        uri (str): The URI string to parse, in the format
+            '/n/{namespace}/b/{bucketname}/o/{object_path}'
+
+    Returns:
+        tuple: A tuple containing (namespace, bucket_name, prefix, object_name).
+            'prefix' is the part of the object path up to and including its last
+            '/', or '' when the object path contains no '/'. 'object_name' is
+            whatever follows that last '/' (it may be empty).
+
+    Raises:
+        ValueError: If the URI does not match the expected format.
+    """
+    # fullmatch instead of match + '$': '$' also matches just before a trailing
+    # newline, so a URI ending in '\n' used to be accepted with the newline
+    # silently dropped, while an embedded newline was rejected. Now both fail.
+    match = re.fullmatch(r"/n/([^/]+)/b/([^/]+)/o/(.*)", uri)
+
+    if match is None:
+        # Include the offending value so the failure can be diagnosed from logs
+        raise ValueError(f"Invalid URI format: {uri!r}")
+
+    namespace, bucket_name, object_path = match.groups()
+
+    # rpartition splits at the last '/'. Without any '/' it yields
+    # ('', '', object_path), so prefix + separator is '' in that case and
+    # '<dirs>/' otherwise.
+    prefix, separator, object_name = object_path.rpartition("/")
+
+    return namespace, bucket_name, prefix + separator, object_name
+
+
+def parse_output_columns(output_columns):
+    """
+    Groups output column definitions by their 'type' and records the header order.
+
+    Parameters:
+        output_columns (iterable): Mappings that each provide 'type' and
+            'column_header'. Entries of type 'xpath' must also provide 'value'
+            and 'is_key'; entries of type 'csv_header', 'static' and
+            'xpath_element_id' must also provide 'value'.
+
+    Returns:
+        tuple: A 7-tuple of lists, in this order:
+            - xpath entries as (value, column_header, is_key)
+            - csv_header entries as (column_header, value)
+            - static entries as (column_header, value)
+            - a_key column headers
+            - workflow_key column headers
+            - xpath_element_id entries as (value, column_header)
+            - every column header, in input order
+
+    Raises:
+        KeyError: If an entry lacks a key required for its type.
+    """
+    xpath_cols = []
+    csv_cols = []
+    static_cols = []
+    a_key_cols = []
+    workflow_key_cols = []
+    element_id_cols = []
+    ordered_headers = []
+
+    for spec in output_columns:
+        kind = spec["type"]
+        header = spec["column_header"]
+        # Every entry contributes to the ordering, whatever its type is
+        ordered_headers.append(header)
+
+        if kind == "xpath":
+            xpath_cols.append((spec["value"], header, spec["is_key"]))
+            continue
+        if kind == "csv_header":
+            csv_cols.append((header, spec["value"]))
+            continue
+        if kind == "static":
+            static_cols.append((header, spec["value"]))
+            continue
+        if kind == "a_key":
+            a_key_cols.append(header)
+            continue
+        if kind == "workflow_key":
+            workflow_key_cols.append(header)
+            continue
+        if kind == "xpath_element_id":
+            # TODO - update all xml_position namings to xpath_element_id
+            element_id_cols.append((spec["value"], header))
+        # Any other type is only reflected in ordered_headers
+
+    return (
+        xpath_cols,
+        csv_cols,
+        static_cols,
+        a_key_cols,
+        workflow_key_cols,
+        element_id_cols,
+        ordered_headers,
+    )