Files
mars-elt/python/mrds_common/mrds/utils/security_utils.py
Grzegorz Michalski 2c225d68ac init
2026-03-02 09:47:35 +01:00

107 lines
3.0 KiB
Python

import re
import logging
# Characters permitted in a run_id: ASCII letters, digits and "_", "-", ":", "+", ".".
_SAFE_RUN_ID_RE = re.compile(r"[a-zA-Z0-9_\-:+.]+")
# Complement of the whitelist above; used only to report what was rejected.
_UNSAFE_CHAR_RE = re.compile(r"[^a-zA-Z0-9_\-:+.]")
# Defence-in-depth deny-list. Because the whitelist is applied first, most of
# these (those needing "/", "\", "<", "(", whitespace or control characters)
# cannot match a value that reaches this check; they are kept so the check
# stays meaningful if the whitelist is ever relaxed.
_DANGEROUS_PATTERNS = tuple(
    re.compile(pattern, re.IGNORECASE)
    for pattern in (
        r"\.\./",
        r"\.\.\\",
        r"<script",
        r"javascript:",
        r"union\s+select",
        r"drop\s+table",
        r"insert\s+into",
        r"delete\s+from",
        r"exec\s*\(",
        r"system\s*\(",
        r"eval\s*\(",
        r"[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]",
    )
)


def _loggable(value, limit=300):
    """Return an escaped, length-capped rendering of *value* for log lines."""
    # repr() escapes newlines and control characters, so an untrusted value
    # cannot break out of its log line and forge additional entries.
    text = repr(value)
    if len(text) > limit:
        text = text[:limit] + "...<truncated>"
    return text


def verify_run_id(run_id, context=None):
    """
    Verify run_id for security compliance.

    The value is stripped of surrounding whitespace, checked for length
    (1-250 characters), matched against a whitelist of safe characters and
    finally screened against a deny-list of known attack patterns.

    Args:
        run_id (str): The run_id to verify
        context (dict, optional): Airflow context; when given, the DAG id
            found under its "dag" entry is included in the success log line

    Returns:
        str: Verified run_id (with surrounding whitespace removed)

    Raises:
        ValueError: If run_id is invalid or suspicious. Every failure is
            re-raised with the prefix "run_id verification failed: ".
    """
    try:
        # Basic checks
        if not run_id or not isinstance(run_id, str):
            raise ValueError(
                f"Invalid run_id: must be non-empty string, got: {type(run_id).__name__}"
            )

        run_id = run_id.strip()
        if not 1 <= len(run_id) <= 250:
            raise ValueError(
                f"Invalid run_id: length must be 1-250 chars, got: {len(run_id)}"
            )

        # Allow only safe characters. fullmatch() must consume the whole
        # string, so there is no reliance on "$" (which tolerates a trailing
        # newline).
        if not _SAFE_RUN_ID_RE.fullmatch(run_id):
            suspicious_chars = "".join(sorted(set(_UNSAFE_CHAR_RE.findall(run_id))))
            logging.warning(
                "SECURITY: Invalid chars in run_id: %s", _loggable(suspicious_chars)
            )
            raise ValueError("Invalid run_id: contains unsafe characters")

        # Check for attack patterns
        for pattern in _DANGEROUS_PATTERNS:
            if pattern.search(run_id):
                logging.error(
                    "SECURITY: Dangerous pattern in run_id: %s", _loggable(run_id)
                )
                raise ValueError("Invalid run_id: contains dangerous pattern")

        # Log success
        if context:
            dag = context.get("dag")
            dag_id = getattr(dag, "dag_id", "unknown") if dag else "unknown"
            logging.info("run_id verified: '%s' for DAG: '%s'", run_id, dag_id)

        return run_id
    except Exception as e:
        # run_id is untrusted and has just failed validation: log it escaped
        # so embedded newlines/control characters cannot inject log entries.
        logging.error(
            "SECURITY: run_id verification failed: %s, Error: %s",
            _loggable(run_id),
            e,
        )
        raise ValueError(f"run_id verification failed: {e}") from e
def get_verified_run_id(context):
    """
    Extract and verify run_id from Airflow context.

    The run_id is taken from the task instance stored under the "ti" key
    when that yields a non-empty value; otherwise the context's own
    "run_id" entry is used.

    Args:
        context (dict): Airflow context

    Returns:
        str: Verified run_id

    Raises:
        ValueError: If no run_id can be extracted from the context, or if
            the extracted run_id fails verification.
    """
    try:
        run_id = None
        if context:
            # getattr() with a default tolerates a missing/None task instance,
            # so a usable context["run_id"] is still picked up instead of the
            # lookup failing with AttributeError.
            run_id = getattr(context.get("ti"), "run_id", None) or context.get(
                "run_id"
            )

        if not run_id:
            raise ValueError("Could not extract run_id from context")

        return verify_run_id(run_id, context)
    except Exception as e:
        logging.error("Failed to get verified run_id: %s", e)
        raise