107 lines
3.0 KiB
Python
107 lines
3.0 KiB
Python
import re
|
|
import logging
|
|
|
|
|
|
# Matches any single character that is NOT on the run_id whitelist
# (ASCII letters, digits, and "_", "-", ":", "+", ".").
_UNSAFE_RUN_ID_CHAR = re.compile(r"[^a-zA-Z0-9_\-:+.]")

# Known attack signatures, compiled once at import time.  Because the
# whitelist above already rejects "/", "\", "<", "(", whitespace and control
# characters, most of these can no longer match by the time they are checked
# (only "javascript:" is built purely from whitelisted characters).  They are
# kept as defence in depth in case the whitelist is ever relaxed.
_DANGEROUS_RUN_ID_PATTERNS = tuple(
    re.compile(pattern, re.IGNORECASE)
    for pattern in (
        r"\.\./",
        r"\.\.\\",
        r"<script",
        r"javascript:",
        r"union\s+select",
        r"drop\s+table",
        r"insert\s+into",
        r"delete\s+from",
        r"exec\s*\(",
        r"system\s*\(",
        r"eval\s*\(",
        r"[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]",
    )
)


def verify_run_id(run_id, context=None):
    """
    Verify run_id for security compliance.

    The value must be a non-empty string that, after stripping surrounding
    whitespace, is 1-250 characters long, consists only of whitelisted
    characters, and matches none of the known attack signatures.

    Args:
        run_id (str): The run_id to verify
        context (dict, optional): Airflow context; only its "dag" entry is
            read, and only to name the DAG in the success log message

    Returns:
        str: Verified run_id (with surrounding whitespace stripped)

    Raises:
        ValueError: If run_id is invalid or suspicious.  Every failure,
            including unexpected errors, is reported as a ValueError whose
            message starts with "run_id verification failed: " and whose
            ``__cause__`` is the original exception.
    """
    try:
        # Basic checks
        if not run_id or not isinstance(run_id, str):
            raise ValueError(
                f"Invalid run_id: must be non-empty string, got: {type(run_id).__name__}"
            )

        run_id = run_id.strip()

        # A whitespace-only input is truthy above but empty after stripping.
        if not 1 <= len(run_id) <= 250:
            raise ValueError(
                f"Invalid run_id: length must be 1-250 chars, got: {len(run_id)}"
            )

        # Allow only safe characters
        unsafe_chars = sorted(set(_UNSAFE_RUN_ID_CHAR.findall(run_id)))
        if unsafe_chars:
            # %r escapes newlines/control characters so that hostile input
            # cannot forge additional log lines.
            logging.warning(
                "SECURITY: Invalid chars in run_id: %r", "".join(unsafe_chars)
            )
            raise ValueError("Invalid run_id: contains unsafe characters")

        # Check for attack patterns
        if any(pattern.search(run_id) for pattern in _DANGEROUS_RUN_ID_PATTERNS):
            logging.error("SECURITY: Dangerous pattern in run_id: %r", run_id)
            raise ValueError("Invalid run_id: contains dangerous pattern")

        # Log success
        if context:
            dag = context.get("dag")
            dag_id = getattr(dag, "dag_id", "unknown") if dag else "unknown"
            logging.info("run_id verified: '%s' for DAG: '%s'", run_id, dag_id)

        return run_id

    except Exception as e:
        # run_id may be arbitrary untrusted data here, so log its repr().
        logging.error(
            "SECURITY: run_id verification failed: %r, Error: %s", run_id, e
        )
        raise ValueError(f"run_id verification failed: {e}") from e
|
|
|
|
|
|
def get_verified_run_id(context):
    """
    Pull the run_id out of an Airflow context and verify it.

    The task instance stored under "ti" is consulted first; the "run_id"
    entry is used only when the context has no "ti" key at all.

    Args:
        context (dict): Airflow context

    Returns:
        str: Verified run_id

    Raises:
        ValueError: If no non-empty run_id can be found in the context.
        Exception: Any error raised during lookup or verification is logged
            and then re-raised unchanged.
    """
    try:
        candidate = None
        if context:
            if "ti" in context:
                # No fallback to "run_id" when the task instance yields an
                # empty value; that case is rejected below.
                candidate = context["ti"].run_id
            elif "run_id" in context:
                candidate = context["run_id"]

        if candidate:
            return verify_run_id(candidate, context)

        raise ValueError("Could not extract run_id from context")

    except Exception as exc:
        logging.error(f"Failed to get verified run_id: {str(exc)}")
        raise
|