init
This commit is contained in:
106
python/mrds_common/mrds/utils/security_utils.py
Normal file
106
python/mrds_common/mrds/utils/security_utils.py
Normal file
@@ -0,0 +1,106 @@
|
||||
import re
|
||||
import logging
|
||||
|
||||
|
||||
# Upper bound on the length of a run_id after surrounding whitespace is removed.
_RUN_ID_MAX_LENGTH = 250

# Allow-list: a run_id may consist only of these characters.
_RUN_ID_SAFE_RE = re.compile(r"[a-zA-Z0-9_\-:+.T]+")

# Complement of the allow-list, used to report which characters were rejected.
_RUN_ID_UNSAFE_CHAR_RE = re.compile(r"[^a-zA-Z0-9_\-:+.T]")

# Deny-list applied after the allow-list as defence in depth. Most entries need
# characters the allow-list already rejects ("/", "\", "<", "(", whitespace,
# control characters); they are kept so the check still holds if the allow-list
# is ever relaxed.
_RUN_ID_DANGEROUS_RES = tuple(
    re.compile(pattern, re.IGNORECASE)
    for pattern in (
        r"\.\./",
        r"\.\.\\",
        r"<script",
        r"javascript:",
        r"union\s+select",
        r"drop\s+table",
        r"insert\s+into",
        r"delete\s+from",
        r"exec\s*\(",
        r"system\s*\(",
        r"eval\s*\(",
        r"[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]",
    )
)


def verify_run_id(run_id, context=None):
    """
    Verify run_id for security compliance.

    The value must be a non-empty string which, after surrounding whitespace
    is stripped, is 1-250 characters long, contains only the characters
    ``a-z A-Z 0-9 _ - : + .`` and matches none of the deny-listed patterns.

    Args:
        run_id (str): The run_id to verify
        context (dict, optional): Airflow context for logging. When truthy,
            its "dag" entry (if any) supplies the dag_id written to the
            success log line.

    Returns:
        str: Verified run_id, with surrounding whitespace stripped

    Raises:
        ValueError: If run_id is invalid or suspicious. Any other exception
            raised while verifying is also reported as ValueError; the
            original exception is attached as ``__cause__``.
    """
    try:
        # Basic checks
        if not run_id or not isinstance(run_id, str):
            raise ValueError(
                f"Invalid run_id: must be non-empty string, got: {type(run_id).__name__}"
            )

        run_id = run_id.strip()

        if not 1 <= len(run_id) <= _RUN_ID_MAX_LENGTH:
            raise ValueError(
                f"Invalid run_id: length must be 1-{_RUN_ID_MAX_LENGTH} chars, "
                f"got: {len(run_id)}"
            )

        # Allow only safe characters
        if not _RUN_ID_SAFE_RE.fullmatch(run_id):
            # Sorted for a deterministic message; %r escapes newlines and
            # control characters so the rejected input cannot forge log lines.
            unsafe_chars = "".join(sorted(set(_RUN_ID_UNSAFE_CHAR_RE.findall(run_id))))
            logging.warning("SECURITY: Invalid chars in run_id: %r", unsafe_chars)
            raise ValueError("Invalid run_id: contains unsafe characters")

        # Check for attack patterns
        if any(pattern.search(run_id) for pattern in _RUN_ID_DANGEROUS_RES):
            # Safe to log verbatim: the value already passed the allow-list.
            logging.error("SECURITY: Dangerous pattern in run_id: '%s'", run_id)
            raise ValueError("Invalid run_id: contains dangerous pattern")

        # Log success
        if context:
            dag = context.get("dag")
            dag_id = getattr(dag, "dag_id", "unknown") if dag else "unknown"
            logging.info("run_id verified: '%s' for DAG: '%s'", run_id, dag_id)

        return run_id

    except Exception as e:
        # run_id is untrusted and may be exactly what failed validation, so it
        # is logged via %r to keep embedded newlines/control characters escaped.
        logging.error(
            "SECURITY: run_id verification failed: %r, Error: %s", run_id, e
        )
        raise ValueError(f"run_id verification failed: {e}") from e
|
||||
|
||||
|
||||
def get_verified_run_id(context):
    """
    Extract and verify run_id from Airflow context.

    The run_id is taken from the ``run_id`` attribute of ``context["ti"]``
    when that yields a non-empty value, otherwise from ``context["run_id"]``.
    The extracted value is then passed to ``verify_run_id``.

    Args:
        context (dict): Airflow context

    Returns:
        str: Verified run_id

    Raises:
        ValueError: If no run_id can be extracted from context. Exceptions
            raised by ``verify_run_id`` are logged and re-raised unchanged.
    """
    try:
        run_id = None
        if context:
            if "ti" in context:
                # getattr tolerates a missing/None task instance so the
                # failure surfaces as the ValueError below rather than as an
                # AttributeError.
                run_id = getattr(context["ti"], "run_id", None)
            if not run_id and "run_id" in context:
                # Fall back to the context key when "ti" gave nothing usable.
                run_id = context["run_id"]

        if not run_id:
            raise ValueError("Could not extract run_id from context")

        return verify_run_id(run_id, context)

    except Exception as e:
        logging.error("Failed to get verified run_id: %s", e)
        raise
|
||||
Reference in New Issue
Block a user