import re import logging def verify_run_id(run_id, context=None): """ Verify run_id for security compliance. Args: run_id (str): The run_id to verify context (dict, optional): Airflow context for logging Returns: str: Verified run_id Raises: ValueError: If run_id is invalid or suspicious """ try: # Basic checks if not run_id or not isinstance(run_id, str): raise ValueError( f"Invalid run_id: must be non-empty string, got: {type(run_id).__name__}" ) run_id = run_id.strip() if len(run_id) < 1 or len(run_id) > 250: raise ValueError( f"Invalid run_id: length must be 1-250 chars, got: {len(run_id)}" ) # Allow only safe characters if not re.match(r"^[a-zA-Z0-9_\-:+.T]+$", run_id): suspicious_chars = "".join( set( char for char in run_id if not re.match(r"[a-zA-Z0-9_\-:+.T]", char) ) ) logging.warning(f"SECURITY: Invalid chars in run_id: '{suspicious_chars}'") raise ValueError("Invalid run_id: contains unsafe characters") # Check for attack patterns dangerous_patterns = [ r"\.\./", r"\.\.\\", r"