import json
import re
import sys
import time
from datetime import datetime, timedelta

import requests
from airflow import DAG
from airflow.models import Variable
from airflow.operators.dummy_operator import DummyOperator
from airflow.operators.python_operator import PythonOperator

# NOTE: Informatica (IICS) integration requested by the Informatica team (akash).


def sanitize_task_id(task_id: str) -> str:
    """Make an arbitrary string safe to use as an Airflow task_id.

    Collapses every run of characters outside [a-zA-Z0-9_] to a single
    underscore and guarantees the result starts with a letter or digit.
    """
    sanitized = re.sub(r'[^a-zA-Z0-9_]+', '_', task_id)
    # task_ids that begin with a symbol are rejected by Airflow; prefix them.
    if not re.match(r'^[a-zA-Z0-9]', sanitized):
        sanitized = 'task_' + sanitized
    return sanitized


# Connection parameters come from Airflow Variables so credentials and
# endpoints can be rotated without redeploying this DAG file.
iics_username = Variable.get("iics_username")
iics_password = Variable.get("iics_password")
task_type = Variable.get("task_type", default_var="MTT")
base_url = Variable.get(
    "iics_base_url",
    default_var="https://dm-us.informaticacloud.com/ma/api/v2/user/login",
)

# Task names to run, stored as JSON arrays in Airflow Variables.
CDI_task_name = json.loads(
    Variable.get(
        "CDI_task_name",
        default_var='["Task_Date_Dim", "Task_Items", "Task_Store_Sales"]',
    )
)
CDI_E_task_name = json.loads(
    Variable.get("CDI_E_task_name", default_var='["Task_Total_Store_Sales_IWDEMO"]')
)

# Default arguments applied to every task in the DAG.
# Scheduling lives on the DAG object itself (schedule_interval below), so the
# invalid 'schedule' key that used to sit here has been removed -- default_args
# entries are forwarded to each operator, and BaseOperator has no such argument.
# NOTE(review): a dynamic start_date (datetime.now()) is an Airflow
# anti-pattern -- the scheduler expects a static date; confirm before relying
# on scheduled (non-manual) runs.
default_args = {
    'owner': 'infa',
    'depends_on_past': False,
    'email': ['airflow@example.com'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=1),
    'start_date': datetime.now() - timedelta(seconds=10),
}


def get_session_id(un, pw):
    """Log in to IICS and return (icSessionId, serverUrl).

    Exits the interpreter (SystemExit, status 1) on a non-200 response so
    the Airflow task is marked failed.
    """
    data = {'@type': 'login', 'username': un, 'password': pw}
    headers = {'Content-Type': 'application/json', 'Accept': 'application/json'}
    r = requests.post(base_url, data=json.dumps(data), headers=headers)
    if r.status_code == 200:
        body = r.json()
        return body["icSessionId"], body["serverUrl"]
    print('API call failed:', r.status_code)
    print(r.text)
    sys.exit(1)


def start_job(session_id, server_url, taskname, taskType):
    """Start an IICS job and return (taskId, runId, taskName).

    Raises:
        RuntimeError: if the job could not be started.  (Previously the
        failure path fell through and returned None, which made the caller
        crash later with an opaque tuple-unpacking TypeError.)
    """
    job_start_url = server_url + "/api/v2/job"
    headers = {
        'Content-Type': 'application/json',
        'icSessionId': session_id,
        'Accept': 'application/json',
    }
    data = {'@type': 'job', 'taskName': taskname, 'taskType': taskType}
    r = requests.post(job_start_url, data=json.dumps(data), headers=headers)
    if r.status_code == 200:
        response_content = r.json()
        print("Job", taskname, "started successfully")
        return (
            response_content['taskId'],
            response_content['runId'],
            response_content['taskName'],
        )
    print('Job failed to start:', r.status_code)
    print(r.text)
    raise RuntimeError(
        f"IICS job '{taskname}' failed to start (HTTP {r.status_code})"
    )


def get_status(server_url, session_id):
    """Return (taskId, executionState, taskName, runId) for the first entry
    of the IICS activity monitor, or None when the monitor is empty or the
    request fails.

    NOTE(review): only the FIRST activity entry is inspected -- if several
    jobs run concurrently this may report the state of a different job;
    confirm whether filtering by taskId/runId is needed.
    """
    job_activity_url = server_url + "/api/v2/activity/activityMonitor"
    headers = {
        'Content-Type': 'application/json',
        'icSessionId': session_id,
        'Accept': 'application/json',
    }
    r = requests.get(job_activity_url, headers=headers)
    if r.status_code == 200:
        for obj in r.json():
            return obj['taskId'], obj['executionState'], obj['taskName'], obj['runId']
    else:
        print('Failed to get status:', r.status_code)
        print(r.text)


def execute_task(task_name):
    """Run one IICS task synchronously.

    Logs in, starts the job, polls the activity monitor every 15 seconds
    until the job leaves an active state (or disappears from the monitor),
    then fetches and prints the session log.
    """
    session_id, server_url = get_session_id(iics_username, iics_password)
    task_id, run_id, _ = start_job(session_id, server_url, task_name, task_type)
    log_url = f"{server_url}/api/v2/activity/activityLog/"
    headers = {
        'Content-Type': 'application/json',
        'icSessionId': session_id,
        'Accept': 'application/json',
    }
    active_states = {"RUNNING", "INITIALIZED", "STOPPING", "QUEUED"}
    while True:
        time.sleep(15)
        task_status = get_status(server_url, session_id)
        # None (empty monitor / status failure) is treated as "no longer active".
        if not task_status or task_status[1] not in active_states:
            url = f"{log_url}?taskId={task_id}&runId={run_id}"
            r = requests.get(url, headers=headers)
            for obj in r.json():
                log_detail = requests.get(
                    f"{log_url}{obj['id']}/sessionLog", headers=headers
                )
                print(log_detail.text)
            break


# The DAG object; scheduling and catchup behavior belong here, not in
# default_args.
dag = DAG(
    'IICS_Airflow_Demo',
    default_args=default_args,
    description='A Sample IICS Airflow DAG',
    schedule_interval='@daily',
    catchup=False,
)

# Task group 1: CDI Tasks
# Boundary markers: every CDI task fans out between these two no-op nodes.
cdi_start = DummyOperator(task_id='cdi_start', dag=dag)
cdi_end = DummyOperator(task_id='cdi_end', dag=dag)

# One PythonOperator per CDI task name, all running in parallel between the
# boundary markers.
for name in CDI_task_name:
    tid = 'IICS_CDI_' + sanitize_task_id(name)
    print(f"Creating task: {tid} for original task name: {name}")
    op = PythonOperator(
        task_id=tid,
        python_callable=execute_task,
        op_kwargs={'task_name': name},
        dag=dag,
    )
    cdi_start >> op >> cdi_end

# CDI-E tasks start only after every CDI task has finished (downstream of
# cdi_end).
for name in CDI_E_task_name:
    tid = 'IICS_CDI_E_' + sanitize_task_id(name)
    print(f"Creating E task: {tid} for original task name: {name}")
    op = PythonOperator(
        task_id=tid,
        python_callable=execute_task,
        op_kwargs={'task_name': name},
        dag=dag,
    )
    cdi_end >> op