Files
mars-elt/python/mrds_common/mrds/utils/csv_utils.py
Grzegorz Michalski 2c225d68ac init
2026-03-02 09:47:35 +01:00

70 lines
2.5 KiB
Python

import csv
import os
# Factor by which the task history key is multiplied in add_a_key_columns
# before the 1-based row number is added to form each row's key value.
TASK_HISTORY_MULTIPLIER = 1_000_000_000
def read_csv_file(csv_filepath, encoding_type="utf-8"):
    """Read a CSV file and split it into a header row and data rows.

    Args:
        csv_filepath: Path of the CSV file to read.
        encoding_type: Text encoding used to decode the file.

    Returns:
        A ``(headers, data_rows)`` tuple. ``headers`` is the first parsed
        row (a list of strings) and ``data_rows`` is a list holding every
        following row. For a file that contains no rows at all, both are
        empty lists.

    Raises:
        OSError: If the file cannot be opened.
        csv.Error: If the csv module cannot parse the content.
    """
    # newline="" lets the csv module handle embedded line breaks itself.
    with open(csv_filepath, "r", newline="", encoding=encoding_type) as csvfile:
        reader = csv.reader(csvfile)
        headers = next(reader, None)
        if headers is None:
            # Completely empty file: no header row to index into.
            return [], []
        data_rows = list(reader)
    return headers, data_rows
def write_data_to_csv_file(csv_filepath, data, encoding_type="utf-8"):
    """Write headers and rows to a CSV file via a temporary sibling file.

    The content is first written to ``<csv_filepath>.tmp`` and then moved
    over the destination with ``os.replace``, so the destination is only
    touched once the whole file has been written. If anything fails, the
    temporary file is removed and the exception is re-raised.

    Args:
        csv_filepath: Destination path (``str`` or path-like object).
        data: Mapping with a ``"headers"`` entry (one row) and a ``"rows"``
            entry (an iterable of rows).
        encoding_type: Text encoding used for the output file.

    Raises:
        KeyError: If ``data`` lacks ``"headers"`` or ``"rows"``.
        OSError: If the temporary file cannot be written or moved.
    """
    temp_csv_filepath = os.fspath(csv_filepath) + ".tmp"
    try:
        with open(
            temp_csv_filepath, "w", newline="", encoding=encoding_type
        ) as csvfile:
            writer = csv.writer(csvfile, quoting=csv.QUOTE_ALL)
            writer.writerow(data["headers"])
            writer.writerows(data["rows"])
        # Swap in the finished file only after it has been closed.
        os.replace(temp_csv_filepath, csv_filepath)
    except BaseException:
        # Do not leave a partial temporary file behind on failure.
        try:
            os.remove(temp_csv_filepath)
        except OSError:
            pass
        raise
def add_static_columns(data_rows, headers, static_entries):
    """Set constant-valued columns on every row, in place.

    Args:
        data_rows: List of row lists; each row is mutated.
        headers: List of column names; extended when a column is new.
        static_entries: Iterable of ``(column_header, value)`` pairs.

    For each pair, a column not yet in ``headers`` is appended to the
    headers and to every row; an existing column has its cell overwritten
    in every row.
    """
    for name, constant in static_entries:
        try:
            position = headers.index(name)
        except ValueError:
            # Unknown column: grow the header and every row by one cell.
            headers.append(name)
            for record in data_rows:
                record.append(constant)
            continue
        for record in data_rows:
            record[position] = constant
def add_a_key_columns(data_rows, headers, a_key_entries, task_history_key):
    """Fill key columns with a per-row number derived from the task key.

    Args:
        data_rows: List of row lists; each row is mutated.
        headers: List of column names; extended when a column is new.
        a_key_entries: Iterable of column names to populate.
        task_history_key: Value convertible with ``int()``.

    The value stored for the row at 1-based position ``n`` is
    ``str(int(task_history_key) * TASK_HISTORY_MULTIPLIER + n)``. Every
    listed column receives the same value for a given row. New columns are
    appended; existing ones are overwritten.
    """
    for name in a_key_entries:
        if name in headers:
            position = headers.index(name)
            for ordinal, record in enumerate(data_rows, start=1):
                record[position] = str(
                    int(task_history_key) * TASK_HISTORY_MULTIPLIER + ordinal
                )
        else:
            headers.append(name)
            for ordinal, record in enumerate(data_rows, start=1):
                record.append(
                    str(int(task_history_key) * TASK_HISTORY_MULTIPLIER + ordinal)
                )
def add_workflow_key_columns(data_rows, headers, workflow_key_entries, workflow_key):
    """Write the workflow key into each listed column of every row, in place.

    Args:
        data_rows: List of row lists; each row is mutated.
        headers: List of column names; extended when a column is new.
        workflow_key_entries: Iterable of column names to populate.
        workflow_key: Value stored, unchanged, in each affected cell.

    A column missing from ``headers`` is appended to the headers and to
    every row; a column already present is overwritten in every row.
    """
    for name in workflow_key_entries:
        try:
            position = headers.index(name)
        except ValueError:
            # Column does not exist yet: add it at the end.
            headers.append(name)
            for record in data_rows:
                record.append(workflow_key)
            continue
        for record in data_rows:
            record[position] = workflow_key
def rearrange_columns(headers, data_rows, column_order):
    """Return headers and rows restricted to, and ordered by, column_order.

    Args:
        headers: List of current column names.
        data_rows: List of row lists aligned with ``headers``.
        column_order: Iterable of column names in the desired order.

    Returns:
        A ``(headers, data_rows)`` tuple of new lists. Names in
        ``column_order`` that are not in ``headers`` are skipped, and
        columns not named in ``column_order`` are dropped. When a header
        name occurs more than once, its last position is used.
    """
    position_of = {}
    for position, name in enumerate(headers):
        # Later duplicates overwrite earlier ones.
        position_of[name] = position

    selected = []
    for name in column_order:
        if name in position_of:
            selected.append(position_of[name])

    reordered_headers = [headers[position] for position in selected]
    reordered_rows = []
    for record in data_rows:
        reordered_rows.append([record[position] for position in selected])
    return reordered_headers, reordered_rows