Skip to content
Snippets Groups Projects
Commit 54de45f3 authored by miesch3u's avatar miesch3u
Browse files

refactor pour rendre le code plus lisible

parent b73bccb1
Branches
Tags
No related merge requests found
import pandas as pd
# DataFrames read by load_file, keyed by the exact path string passed to it.
files = {}
# DataFrames keyed by table name.
dfs = {}
# Base names of the CSV files; a loading loop builds "../data/<name>.csv" from each.
fileNames = ["addresses", "customers", "departments", "order_details", "orders", "products"]
# importing files
def load_file(file_path):
    """Read the CSV at ``file_path`` into a DataFrame and return it.

    The frame is also stored in the module-level ``files`` dict under
    ``file_path``. Any exception raised by ``pd.read_csv`` propagates to
    the caller.
    """
    files[file_path] = pd.read_csv(file_path)
    return files[file_path]
# Maps each CSV path to the name its DataFrame is stored under in dfs.
filesPath = {"data/addresses.csv": "addresses", "data/customers.csv": "customers",
"data/departments.csv": "departments", "data/order_details.csv": "order_details",
"data/orders.csv": "orders", "data/products.csv": "products"}
dfs = {}
# Load every listed file through load_file ("../" is prepended to the path);
# a success or failure message is printed for each one.
for file_path, name in filesPath.items():
try:
dfs[name] = load_file("../" + file_path)
print(f"Loaded {name} from {file_path}")
except Exception as e:
print(f"Failed to load {name} from {file_path}: {e}")
# cleaning data
def sanitize_data(df):
# dropping duplicates
df = df.drop_duplicates()
......@@ -35,16 +21,23 @@ def sanitize_data(df):
return df
# Loads "../data/<name>.csv" via load_file, stores the frame in dfs, then
# replaces it with the result of sanitize_data, printing progress messages.
# NOTE(review): this span contains two `for` headers and two failure messages;
# it looks like the pre- and post-refactor versions of the loop shown together
# in a diff view — confirm which lines are current before running.
for name, df in dfs.items():
for name in fileNames:
try:
# importing files
df = load_file("../data/" + name + ".csv")
dfs[name] = df
print(f"Loaded {name}")
# cleaning data
dfs[name] = sanitize_data(df)
print(f"Sanitized {name}")
except Exception as e:
print(f"Failed to sanitize {name}: {e}")
print(f"Failed to load {name} : {e}")
# linking tables
# Collects the return values of link_data, one per entry of links.
linked_dfs = []
# Each entry is [first table name, second table name, key]: the two names are
# looked up in dfs and passed to link_data together with the key.
# NOTE(review): the last entry repeats ['orders', 'order_details', 'order id'];
# confirm whether the duplicate is intended.
links = [['customers', 'orders', 'customer id'], ['orders', 'order_details', 'order id'],
['order_details', 'products', 'product id'], ['departments', 'products', 'department'],
['orders', 'addresses', 'address id'], ['orders', 'order_details', 'order id']]
def link_data(df1, df2, key):
......@@ -56,16 +49,12 @@ def link_data(df1, df2, key):
return None
# Each entry is [first table name, second table name, key].
# NOTE(review): the last entry repeats ['orders', 'order_details', 'order id'];
# confirm whether the duplicate is intended.
links = [['customers', 'orders', 'customer id'], ['orders', 'order_details', 'order id'],
['order_details', 'products', 'product id'], ['departments', 'products', 'department'],
['orders', 'addresses', 'address id'], ['orders', 'order_details', 'order id']]
# Call link_data for every entry and append its return value to linked_dfs.
for link in links:
linked_dfs.append(link_data(dfs[link[0]], dfs[link[1]], link[2]))
print(f"Linked {link[0]} and {link[1]} on {link[2]}")
# saving cleaned and linked data
# saving cleaned and linked data
def save_data(df, file_name):
try:
df.to_csv("../data/" + file_name, index=False)
......@@ -73,5 +62,6 @@ def save_data(df, file_name):
except Exception as e:
print(f"Failed to save {file_name}: {e}")
# Pass every DataFrame in dfs to save_data under the file name "cleaned_<name>.csv".
for name, df in dfs.items():
save_data(df, f"cleaned_{name}.csv")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment