forked from Kevin-0j/codelab
-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathmain.py
46 lines (34 loc) · 1.94 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import pandas as pd
from functions import generate_email_unique, has_special_characters, save_as_csv_and_tsv, shuffle_and_save
import logging
# Main program logic
# Load File_A and File_B from the Excel sheet
file_path = 'input_files/Test Files.xlsx'
df_a = pd.read_excel(file_path, sheet_name='File_A')
df_b = pd.read_excel(file_path, sheet_name='File_B')
# Initialize dictionary for unique email storage
emails = {}
# Step 1: Generate email addresses and ensure they are unique
df_a['Email Address'] = df_a['Student Name'].apply(lambda x: generate_email_unique(x, emails))
df_b['Email Address'] = df_b['Student Name'].apply(lambda x: generate_email_unique(x, emails))
# Step 2: Save DataFrames as CSV and TSV
save_as_csv_and_tsv(df_a, 'students_a')
save_as_csv_and_tsv(df_b, 'students_b')
# Step 3: Separate male and female students
male_students_a = df_a[df_a['Gender'] == 'M']
female_students_a = df_a[df_a['Gender'] == 'F']
male_students_b = df_b[df_b['Gender'] == 'M']
female_students_b = df_b[df_b['Gender'] == 'F']
# Log the counts of male and female students
logging.info(f"Number of male students in File_A: {len(male_students_a)}")
logging.info(f"Number of female students in File_A: {len(female_students_a)}")
logging.info(f"Number of male student in File_B: {len(male_students_b)}")
logging.info(f"Number of female student in File_B: {len(female_students_b)}")
# Step 4: Detect and log names with special characters
special_char_names_a = df_a[df_a['Student Name'].apply(has_special_characters)]
special_char_names_b = df_b[df_b['Student Name'].apply(has_special_characters)]
logging.info(f"Students with special characters in File_A: {special_char_names_a['Student Name'].tolist()}")
logging.info(f"Students with special characters in File_B: {special_char_names_b['Student Name'].tolist()}")
# Step 5: Shuffle and save the data
shuffle_and_save(df_a, 'shuffled_students_a')
shuffle_and_save(df_b, 'shuffled_students_b')