Eemansleepdeprived's picture
Upload 310 files
36eb7b3 verified
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import os
# Load the class-wide dataset once when the module is imported; the class-level
# plotting functions below read this module-level DataFrame.
data = pd.read_csv("./student_analysis/synthetic_data_with_all_subjects.csv")
def mean_scores_subject_gender(subject):
    """Save a bar plot of the ``subject`` score broken down by gender.

    Reads the module-level ``data`` frame, plots the ``'<subject> score'``
    column against ``'gender'`` and writes the image to
    ``./student_analysis/requested_plots/mean_scores_subject_gender.png``.

    Args:
        subject: Subject name; ``' score'`` is appended to build the column
            name.
    """
    fig = plt.figure(figsize=(10, 6))
    try:
        sns.barplot(x='gender', y=subject + ' score', data=data)
        plt.title(f'Mean {subject} Score by Gender')
        plt.xlabel('Gender')
        plt.ylabel(f'Mean {subject} Score')
        plt.savefig('./student_analysis/requested_plots/mean_scores_subject_gender.png')
    finally:
        # pyplot keeps every figure it creates until it is closed explicitly;
        # close it here so repeated dashboard refreshes do not accumulate
        # figures in memory.
        plt.close(fig)
def course_and_scores_relations(subject):
    """Save a box plot of the ``subject`` score per test-preparation status.

    Reads the module-level ``data`` frame, plots the ``'<subject> score'``
    column against ``'test preparation course'`` and writes the image to
    ``./student_analysis/requested_plots/course_and_score_relations.png``.

    Args:
        subject: Subject name; ``' score'`` is appended to build the column
            name.
    """
    fig = plt.figure(figsize=(10, 6))
    try:
        sns.boxplot(x='test preparation course', y=subject + ' score', data=data)
        plt.title(f'{subject} Score Distribution by Test Preparation Course')
        plt.xlabel('Test Preparation Course')
        plt.ylabel(f'{subject} Score')
        plt.savefig('./student_analysis/requested_plots/course_and_score_relations.png')
    finally:
        # Release the figure; pyplot otherwise retains it for the lifetime of
        # the process.
        plt.close(fig)
def plot_mean_scores():
    """Save a bar plot of the mean score of every subject column.

    Uses the module-level ``data`` frame and writes the image to
    ``./student_analysis/requested_plots/mean_scores.png``.
    """
    score_columns = [
        'math score',
        'reading score',
        'writing score',
        'physics score',
        'computer science score',
    ]
    mean_scores = data[score_columns].mean()
    fig = plt.figure(figsize=(10, 6))
    try:
        sns.barplot(x=mean_scores.index, y=mean_scores.values)
        plt.title('Mean Scores for Each Subject')
        plt.xlabel('Subject')
        plt.ylabel('Mean Score')
        plt.xticks(rotation=45)
        plt.savefig('./student_analysis/requested_plots/mean_scores.png')
    finally:
        # Close the figure so repeated calls do not leak pyplot figures.
        plt.close(fig)
def plot_median_scores():
    """Save a bar plot of the median score of every subject column.

    Uses the module-level ``data`` frame and writes the image to
    ``./student_analysis/requested_plots/median_scores.png``.
    """
    score_columns = [
        'math score',
        'reading score',
        'writing score',
        'physics score',
        'computer science score',
    ]
    median_scores = data[score_columns].median()
    fig = plt.figure(figsize=(10, 6))
    try:
        sns.barplot(x=median_scores.index, y=median_scores.values)
        plt.title('Median Scores for Each Subject')
        plt.xlabel('Subject')
        plt.ylabel('Median Score')
        plt.xticks(rotation=45)
        plt.savefig('./student_analysis/requested_plots/median_scores.png')
    finally:
        # Close the figure so repeated calls do not leak pyplot figures.
        plt.close(fig)
def plot_highest_scores_each_subject():
    """Save a bar plot of the maximum score of every subject column.

    Uses the module-level ``data`` frame and writes the image to
    ``./student_analysis/requested_plots/highest_scores.png``.
    """
    score_columns = [
        'math score',
        'reading score',
        'writing score',
        'physics score',
        'computer science score',
    ]
    fig = plt.figure(figsize=(10, 6))
    try:
        highest_scores = data[score_columns].max()
        sns.barplot(x=highest_scores.index, y=highest_scores.values)
        plt.title('Highest Scores for Each Subject')
        plt.xlabel('Subject')
        plt.ylabel('Highest Score')
        plt.xticks(rotation=45)
        plt.savefig('./student_analysis/requested_plots/highest_scores.png')
    finally:
        # Close the figure so repeated calls do not leak pyplot figures.
        plt.close(fig)
def plot_lowest_scores_each_subject():
    """Save a bar plot of the minimum score of every subject column.

    Uses the module-level ``data`` frame and writes the image to
    ``./student_analysis/requested_plots/lowest_scores.png``.
    """
    score_columns = [
        'math score',
        'reading score',
        'writing score',
        'physics score',
        'computer science score',
    ]
    fig = plt.figure(figsize=(10, 6))
    try:
        lowest_scores = data[score_columns].min()
        sns.barplot(x=lowest_scores.index, y=lowest_scores.values)
        plt.title('Lowest Scores for Each Subject')
        plt.xlabel('Subject')
        plt.ylabel('Lowest Score')
        plt.xticks(rotation=45)
        plt.savefig('./student_analysis/requested_plots/lowest_scores.png')
    finally:
        # Close the figure so repeated calls do not leak pyplot figures.
        plt.close(fig)
def plot_scores_individual(name):
    """Save a bar chart of one student's score in every subject.

    Looks the student up by the ``'name'`` column of the module-level ``data``
    frame. If several rows share the name, the first one is used. The image is
    written to ``./student_analysis/requested_plots/individual_scores.png``.

    Args:
        name: Value to match against the ``'name'`` column.

    Returns:
        None. Prints ``"Student not found."`` and produces no plot when no row
        matches.
    """
    student_data = data[data['name'] == name]
    if student_data.empty:
        print("Student not found.")
        return

    subjects = [
        'math score',
        'reading score',
        'writing score',
        'physics score',
        'computer science score',
    ]
    first_match = student_data.iloc[0]
    scores = [first_match[subject] for subject in subjects]

    fig = plt.figure(figsize=(10, 6))
    try:
        plt.bar(subjects, scores, color=['blue', 'green', 'red', 'orange', 'purple'])
        plt.title(f'Scores for {name}')
        plt.xlabel('Subjects')
        plt.ylabel('Scores')
        plt.ylim(0, 100)  # Assuming scores are out of 100
        plt.savefig('./student_analysis/requested_plots/individual_scores.png')
    finally:
        # Close the figure so repeated calls do not leak pyplot figures.
        plt.close(fig)
def plot_individual_semester_line(subjects, exam_scores):
    """Save a line plot of each subject's score across semesters.

    Draws one line per entry of ``subjects`` on a shared figure and writes it
    to ``./student_analysis/requested_plots/line_plot.png``.

    Args:
        subjects: Column names of ``exam_scores`` to plot, one line each.
        exam_scores: DataFrame with a ``'Semester'`` column plus one column per
            subject.
    """
    fig = plt.figure(figsize=(10, 6))
    try:
        for subject in subjects:
            sns.lineplot(x='Semester', y=subject, data=exam_scores, label=subject)
        plt.title('Exam Scores Over Semesters')
        plt.xlabel('Semester')
        plt.ylabel('Score')
        plt.legend()
        plt.grid(True)
        plt.savefig('./student_analysis/requested_plots/line_plot.png')
    finally:
        # Close the figure so repeated calls do not leak pyplot figures.
        plt.close(fig)
def plot_individual_semester_box(name):
    """Save a box plot of one student's per-subject score distribution.

    Loads ``./student_analysis/student_data/<name>.csv``, drops its
    ``'Semester'`` column and draws one box per remaining column. The image is
    written to ``./student_analysis/requested_plots/box_plot.png``.

    Args:
        name: Student name; used as the CSV file stem.
    """
    exam_scores = pd.read_csv(f"./student_analysis/student_data/{name}.csv")
    fig = plt.figure(figsize=(10, 6))
    try:
        sns.boxplot(data=exam_scores.drop('Semester', axis=1))
        plt.title('Distribution of Exam Scores for Each Subject')
        plt.xlabel('Subject')
        plt.ylabel('Score')
        plt.savefig('./student_analysis/requested_plots/box_plot.png')
    finally:
        # Close the figure so repeated calls do not leak pyplot figures.
        plt.close(fig)
def increase_decrease(subjects, exam_scores):
    """Render a text report of the largest score rise and drop per subject.

    For every subject the row-to-row difference of its column is computed; the
    rows with the largest and smallest difference are reported together with
    the size of the change. The report is drawn as centred text on an
    axis-free figure and saved to
    ``./student_analysis/requested_plots/score_comparison.png``.

    Args:
        subjects: Column names of ``exam_scores`` to analyse.
        exam_scores: DataFrame holding one column per subject, one row per
            semester. The "Semester" number shown in the report is the row's
            index label, not a value read from a ``'Semester'`` column.
    """
    report_parts = []
    for subject in subjects:
        # Work on the column as a Series so idxmax/idxmin return scalar labels;
        # the former DataFrame route needed positional ``[0]`` lookups on a
        # label-indexed Series, which pandas has deprecated.
        score_changes = exam_scores[subject].diff()
        report_parts.append(f"For {subject}:\n")

        if not score_changes.notna().any():
            # Fewer than two usable rows: there is no change to rank, and
            # idxmax/idxmin would fail on an all-NaN series.
            report_parts.append("Not enough data to compare semesters.\n\n")
            continue

        # Find semester with most improvement and decline
        most_improved_semester = score_changes.idxmax()
        most_declined_semester = score_changes.idxmin()
        report_parts.append(
            f"Most Improvement: Semester {most_improved_semester}, Score Increase: {score_changes.loc[most_improved_semester]}\n"
        )
        report_parts.append(
            f"Quality Decline: Semester {most_declined_semester}, Score Decrease: {score_changes.loc[most_declined_semester]}\n\n"
        )
    report_text = "".join(report_parts)

    fig, ax = plt.subplots(figsize=(8, 6))
    try:
        ax.text(0.5, 0.5, report_text, horizontalalignment='center', verticalalignment='center', fontsize=12)
        # Remove axes
        ax.axis('off')
        fig.savefig('./student_analysis/requested_plots/score_comparison.png')
    finally:
        # Close the figure so repeated calls do not leak pyplot figures.
        plt.close(fig)
def default_dashboard_class(subject):
    """Regenerate the full set of class-level plots for ``subject``.

    Clears ``./student_analysis/requested_plots``, reloads the class dataset
    and then produces the gender, test-preparation, mean, median, highest and
    lowest score plots.

    Args:
        subject: Subject name forwarded to the per-subject plots.
    """
    # The plotting helpers read the module-level ``data``; rebinding it here
    # (rather than assigning an unused local) makes the reload take effect.
    global data
    remove_files_in_directory('./student_analysis/requested_plots')
    data = pd.read_csv("./student_analysis/synthetic_data_with_all_subjects.csv")
    mean_scores_subject_gender(subject)
    course_and_scores_relations(subject)
    plot_mean_scores()
    plot_median_scores()
    plot_highest_scores_each_subject()
    # Previously the highest-scores plot was generated twice and the
    # lowest-scores plot never was.
    plot_lowest_scores_each_subject()
def default_dashboard_student(name: str, subjects=None):
    """Regenerate the full set of per-student plots for ``name``.

    Clears ``./student_analysis/requested_plots``, loads
    ``./student_analysis/student_data/<name>.csv`` and produces the individual
    score chart, the semester line plot, the semester box plot and the
    improvement/decline report.

    Args:
        name: Student name; also the stem of the student's CSV file.
        subjects: Columns of the student's CSV to analyse. Defaults to
            ``['maths', 'computer science', 'reading', 'writing', 'physics']``.
    """
    if subjects is None:
        # Build the default per call instead of sharing one mutable list
        # object across every invocation.
        subjects = ['maths', 'computer science', 'reading', 'writing', 'physics']
    remove_files_in_directory('./student_analysis/requested_plots')
    exam_scores = pd.read_csv(f"./student_analysis/student_data/{name}.csv")
    plot_scores_individual(name)
    plot_individual_semester_line(subjects, exam_scores)
    plot_individual_semester_box(name)
    increase_decrease(subjects, exam_scores)
def plot_dashboard_class(selected_options: list, subject: str):
    """Generate only the class-level plots named in ``selected_options``.

    Clears ``./student_analysis/requested_plots``, reloads the class dataset
    and runs the plotting function mapped to each selected option, in the
    order given. Options that are not recognised are skipped.

    Args:
        selected_options: Option labels; see the keys of the dispatch table
            below for the accepted values.
        subject: Subject name forwarded to the per-subject plots.
    """
    # The plotting helpers read the module-level ``data``; rebinding it here
    # (rather than assigning an unused local) makes the reload take effect.
    global data
    remove_files_in_directory('./student_analysis/requested_plots')
    data = pd.read_csv("./student_analysis/synthetic_data_with_all_subjects.csv")
    option_to_function = {
        "Scores with respect to gender": (mean_scores_subject_gender, (subject,)),
        "Impact of course completion on grades": (course_and_scores_relations, (subject,)),
        "Mean Scores": (plot_mean_scores, ()),
        "Median Scores": (plot_median_scores, ()),
        "Highest Scores": (plot_highest_scores_each_subject, ()),
        "Lowest Scores": (plot_lowest_scores_each_subject, ()),
    }
    for option in selected_options:
        entry = option_to_function.get(option)
        if entry is None:
            # Unknown label: unpacking ``None`` used to raise TypeError before
            # the intended "skip" check could run.
            continue
        function, params = entry
        function(*params)
def plot_dashboard_student(selected_options: list, name: str, subjects: list):
    """Generate only the per-student plots named in ``selected_options``.

    Clears ``./student_analysis/requested_plots/``, loads
    ``./student_analysis/student_data/<name>.csv`` and runs the plotting
    function mapped to each selected option, in the order given. Options that
    are not recognised are skipped.

    Args:
        selected_options: Option labels; see the keys of the dispatch table
            below for the accepted values.
        name: Student name; also the stem of the student's CSV file.
        subjects: Columns of the student's CSV to analyse.
    """
    remove_files_in_directory('./student_analysis/requested_plots/')
    exam_scores = pd.read_csv(f"./student_analysis/student_data/{name}.csv")
    option_to_function = {
        "Plot Scores for the student": (plot_scores_individual, (name,)),
        "Plot Individual Semester Progress(Line Plot)": (plot_individual_semester_line, (subjects, exam_scores)),
        "Plot Individual Semester Progress (Box Plot)": (plot_individual_semester_box, (name,)),
        "Improvements and Decline of Marks": (increase_decrease, (subjects, exam_scores)),
    }
    for option in selected_options:
        entry = option_to_function.get(option)
        if entry is None:
            # Unknown label: unpacking ``None`` used to raise TypeError before
            # the intended "skip" check could run.
            continue
        function, params = entry
        function(*params)
def remove_files_in_directory(directory):
    """Delete every regular file directly inside ``directory``.

    Sub-directories and their contents are left untouched.

    Args:
        directory: Path of the directory to clear.
    """
    # The listing is taken up front, before anything is deleted.
    entry_paths = [os.path.join(directory, entry) for entry in os.listdir(directory)]
    for entry_path in entry_paths:
        if not os.path.isfile(entry_path):
            continue
        os.remove(entry_path)
# Runs whenever this module is executed or imported: clears the requested_plots
# directory and regenerates the gender and highest-score plots.
# NOTE(review): the subject passed here is 'maths', while the aggregate plots
# above use the column 'math score' — confirm a 'maths score' column exists.
plot_dashboard_class(['Scores with respect to gender','Highest Scores'],'maths')