# Spaces:
# Build error
# Build error
import pandas as pd | |
import seaborn as sns | |
import matplotlib.pyplot as plt | |
import os | |
# Class-wide score table; the class-level plot helpers in this module read it
# through the module-level name ``data``.
data = pd.read_csv(
    filepath_or_buffer="./student_analysis/synthetic_data_with_all_subjects.csv",
)
def mean_scores_subject_gender(subject):
    """Save a bar chart of the ``'<subject> score'`` column grouped by gender.

    Reads the module-level ``data`` frame and plots its ``gender`` column
    against ``subject + ' score'`` with ``sns.barplot``. The image overwrites
    ``./student_analysis/requested_plots/mean_scores_subject_gender.png``.

    Args:
        subject: Subject name without the trailing ``" score"`` suffix.
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        sns.barplot(x='gender', y=subject + ' score', data=data, ax=ax)
        ax.set_title(f'Mean {subject} Score by Gender')
        ax.set_xlabel('Gender')
        ax.set_ylabel(f'Mean {subject} Score')
        fig.savefig('./student_analysis/requested_plots/mean_scores_subject_gender.png')
    finally:
        # pyplot keeps every figure alive until it is closed; release it so
        # repeated dashboard requests do not pile up open figures.
        plt.close(fig)
def course_and_scores_relations(subject):
    """Save a box plot of ``'<subject> score'`` per test-preparation status.

    Reads the module-level ``data`` frame, grouping ``subject + ' score'`` by
    the ``test preparation course`` column. The image overwrites
    ``./student_analysis/requested_plots/course_and_score_relations.png``.

    Args:
        subject: Subject name without the trailing ``" score"`` suffix.
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        sns.boxplot(x='test preparation course', y=subject + ' score', data=data, ax=ax)
        ax.set_title(f'{subject} Score Distribution by Test Preparation Course')
        ax.set_xlabel('Test Preparation Course')
        ax.set_ylabel(f'{subject} Score')
        fig.savefig('./student_analysis/requested_plots/course_and_score_relations.png')
    finally:
        # Close the figure even if plotting fails so it is not leaked.
        plt.close(fig)
def plot_mean_scores():
    """Save a bar chart of the mean of each subject score column.

    Uses the five score columns of the module-level ``data`` frame and writes
    the chart to ``./student_analysis/requested_plots/mean_scores.png``.
    """
    score_columns = ['math score', 'reading score', 'writing score', 'physics score', 'computer science score']
    mean_scores = data[score_columns].mean()
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        sns.barplot(x=mean_scores.index, y=mean_scores.values, ax=ax)
        ax.set_title('Mean Scores for Each Subject')
        ax.set_xlabel('Subject')
        ax.set_ylabel('Mean Score')
        # Column names are long; tilt them so they do not overlap.
        ax.tick_params(axis='x', labelrotation=45)
        fig.savefig('./student_analysis/requested_plots/mean_scores.png')
    finally:
        # Release the figure; pyplot would otherwise keep it open.
        plt.close(fig)
def plot_median_scores():
    """Save a bar chart of the median of each subject score column.

    Uses the five score columns of the module-level ``data`` frame and writes
    the chart to ``./student_analysis/requested_plots/median_scores.png``.
    """
    score_columns = ['math score', 'reading score', 'writing score', 'physics score', 'computer science score']
    median_scores = data[score_columns].median()
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        sns.barplot(x=median_scores.index, y=median_scores.values, ax=ax)
        ax.set_title('Median Scores for Each Subject')
        ax.set_xlabel('Subject')
        ax.set_ylabel('Median Score')
        # Column names are long; tilt them so they do not overlap.
        ax.tick_params(axis='x', labelrotation=45)
        fig.savefig('./student_analysis/requested_plots/median_scores.png')
    finally:
        # Release the figure; pyplot would otherwise keep it open.
        plt.close(fig)
def plot_highest_scores_each_subject():
    """Save a bar chart of the maximum of each subject score column.

    Uses the five score columns of the module-level ``data`` frame and writes
    the chart to ``./student_analysis/requested_plots/highest_scores.png``.
    """
    score_columns = ['math score', 'reading score', 'writing score', 'physics score', 'computer science score']
    highest_scores = data[score_columns].max()
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        sns.barplot(x=highest_scores.index, y=highest_scores.values, ax=ax)
        ax.set_title('Highest Scores for Each Subject')
        ax.set_xlabel('Subject')
        ax.set_ylabel('Highest Score')
        # Column names are long; tilt them so they do not overlap.
        ax.tick_params(axis='x', labelrotation=45)
        fig.savefig('./student_analysis/requested_plots/highest_scores.png')
    finally:
        # Release the figure; pyplot would otherwise keep it open.
        plt.close(fig)
def plot_lowest_scores_each_subject():
    """Save a bar chart of the minimum of each subject score column.

    Uses the five score columns of the module-level ``data`` frame and writes
    the chart to ``./student_analysis/requested_plots/lowest_scores.png``.
    """
    score_columns = ['math score', 'reading score', 'writing score', 'physics score', 'computer science score']
    lowest_scores = data[score_columns].min()
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        sns.barplot(x=lowest_scores.index, y=lowest_scores.values, ax=ax)
        ax.set_title('Lowest Scores for Each Subject')
        ax.set_xlabel('Subject')
        ax.set_ylabel('Lowest Score')
        # Column names are long; tilt them so they do not overlap.
        ax.tick_params(axis='x', labelrotation=45)
        fig.savefig('./student_analysis/requested_plots/lowest_scores.png')
    finally:
        # Release the figure; pyplot would otherwise keep it open.
        plt.close(fig)
def plot_scores_individual(name):
    """Save a bar chart of one student's score in each subject.

    Looks the student up in the module-level ``data`` frame by its ``name``
    column. When several rows match, the first one is used. When none match,
    ``"Student not found."`` is printed and no file is written. Otherwise the
    chart overwrites ``./student_analysis/requested_plots/individual_scores.png``.

    Args:
        name: Value to match against the ``name`` column.
    """
    student_data = data[data['name'] == name]
    if student_data.empty:
        print("Student not found.")
        return

    subjects = ['math score', 'reading score', 'writing score', 'physics score', 'computer science score']
    scores = [student_data[subject].iloc[0] for subject in subjects]

    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        ax.bar(subjects, scores, color=['blue', 'green', 'red', 'orange', 'purple'])
        ax.set_title(f'Scores for {name}')
        ax.set_xlabel('Subjects')
        ax.set_ylabel('Scores')
        ax.set_ylim(0, 100)  # Assuming scores are out of 100
        fig.savefig('./student_analysis/requested_plots/individual_scores.png')
    finally:
        # Release the figure; pyplot would otherwise keep it open.
        plt.close(fig)
def plot_individual_semester_line(subjects, exam_scores):
    """Save a line chart of each subject's score against ``Semester``.

    One labelled line is drawn per entry of ``subjects``, all on the same
    axes. The chart overwrites
    ``./student_analysis/requested_plots/line_plot.png``.

    Args:
        subjects: Column names of ``exam_scores`` to plot.
        exam_scores: Frame with a ``Semester`` column plus one column per
            subject.
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        for subject in subjects:
            sns.lineplot(x='Semester', y=subject, data=exam_scores, label=subject, ax=ax)
        ax.set_title('Exam Scores Over Semesters')
        ax.set_xlabel('Semester')
        ax.set_ylabel('Score')
        ax.legend()
        ax.grid(True)
        fig.savefig('./student_analysis/requested_plots/line_plot.png')
    finally:
        # Release the figure; pyplot would otherwise keep it open.
        plt.close(fig)
def plot_individual_semester_box(name):
    """Save a box plot of a student's score distribution per subject.

    Loads ``./student_analysis/student_data/<name>.csv``, drops its
    ``Semester`` column and draws one box per remaining column. The chart
    overwrites ``./student_analysis/requested_plots/box_plot.png``.

    Args:
        name: Student name; used as the CSV file stem.
    """
    exam_scores = pd.read_csv(f"./student_analysis/student_data/{name}.csv")
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        sns.boxplot(data=exam_scores.drop(columns='Semester'), ax=ax)
        ax.set_title('Distribution of Exam Scores for Each Subject')
        ax.set_xlabel('Subject')
        ax.set_ylabel('Score')
        fig.savefig('./student_analysis/requested_plots/box_plot.png')
    finally:
        # Release the figure; pyplot would otherwise keep it open.
        plt.close(fig)
def increase_decrease(subjects, exam_scores):
    """Render a text report of each subject's largest rise and fall as a PNG.

    For every subject the row-to-row difference of its column in
    ``exam_scores`` is computed, and the rows holding the largest and the
    smallest difference are reported. The text is drawn on an axis-less
    figure and saved to
    ``./student_analysis/requested_plots/score_comparison.png``.

    Args:
        subjects: Column names of ``exam_scores`` to analyse.
        exam_scores: Frame of per-semester scores, one row per semester.
    """
    has_semester_column = 'Semester' in exam_scores.columns

    def semester_of(row_label):
        # The row label is not necessarily the semester; prefer the value
        # stored in the ``Semester`` column when the frame has one.
        if has_semester_column:
            return exam_scores.at[row_label, 'Semester']
        return row_label

    sections = []
    for subject in subjects:
        changes = exam_scores[subject].diff()
        if changes.notna().sum() == 0:
            # Fewer than two usable rows: there is no change to rank.
            sections.append(f"For {subject}:\nNot enough semesters to compare.\n\n")
            continue
        # Find semester with most improvement and decline
        most_improved = changes.idxmax()
        most_declined = changes.idxmin()
        sections.append(
            f"For {subject}:\n"
            f"Most Improvement: Semester {semester_of(most_improved)}, "
            f"Score Increase: {changes.loc[most_improved]}\n"
            f"Quality Decline: Semester {semester_of(most_declined)}, "
            f"Score Decrease: {changes.loc[most_declined]}\n\n"
        )
    report_text = "".join(sections)

    fig, ax = plt.subplots(figsize=(8, 6))
    try:
        ax.text(0.5, 0.5, report_text, horizontalalignment='center', verticalalignment='center', fontsize=12)
        # Remove axes
        ax.axis('off')
        fig.savefig('./student_analysis/requested_plots/score_comparison.png')
    finally:
        # Release the figure; pyplot would otherwise keep it open.
        plt.close(fig)
def default_dashboard_class(subject):
    """Regenerate the full set of class-level plots.

    Empties ``./student_analysis/requested_plots`` and then produces the
    gender, test-preparation, mean, median, highest and lowest score charts.

    Args:
        subject: Subject name (without ``" score"``) for the two
            subject-specific charts.
    """
    remove_files_in_directory('./student_analysis/requested_plots')
    # The plot helpers read the module-level ``data`` frame, so no local
    # reload is needed here (a local ``data`` would never be seen by them).
    mean_scores_subject_gender(subject)
    course_and_scores_relations(subject)
    plot_mean_scores()
    plot_median_scores()
    plot_highest_scores_each_subject()
    # Previously the highest-score plot was produced twice and the
    # lowest-score plot was never generated.
    plot_lowest_scores_each_subject()
def default_dashboard_student(name: str, subjects=None):
    """Regenerate the full set of plots for one student.

    Empties ``./student_analysis/requested_plots``, loads
    ``./student_analysis/student_data/<name>.csv`` and produces the
    individual score chart, the semester line and box plots, and the
    improvement/decline report.

    Args:
        name: Student name; also the stem of the student's CSV file.
        subjects: Columns of the student's CSV to analyse. Defaults to
            ``['maths', 'computer science', 'reading', 'writing', 'physics']``.
    """
    if subjects is None:
        # Built per call so the default list is never shared between calls.
        subjects = ['maths', 'computer science', 'reading', 'writing', 'physics']
    remove_files_in_directory('./student_analysis/requested_plots')
    exam_scores = pd.read_csv(f"./student_analysis/student_data/{name}.csv")
    plot_scores_individual(name)
    plot_individual_semester_line(subjects, exam_scores)
    plot_individual_semester_box(name)
    increase_decrease(subjects, exam_scores)
def plot_dashboard_class(selected_options: list, subject: str):
    """Generate the class-level plots named in ``selected_options``.

    Empties ``./student_analysis/requested_plots`` first, then runs the plot
    function mapped to each option, in the order given. Options that are not
    in the mapping are skipped.

    Args:
        selected_options: Option labels; see the keys of
            ``option_to_function`` below for the recognised values.
        subject: Subject name (without ``" score"``) passed to the
            subject-specific plots.
    """
    remove_files_in_directory('./student_analysis/requested_plots')
    # The plot helpers read the module-level ``data`` frame, so no local
    # reload is needed here.
    option_to_function = {
        "Scores with respect to gender": (mean_scores_subject_gender, (subject,)),
        "Impact of course completion on grades": (course_and_scores_relations, (subject,)),
        "Mean Scores": (plot_mean_scores, ()),
        "Median Scores": (plot_median_scores, ()),
        "Highest Scores": (plot_highest_scores_each_subject, ()),
        "Lowest Scores": (plot_lowest_scores_each_subject, ()),
    }
    for option in selected_options:
        entry = option_to_function.get(option)
        if entry is None:
            # Unknown label: unpacking ``None`` would raise TypeError, so
            # skip it explicitly instead.
            continue
        function, params = entry
        function(*params)
def plot_dashboard_student(selected_options: list, name: str, subjects: list):
    """Generate the student-level plots named in ``selected_options``.

    Empties ``./student_analysis/requested_plots/`` first, loads
    ``./student_analysis/student_data/<name>.csv``, then runs the plot
    function mapped to each option, in the order given. Options that are not
    in the mapping are skipped.

    Args:
        selected_options: Option labels; see the keys of
            ``option_to_function`` below for the recognised values.
        name: Student name; also the stem of the student's CSV file.
        subjects: Columns of the student's CSV to analyse.
    """
    remove_files_in_directory('./student_analysis/requested_plots/')
    exam_scores = pd.read_csv(f"./student_analysis/student_data/{name}.csv")
    option_to_function = {
        "Plot Scores for the student": (plot_scores_individual, (name,)),
        "Plot Individual Semester Progress(Line Plot)": (plot_individual_semester_line, (subjects, exam_scores)),
        "Plot Individual Semester Progress (Box Plot)": (plot_individual_semester_box, (name,)),
        "Improvements and Decline of Marks": (increase_decrease, (subjects, exam_scores)),
    }
    for option in selected_options:
        entry = option_to_function.get(option)
        if entry is None:
            # Unknown label: unpacking ``None`` would raise TypeError, so
            # skip it explicitly instead.
            continue
        function, params = entry
        function(*params)
def remove_files_in_directory(directory):
    """Delete every regular file directly inside ``directory``.

    Sub-directories and their contents are left untouched. If ``directory``
    does not exist yet it is created (empty) instead of raising, because the
    callers in this module save plots into it right after clearing it.

    Args:
        directory: Path of the directory to empty.
    """
    os.makedirs(directory, exist_ok=True)
    for file in os.listdir(directory):
        file_path = os.path.join(directory, file)
        # Only plain files are removed; nested directories are kept.
        if os.path.isfile(file_path):
            os.remove(file_path)
# Runs when the module is loaded. The subject must match the class data set's
# column prefix: the score columns are named 'math score', ..., so 'maths'
# would look up a non-existent 'maths score' column.
plot_dashboard_class(['Scores with respect to gender', 'Highest Scores'], 'math')