Spaces:
Build error
Build error
File size: 7,944 Bytes
36eb7b3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 |
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import os
# Load the class-wide dataset once, when this module is imported. The class-level
# plotting functions in this file read this module-level ``data`` frame directly.
# The path is relative, so it is resolved against the current working directory.
data = pd.read_csv("./student_analysis/synthetic_data_with_all_subjects.csv")
def mean_scores_subject_gender(subject: str) -> None:
    """Save a bar plot of the ``subject`` score grouped by gender.

    Reads the module-level ``data`` frame, plotting the column named
    ``subject + ' score'`` against the ``gender`` column, and writes the image
    to ``./student_analysis/requested_plots/mean_scores_subject_gender.png``.

    Args:
        subject: Subject name without the trailing ``' score'`` suffix.
    """
    fig = plt.figure(figsize=(10, 6))
    try:
        sns.barplot(x='gender', y=subject + ' score', data=data)
        plt.title(f'Mean {subject} Score by Gender')
        plt.xlabel('Gender')
        plt.ylabel(f'Mean {subject} Score')
        fig.savefig('./student_analysis/requested_plots/mean_scores_subject_gender.png')
    finally:
        # pyplot keeps a reference to every figure until it is closed, so
        # release it here even when plotting or saving fails.
        plt.close(fig)
def course_and_scores_relations(subject: str) -> None:
    """Save a box plot of the ``subject`` score per test-preparation-course value.

    Reads the module-level ``data`` frame, plotting the column named
    ``subject + ' score'`` against the ``test preparation course`` column, and
    writes the image to
    ``./student_analysis/requested_plots/course_and_score_relations.png``.

    Args:
        subject: Subject name without the trailing ``' score'`` suffix.
    """
    fig = plt.figure(figsize=(10, 6))
    try:
        sns.boxplot(x='test preparation course', y=subject + ' score', data=data)
        plt.title(f'{subject} Score Distribution by Test Preparation Course')
        plt.xlabel('Test Preparation Course')
        plt.ylabel(f'{subject} Score')
        fig.savefig('./student_analysis/requested_plots/course_and_score_relations.png')
    finally:
        # pyplot keeps a reference to every figure until it is closed, so
        # release it here even when plotting or saving fails.
        plt.close(fig)
def plot_mean_scores() -> None:
    """Save a bar plot of the mean of each subject score column.

    Reads the five score columns from the module-level ``data`` frame and
    writes the image to ``./student_analysis/requested_plots/mean_scores.png``.
    """
    score_columns = ['math score', 'reading score', 'writing score', 'physics score', 'computer science score']
    mean_scores = data[score_columns].mean()

    fig = plt.figure(figsize=(10, 6))
    try:
        sns.barplot(x=mean_scores.index, y=mean_scores.values)
        plt.title('Mean Scores for Each Subject')
        plt.xlabel('Subject')
        plt.ylabel('Mean Score')
        plt.xticks(rotation=45)
        fig.savefig('./student_analysis/requested_plots/mean_scores.png')
    finally:
        # pyplot keeps a reference to every figure until it is closed, so
        # release it here even when plotting or saving fails.
        plt.close(fig)
def plot_median_scores() -> None:
    """Save a bar plot of the median of each subject score column.

    Reads the five score columns from the module-level ``data`` frame and
    writes the image to ``./student_analysis/requested_plots/median_scores.png``.
    """
    score_columns = ['math score', 'reading score', 'writing score', 'physics score', 'computer science score']
    median_scores = data[score_columns].median()

    fig = plt.figure(figsize=(10, 6))
    try:
        sns.barplot(x=median_scores.index, y=median_scores.values)
        plt.title('Median Scores for Each Subject')
        plt.xlabel('Subject')
        plt.ylabel('Median Score')
        plt.xticks(rotation=45)
        fig.savefig('./student_analysis/requested_plots/median_scores.png')
    finally:
        # pyplot keeps a reference to every figure until it is closed, so
        # release it here even when plotting or saving fails.
        plt.close(fig)
def plot_highest_scores_each_subject() -> None:
    """Save a bar plot of the maximum of each subject score column.

    Reads the five score columns from the module-level ``data`` frame and
    writes the image to ``./student_analysis/requested_plots/highest_scores.png``.
    """
    score_columns = ['math score', 'reading score', 'writing score', 'physics score', 'computer science score']
    # Select the data before opening the figure so a missing column cannot
    # leave an empty figure behind.
    highest_scores = data[score_columns].max()

    fig = plt.figure(figsize=(10, 6))
    try:
        sns.barplot(x=highest_scores.index, y=highest_scores.values)
        plt.title('Highest Scores for Each Subject')
        plt.xlabel('Subject')
        plt.ylabel('Highest Score')
        plt.xticks(rotation=45)
        fig.savefig('./student_analysis/requested_plots/highest_scores.png')
    finally:
        # pyplot keeps a reference to every figure until it is closed, so
        # release it here even when plotting or saving fails.
        plt.close(fig)
def plot_lowest_scores_each_subject() -> None:
    """Save a bar plot of the minimum of each subject score column.

    Reads the five score columns from the module-level ``data`` frame and
    writes the image to ``./student_analysis/requested_plots/lowest_scores.png``.
    """
    score_columns = ['math score', 'reading score', 'writing score', 'physics score', 'computer science score']
    # Select the data before opening the figure so a missing column cannot
    # leave an empty figure behind.
    lowest_scores = data[score_columns].min()

    fig = plt.figure(figsize=(10, 6))
    try:
        sns.barplot(x=lowest_scores.index, y=lowest_scores.values)
        plt.title('Lowest Scores for Each Subject')
        plt.xlabel('Subject')
        plt.ylabel('Lowest Score')
        plt.xticks(rotation=45)
        fig.savefig('./student_analysis/requested_plots/lowest_scores.png')
    finally:
        # pyplot keeps a reference to every figure until it is closed, so
        # release it here even when plotting or saving fails.
        plt.close(fig)
def plot_scores_individual(name: str) -> None:
    """Save a bar chart of one student's score in each subject.

    Looks up rows of the module-level ``data`` frame whose ``name`` column
    equals ``name``. If there is no such row, prints ``"Student not found."``
    and returns without writing anything. Otherwise the scores of the first
    matching row are plotted and written to
    ``./student_analysis/requested_plots/individual_scores.png``.

    Args:
        name: Value to match against the ``name`` column.
    """
    student_data = data[data['name'] == name]
    if student_data.empty:
        print("Student not found.")
        return

    subjects = ['math score', 'reading score', 'writing score', 'physics score', 'computer science score']
    # Several rows may share a name; only the first match is plotted.
    scores = [student_data[subject].iloc[0] for subject in subjects]

    fig = plt.figure(figsize=(10, 6))
    try:
        plt.bar(subjects, scores, color=['blue', 'green', 'red', 'orange', 'purple'])
        plt.title(f'Scores for {name}')
        plt.xlabel('Subjects')
        plt.ylabel('Scores')
        plt.ylim(0, 100)  # Assuming scores are out of 100
        fig.savefig('./student_analysis/requested_plots/individual_scores.png')
    finally:
        # pyplot keeps a reference to every figure until it is closed, so
        # release it here even when plotting or saving fails.
        plt.close(fig)
def plot_individual_semester_line(subjects, exam_scores) -> None:
    """Save a line plot of each subject's score across semesters.

    One line is drawn per entry of ``subjects``, using the ``Semester`` column
    of ``exam_scores`` as the x axis. The image is written to
    ``./student_analysis/requested_plots/line_plot.png``.

    Args:
        subjects: Iterable of column names in ``exam_scores`` to plot.
        exam_scores: Frame with a ``Semester`` column plus one column per
            subject.
    """
    fig = plt.figure(figsize=(10, 6))
    try:
        for subject in subjects:
            sns.lineplot(x='Semester', y=subject, data=exam_scores, label=subject)
        plt.title('Exam Scores Over Semesters')
        plt.xlabel('Semester')
        plt.ylabel('Score')
        plt.legend()
        plt.grid(True)
        fig.savefig('./student_analysis/requested_plots/line_plot.png')
    finally:
        # pyplot keeps a reference to every figure until it is closed, so
        # release it here even when plotting or saving fails.
        plt.close(fig)
def plot_individual_semester_box(name: str) -> None:
    """Save a box plot of one student's exam scores per subject.

    Reads ``./student_analysis/student_data/{name}.csv``, drops its
    ``Semester`` column, draws one box per remaining column, and writes the
    image to ``./student_analysis/requested_plots/box_plot.png``.

    Args:
        name: Student name; used as the CSV file stem.
    """
    exam_scores = pd.read_csv(f"./student_analysis/student_data/{name}.csv")

    fig = plt.figure(figsize=(10, 6))
    try:
        sns.boxplot(data=exam_scores.drop(columns='Semester'))
        plt.title('Distribution of Exam Scores for Each Subject')
        plt.xlabel('Subject')
        plt.ylabel('Score')
        fig.savefig('./student_analysis/requested_plots/box_plot.png')
    finally:
        # pyplot keeps a reference to every figure until it is closed, so
        # release it here even when plotting or saving fails.
        plt.close(fig)
def increase_decrease(subjects, exam_scores) -> None:
    """Render a text report of each subject's largest score rise and fall.

    For every subject, the row-to-row difference of its column in
    ``exam_scores`` is computed, and the rows with the largest and smallest
    difference are reported. The report is drawn as centered text on an
    axis-less figure and written to
    ``./student_analysis/requested_plots/score_comparison.png``.

    Args:
        subjects: Iterable of column names in ``exam_scores`` to analyse.
        exam_scores: Frame with one column of scores per subject, one row per
            semester.
    """
    report_parts = []
    for subject in subjects:
        # Diff a Series rather than a one-column frame so that idxmax/idxmin
        # return the row label directly. The previous frame-based form needed
        # ``[0]`` positional access on a label-indexed Series, which pandas
        # has deprecated.
        score_changes = exam_scores[subject].diff()

        report_parts.append(f"For {subject}:\n")
        if not score_changes.notna().any():
            # With fewer than two usable rows there is nothing to compare and
            # idxmax/idxmin cannot return a valid label.
            report_parts.append("Not enough semesters to compare.\n\n")
            continue

        # Find semester with most improvement and decline
        # NOTE(review): the value reported as "Semester" is the row label of
        # exam_scores, not the value of its 'Semester' column — confirm that
        # this is the intended numbering.
        most_improved_semester = score_changes.idxmax()
        most_declined_semester = score_changes.idxmin()
        report_parts.append(
            f"Most Improvement: Semester {most_improved_semester}, Score Increase: {score_changes.loc[most_improved_semester]}\n"
        )
        report_parts.append(
            f"Quality Decline: Semester {most_declined_semester}, Score Decrease: {score_changes.loc[most_declined_semester]}\n\n"
        )
    report_text = "".join(report_parts)

    fig, ax = plt.subplots(figsize=(8, 6))
    try:
        ax.text(0.5, 0.5, report_text, horizontalalignment='center', verticalalignment='center', fontsize=12)
        # Remove axes
        ax.axis('off')
        fig.savefig('./student_analysis/requested_plots/score_comparison.png')
    finally:
        # pyplot keeps a reference to every figure until it is closed, so
        # release it here even when rendering or saving fails.
        plt.close(fig)
def default_dashboard_class(subject: str) -> None:
    """Regenerate the full default set of class-level plots.

    Empties ``./student_analysis/requested_plots`` and then writes the
    gender, test-preparation-course, mean, median, highest and lowest score
    plots.

    Args:
        subject: Subject name passed to the two per-subject plots.
    """
    remove_files_in_directory('./student_analysis/requested_plots')
    # The plotting functions read the module-level ``data`` frame, so the
    # dataset is not re-read into an unused local here.
    mean_scores_subject_gender(subject)
    course_and_scores_relations(subject)
    plot_mean_scores()
    plot_median_scores()
    plot_highest_scores_each_subject()
    # Previously the highest-scores plot was generated twice and the
    # lowest-scores plot was never produced.
    plot_lowest_scores_each_subject()
def default_dashboard_student(name: str, subjects=None) -> None:
    """Regenerate the full default set of plots for one student.

    Empties ``./student_analysis/requested_plots``, reads
    ``./student_analysis/student_data/{name}.csv`` and then writes the
    individual-score, semester line, semester box and improvement/decline
    plots.

    Args:
        name: Student name; also the stem of the student's CSV file.
        subjects: Column names of the student's CSV to plot. Defaults to
            ``['maths', 'computer science', 'reading', 'writing', 'physics']``.
    """
    if subjects is None:
        # Build the default per call instead of sharing one mutable list
        # object between every invocation.
        subjects = ['maths', 'computer science', 'reading', 'writing', 'physics']

    remove_files_in_directory('./student_analysis/requested_plots')
    exam_scores = pd.read_csv(f"./student_analysis/student_data/{name}.csv")
    plot_scores_individual(name)
    plot_individual_semester_line(subjects, exam_scores)
    plot_individual_semester_box(name)
    increase_decrease(subjects, exam_scores)
def plot_dashboard_class(selected_options: list, subject: str) -> None:
    """Generate only the class-level plots named in ``selected_options``.

    Empties ``./student_analysis/requested_plots`` and then, for each
    recognised option label, calls the matching plotting function. Labels
    that are not recognised are skipped.

    Args:
        selected_options: Option labels, matched exactly against the keys of
            the dispatch table below.
        subject: Subject name passed to the two per-subject plots.
    """
    remove_files_in_directory('./student_analysis/requested_plots')
    # The plotting functions read the module-level ``data`` frame, so the
    # dataset is not re-read into an unused local here.
    option_to_function = {
        "Scores with respect to gender": (mean_scores_subject_gender, (subject,)),
        "Impact of course completion on grades": (course_and_scores_relations, (subject,)),
        "Mean Scores": (plot_mean_scores, ()),
        "Median Scores": (plot_median_scores, ()),
        "Highest Scores": (plot_highest_scores_each_subject, ()),
        "Lowest Scores": (plot_lowest_scores_each_subject, ()),
    }
    for option in selected_options:
        entry = option_to_function.get(option)
        if entry is None:
            # Unpacking the ``None`` returned for an unknown label used to
            # raise TypeError before the intended skip could happen.
            continue
        function, params = entry
        function(*params)
def plot_dashboard_student(selected_options: list, name: str, subjects: list) -> None:
    """Generate only the per-student plots named in ``selected_options``.

    Empties ``./student_analysis/requested_plots/``, reads
    ``./student_analysis/student_data/{name}.csv`` and then, for each
    recognised option label, calls the matching plotting function. Labels
    that are not recognised are skipped.

    Args:
        selected_options: Option labels, matched exactly against the keys of
            the dispatch table below.
        name: Student name; also the stem of the student's CSV file.
        subjects: Column names of the student's CSV to plot.
    """
    remove_files_in_directory('./student_analysis/requested_plots/')
    exam_scores = pd.read_csv(f"./student_analysis/student_data/{name}.csv")
    option_to_function = {
        "Plot Scores for the student": (plot_scores_individual, (name,)),
        "Plot Individual Semester Progress(Line Plot)": (plot_individual_semester_line, (subjects, exam_scores)),
        "Plot Individual Semester Progress (Box Plot)": (plot_individual_semester_box, (name,)),
        "Improvements and Decline of Marks": (increase_decrease, (subjects, exam_scores)),
    }
    for option in selected_options:
        entry = option_to_function.get(option)
        if entry is None:
            # Unpacking the ``None`` returned for an unknown label used to
            # raise TypeError before the intended skip could happen.
            continue
        function, params = entry
        function(*params)
def remove_files_in_directory(directory):
    """Delete every regular file located directly inside ``directory``.

    Entries for which ``os.path.isfile`` is false (such as subdirectories)
    are left in place, and nothing below the top level is visited.
    """
    # Resolve each directory entry to a path, then keep only the files.
    entry_paths = (os.path.join(directory, entry) for entry in os.listdir(directory))
    for path in filter(os.path.isfile, entry_paths):
        os.remove(path)
# Runs whenever this module is executed or imported: empties
# ./student_analysis/requested_plots and renders the two selected class plots.
# NOTE(review): 'maths' makes mean_scores_subject_gender look up a
# 'maths score' column, while the other class-level plots read 'math score' —
# confirm against the CSV header. Also confirm whether this call is meant to
# run on import or should sit behind an `if __name__ == "__main__":` guard.
plot_dashboard_class(['Scores with respect to gender','Highest Scores'],'maths')
|