# Spaces: Build error
# (page-status residue captured with the script; not part of the code)
# %%
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Path of the CSV file to analyse; point this at your own dataset if needed.
DATASET_PATH = "synthetic_data_with_all_subjects.csv"

# Read the whole file into a DataFrame that the following cells share.
data = pd.read_csv(DATASET_PATH)

# Preview the first few rows (rendered as the cell output in a notebook).
data.head()
# %%
# Bar chart: mean math score for each gender category.
_, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=data, x='gender', y='math score', ax=ax)
ax.set_title('Mean Math Score by Gender')
ax.set_xlabel('Gender')
ax.set_ylabel('Mean Math Score')
plt.show()
# %%
# Box plot: spread of reading scores for each test-preparation-course value.
_, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(data=data, x='test preparation course', y='reading score', ax=ax)
ax.set_title('Reading Score Distribution by Test Preparation Course')
ax.set_xlabel('Test Preparation Course')
ax.set_ylabel('Reading Score')
plt.show()
# %%
# Violin plot: writing-score distribution per parental level of education.
_, ax = plt.subplots(figsize=(12, 8))
sns.violinplot(
    data=data,
    x='parental level of education',
    y='writing score',
    ax=ax,
)
ax.set_title('Writing Score Distribution by Parental Level of Education')
ax.set_xlabel('Parental Level of Education')
ax.set_ylabel('Writing Score')
# Tilt the category labels so the longer ones do not overlap.
plt.xticks(rotation=45)
plt.show()
# %%
# Pairwise relationships between the five subject scores.
score_columns = [
    'math score',
    'reading score',
    'writing score',
    'physics score',
    'computer science score',
]
sns.pairplot(data.loc[:, score_columns])
plt.show()
# %%
# Per-subject mean and median, each a Series indexed by score-column name.
score_columns = [
    'math score',
    'reading score',
    'writing score',
    'physics score',
    'computer science score',
]
subject_scores = data[score_columns]
mean_scores = subject_scores.mean()
median_scores = subject_scores.median()
# %%
# Plot mean scores: one bar per subject.
_, ax = plt.subplots(figsize=(10, 6))
sns.barplot(x=mean_scores.index, y=mean_scores.to_numpy(), ax=ax)
ax.set_title('Mean Scores for Each Subject')
ax.set_xlabel('Subject')
ax.set_ylabel('Mean Score')
plt.xticks(rotation=45)
plt.show()
# %%
# Plot median scores: one bar per subject.
_, ax = plt.subplots(figsize=(10, 6))
sns.barplot(x=median_scores.index, y=median_scores.to_numpy(), ax=ax)
ax.set_title('Median Scores for Each Subject')
ax.set_xlabel('Subject')
ax.set_ylabel('Median Score')
plt.xticks(rotation=45)
plt.show()
# %%
# Per-subject maximum and minimum, each a Series indexed by score-column name.
score_columns = [
    'math score',
    'reading score',
    'writing score',
    'physics score',
    'computer science score',
]
subject_scores = data[score_columns]
highest_scores = subject_scores.max()
lowest_scores = subject_scores.min()
# %%
# Plot the highest score recorded in each subject.
_, ax = plt.subplots(figsize=(10, 6))
sns.barplot(x=highest_scores.index, y=highest_scores.to_numpy(), ax=ax)
ax.set_title('Highest Scores for Each Subject')
ax.set_xlabel('Subject')
ax.set_ylabel('Highest Score')
plt.xticks(rotation=45)
plt.show()
# %%
# Plot the lowest score recorded in each subject.
_, ax = plt.subplots(figsize=(10, 6))
sns.barplot(x=lowest_scores.index, y=lowest_scores.to_numpy(), ax=ax)
ax.set_title('Lowest Scores for Each Subject')
ax.set_xlabel('Subject')
ax.set_ylabel('Lowest Score')
plt.xticks(rotation=45)
plt.show()
# %%
# For each subject, find the row label of the student with the top score.
# The result is a Series indexed by subject name, one row label per subject.
highest_scorers = data[['math score', 'reading score', 'writing score', 'physics score', 'computer science score']].idxmax(axis=0)

# Plot highest scorers: count how many subjects each top-scoring student leads.
# The Series is passed as `x=` explicitly because the meaning of the first
# positional argument of countplot differs between seaborn releases.
plt.figure(figsize=(10, 6))
sns.countplot(x=highest_scorers)
plt.title('Highest Scorer in Each Subject')
# The x axis holds student row labels and the bars count subjects, so the
# axis labels describe exactly that.
plt.xlabel('Student (Row Index)')
plt.ylabel('Number of Subjects Topped')
plt.xticks(rotation=45)
plt.show()
# %% [markdown] | |
# ### STUDENT INDIVIDUAL DATA VSI | |
# %%
# Take the first row of the dataset as the student to inspect.
student_data = data.iloc[0]

# Plot individual student performance.
# Everything from column position 5 onward is plotted; this assumes the first
# five columns are non-score attributes -- TODO confirm against the CSV header.
student_scores = student_data.iloc[5:]
_, ax = plt.subplots(figsize=(10, 6))
sns.barplot(x=student_scores.index, y=student_scores.to_numpy(), ax=ax)
ax.set_title('Individual Student Performance')
ax.set_xlabel('Subject')
ax.set_ylabel('Score')
plt.xticks(rotation=45)
plt.show()
# %%
# Pair plot restricted to the first row of the dataset.
# NOTE(review): only one row is passed, so every panel holds a single
# observation -- confirm this is what was intended.
score_columns = [
    'math score',
    'reading score',
    'writing score',
    'physics score',
    'computer science score',
]
first_student = data.head(1)
sns.pairplot(first_student[score_columns])
plt.show()
# %%
import pandas as pd
import numpy as np

# Generate synthetic data for exam scores
np.random.seed(42)  # for reproducibility

# Number of semesters (rows) and subjects (score columns)
num_semesters = 6
num_subjects = 5

# Derive the column headers from num_subjects so the header count always
# matches the width of the random matrix, whatever num_subjects is set to.
subject_columns = [f'Subject {i}' for i in range(1, num_subjects + 1)]

# One integer score in [0, 100] per (semester, subject) pair
exam_scores = pd.DataFrame(
    np.random.randint(0, 101, size=(num_semesters, num_subjects)),
    columns=subject_columns,
)

# Add semester column, numbered from 1
exam_scores['Semester'] = range(1, num_semesters + 1)

# Save the data to a CSV file (without the row index) for the later cells
exam_scores.to_csv("student_exam_scores.csv", index=False)

# Display the first few rows of the dataset
print(exam_scores.head())
# %%
import seaborn as sns
import matplotlib.pyplot as plt

# Load the per-semester exam scores from the CSV file.
exam_scores = pd.read_csv("student_exam_scores.csv")

subject_names = ['Subject 1', 'Subject 2', 'Subject 3', 'Subject 4', 'Subject 5']

# Line plot: one labelled line per subject, all drawn on the same axes.
_, trend_ax = plt.subplots(figsize=(10, 6))
for subject in subject_names:
    sns.lineplot(
        data=exam_scores,
        x='Semester',
        y=subject,
        label=subject,
        ax=trend_ax,
    )
trend_ax.set_title('Exam Scores Over Semesters')
trend_ax.set_xlabel('Semester')
trend_ax.set_ylabel('Score')
trend_ax.legend()
trend_ax.grid(True)
plt.show()

# Box plot: score distribution per subject; the Semester column is dropped
# so that only the subject columns are drawn.
_, box_ax = plt.subplots(figsize=(10, 6))
sns.boxplot(data=exam_scores.drop(columns='Semester'), ax=box_ax)
box_ax.set_title('Distribution of Exam Scores for Each Subject')
box_ax.set_xlabel('Subject')
box_ax.set_ylabel('Score')
plt.show()
# %%
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Load the dataset
exam_scores = pd.read_csv("student_exam_scores.csv")

# Separate plot and improvement/decline summary for each subject
for subject in ['Subject 1', 'Subject 2', 'Subject 3', 'Subject 4', 'Subject 5']:
    # Line plot of exam scores over semesters for the subject
    plt.figure(figsize=(8, 5))
    sns.lineplot(x='Semester', y=subject, data=exam_scores)
    plt.title(f'{subject} Exam Scores Over Semesters')
    plt.xlabel('Semester')
    plt.ylabel('Score')
    plt.grid(True)
    plt.show()

    # Difference between consecutive semesters; the first row is NaN because
    # it has no predecessor.
    exam_scores_diff = exam_scores[[subject]].diff()
    subject_diff = exam_scores_diff[subject]

    # idxmax/idxmin return row labels, not semester numbers. Look the semester
    # up in the 'Semester' column so the report names the right semester.
    improved_row = subject_diff.idxmax()
    declined_row = subject_diff.idxmin()
    most_improved_semester = exam_scores.loc[improved_row, 'Semester']
    most_declined_semester = exam_scores.loc[declined_row, 'Semester']

    # Label-based lookups (.loc) replace the former positional `[0]` access.
    print(f"For {subject}:")
    print(f"Most Improvement: Semester {most_improved_semester}, Score Increase: {subject_diff.loc[improved_row]}")
    print(f"Quality Decline: Semester {most_declined_semester}, Score Decrease: {subject_diff.loc[declined_row]}\n")