diff --git "a/HAR_Part_2.ipynb" "b/HAR_Part_2.ipynb"
new file mode 100644--- /dev/null
+++ "b/HAR_Part_2.ipynb"
@@ -0,0 +1,1184 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "provenance": []
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ },
+ "language_info": {
+ "name": "python"
+ },
+ "accelerator": "GPU",
+ "gpuClass": "standard"
+ },
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "source": [
+ "# Applying Machine Learning Models"
+ ],
+ "metadata": {
+ "id": "sQGcMFmd1LwL"
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "drl_NkpR1AUz"
+ },
+ "outputs": [],
+ "source": [
+ "#importing required libraries \n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "import itertools\n",
+ "import matplotlib.pyplot as plt\n",
+ "from sklearn.metrics import confusion_matrix\n",
+ "from datetime import datetime\n",
+ "\n",
+ "from sklearn import linear_model\n",
+ "from sklearn import metrics\n",
+ "\n",
+ "from sklearn.model_selection import GridSearchCV\n",
+ "from sklearn.svm import LinearSVC\n",
+ "from sklearn.svm import SVC\n",
+ "from sklearn.tree import DecisionTreeClassifier\n",
+ "from sklearn.ensemble import RandomForestClassifier\n",
+ "from sklearn.ensemble import GradientBoostingClassifier"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Loading Data from File"
+ ],
+ "metadata": {
+ "id": "Lm6VtVdM1bCS"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# CSV locations under the Colab Google Drive mount\n",
+ "train_csv = '/content/drive/MyDrive/UCI_HAR_Dataset/csv_files/train.csv'\n",
+ "test_csv = '/content/drive/MyDrive/UCI_HAR_Dataset/csv_files/test.csv'\n",
+ "\n",
+ "train = pd.read_csv(train_csv)\n",
+ "test = pd.read_csv(test_csv)\n",
+ "print(train.shape, test.shape)"
+ ],
+ "metadata": {
+ "id": "2T3Z0hWP1aK4",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "2caee9cb-bbd7-4d4d-8e28-d2209b176a29"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "(7352, 564) (2947, 564)\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Preview the first two rows of the training frame\n",
+ "train.head(n=2)"
+ ],
+ "metadata": {
+ "id": "C9HrxSH97urK",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 222
+ },
+ "outputId": "1064d9e7-2505-4979-ae84-bf57d41e29f4"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " tBodyAcc-mean()-X tBodyAcc-mean()-Y tBodyAcc-mean()-Z tBodyAcc-std()-X \\\n",
+ "0 0.288585 -0.020294 -0.132905 -0.995279 \n",
+ "1 0.278419 -0.016411 -0.123520 -0.998245 \n",
+ "\n",
+ " tBodyAcc-std()-Y tBodyAcc-std()-Z tBodyAcc-mad()-X tBodyAcc-mad()-Y \\\n",
+ "0 -0.983111 -0.913526 -0.995112 -0.983185 \n",
+ "1 -0.975300 -0.960322 -0.998807 -0.974914 \n",
+ "\n",
+ " tBodyAcc-mad()-Z tBodyAcc-max()-X ... angle(tBodyAccMean,gravity) \\\n",
+ "0 -0.923527 -0.934724 ... -0.112754 \n",
+ "1 -0.957686 -0.943068 ... 0.053477 \n",
+ "\n",
+ " angle(tBodyAccJerkMean),gravityMean) angle(tBodyGyroMean,gravityMean) \\\n",
+ "0 0.030400 -0.464761 \n",
+ "1 -0.007435 -0.732626 \n",
+ "\n",
+ " angle(tBodyGyroJerkMean,gravityMean) angle(X,gravityMean) \\\n",
+ "0 -0.018446 -0.841247 \n",
+ "1 0.703511 -0.844788 \n",
+ "\n",
+ " angle(Y,gravityMean) angle(Z,gravityMean) subject Activity ActivityName \n",
+ "0 0.179941 -0.058627 1 5 STANDING \n",
+ "1 0.180289 -0.054317 1 5 STANDING \n",
+ "\n",
+ "[2 rows x 564 columns]"
+ ],
+ "text/html": [
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " tBodyAcc-mean()-X | \n",
+ " tBodyAcc-mean()-Y | \n",
+ " tBodyAcc-mean()-Z | \n",
+ " tBodyAcc-std()-X | \n",
+ " tBodyAcc-std()-Y | \n",
+ " tBodyAcc-std()-Z | \n",
+ " tBodyAcc-mad()-X | \n",
+ " tBodyAcc-mad()-Y | \n",
+ " tBodyAcc-mad()-Z | \n",
+ " tBodyAcc-max()-X | \n",
+ " ... | \n",
+ " angle(tBodyAccMean,gravity) | \n",
+ " angle(tBodyAccJerkMean),gravityMean) | \n",
+ " angle(tBodyGyroMean,gravityMean) | \n",
+ " angle(tBodyGyroJerkMean,gravityMean) | \n",
+ " angle(X,gravityMean) | \n",
+ " angle(Y,gravityMean) | \n",
+ " angle(Z,gravityMean) | \n",
+ " subject | \n",
+ " Activity | \n",
+ " ActivityName | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 0.288585 | \n",
+ " -0.020294 | \n",
+ " -0.132905 | \n",
+ " -0.995279 | \n",
+ " -0.983111 | \n",
+ " -0.913526 | \n",
+ " -0.995112 | \n",
+ " -0.983185 | \n",
+ " -0.923527 | \n",
+ " -0.934724 | \n",
+ " ... | \n",
+ " -0.112754 | \n",
+ " 0.030400 | \n",
+ " -0.464761 | \n",
+ " -0.018446 | \n",
+ " -0.841247 | \n",
+ " 0.179941 | \n",
+ " -0.058627 | \n",
+ " 1 | \n",
+ " 5 | \n",
+ " STANDING | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 0.278419 | \n",
+ " -0.016411 | \n",
+ " -0.123520 | \n",
+ " -0.998245 | \n",
+ " -0.975300 | \n",
+ " -0.960322 | \n",
+ " -0.998807 | \n",
+ " -0.974914 | \n",
+ " -0.957686 | \n",
+ " -0.943068 | \n",
+ " ... | \n",
+ " 0.053477 | \n",
+ " -0.007435 | \n",
+ " -0.732626 | \n",
+ " 0.703511 | \n",
+ " -0.844788 | \n",
+ " 0.180289 | \n",
+ " -0.054317 | \n",
+ " 1 | \n",
+ " 5 | \n",
+ " STANDING | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
2 rows × 564 columns
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ "
\n",
+ "
\n",
+ " "
+ ]
+ },
+ "metadata": {},
+ "execution_count": 3
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "### **Making Data for Model**"
+ ],
+ "metadata": {
+ "id": "bm1Q842V7x1j"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Columns that identify the sample or encode the target, i.e. not model inputs\n",
+ "non_feature_cols = ['subject', 'Activity', 'ActivityName']\n",
+ "\n",
+ "X_train = train.drop(columns=non_feature_cols)\n",
+ "y_train = train['ActivityName']\n",
+ "\n",
+ "X_test = test.drop(columns=non_feature_cols)\n",
+ "y_test = test['ActivityName']\n",
+ "\n",
+ "train_shapes_msg = 'X_train and y_train : ({},{})'.format(X_train.shape, y_train.shape)\n",
+ "test_shapes_msg = 'X_test and y_test : ({},{})'.format(X_test.shape, y_test.shape)\n",
+ "print(train_shapes_msg)\n",
+ "print(test_shapes_msg)"
+ ],
+ "metadata": {
+ "id": "2iKFWEvy7vet",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "4f2cf9f4-98e6-42b8-f1a0-11b8257aa3cd"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "X_train and y_train : ((7352, 561),(7352,))\n",
+ "X_test and y_test : ((2947, 561),(2947,))\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "**Create a model with our Dataset**"
+ ],
+ "metadata": {
+ "id": "K88PMVcEAOd6"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Tick labels for the confusion-matrix plots.\n",
+ "# Kept in sorted order: perform_model builds the matrix without an explicit\n",
+ "# label list, and scikit-learn then orders rows/columns by sorted label value.\n",
+ "labels = [\n",
+ "    'LAYING',\n",
+ "    'SITTING',\n",
+ "    'STANDING',\n",
+ "    'WALKING',\n",
+ "    'WALKING_DOWNSTAIRS',\n",
+ "    'WALKING_UPSTAIRS',\n",
+ "]"
+ ],
+ "metadata": {
+ "id": "d0IitEyHAb5U"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "**Let's define a function to plot the confusion matrix**"
+ ],
+ "metadata": {
+ "id": "doaaVuPrAXfc"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "def plot_confusion_matrix(cm, classes,\n",
+ "                          normalize=False,\n",
+ "                          title='Confusion matrix',\n",
+ "                          cmap=plt.cm.Blues):\n",
+ "    \"\"\"Draw a confusion matrix as an annotated heat map on the current figure.\n",
+ "\n",
+ "    Parameters\n",
+ "    ----------\n",
+ "    cm : 2-D array-like\n",
+ "        Confusion matrix. Rows are drawn along the 'True label' axis and\n",
+ "        columns along the 'Predicted label' axis.\n",
+ "    classes : sequence\n",
+ "        Tick labels, in the same order as the rows/columns of ``cm``.\n",
+ "    normalize : bool, default False\n",
+ "        If True, divide every row by its sum and annotate the cells with\n",
+ "        two decimals; otherwise annotate them as integers.\n",
+ "    title : str\n",
+ "        Title of the plot.\n",
+ "    cmap : matplotlib colormap\n",
+ "        Colormap handed to ``plt.imshow``.\n",
+ "    \"\"\"\n",
+ "    cm = np.asarray(cm)\n",
+ "    if normalize:\n",
+ "        row_totals = cm.sum(axis=1, keepdims=True)\n",
+ "        # A class with no true samples has a zero row total; keep that row at\n",
+ "        # 0 instead of dividing by zero and filling it (and thresh) with NaN.\n",
+ "        cm = np.divide(cm.astype('float'), row_totals,\n",
+ "                       out=np.zeros(cm.shape, dtype='float'),\n",
+ "                       where=row_totals != 0)\n",
+ "\n",
+ "    plt.imshow(cm, interpolation='nearest', cmap=cmap)\n",
+ "    plt.title(title)\n",
+ "    plt.colorbar()\n",
+ "    tick_marks = np.arange(len(classes))\n",
+ "    plt.xticks(tick_marks, classes, rotation=90)\n",
+ "    plt.yticks(tick_marks, classes)\n",
+ "\n",
+ "    fmt = '.2f' if normalize else 'd'\n",
+ "    # Cells above half of the largest value get white text, the rest black.\n",
+ "    thresh = cm.max() / 2.\n",
+ "    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):\n",
+ "        plt.text(j, i, format(cm[i, j], fmt),\n",
+ "                 horizontalalignment=\"center\",\n",
+ "                 color=\"white\" if cm[i, j] > thresh else \"black\")\n",
+ "\n",
+ "    plt.ylabel('True label')\n",
+ "    plt.xlabel('Predicted label')\n",
+ "    # Lay out last so the axis labels set above are part of the fit.\n",
+ "    plt.tight_layout()"
+ ],
+ "metadata": {
+ "id": "UeaZTd0qAfv9"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "**Let's define a function to run any model specified**"
+ ],
+ "metadata": {
+ "id": "B5QFLapTBNGA"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "from datetime import datetime\n",
+ "def perform_model(model, X_train, y_train, X_test, y_test, class_labels, cm_normalize=True,\n",
+ "                  print_cm=True, cm_cmap=plt.cm.Greens):\n",
+ "    \"\"\"Fit ``model``, evaluate it on the test split and report the results.\n",
+ "\n",
+ "    Prints the training/prediction times, the accuracy, optionally the raw\n",
+ "    confusion matrix, and the classification report, and shows a\n",
+ "    confusion-matrix plot.\n",
+ "\n",
+ "    Parameters\n",
+ "    ----------\n",
+ "    model : object with ``fit(X, y)`` and ``predict(X)``\n",
+ "        The estimator to train; it is fitted in place.\n",
+ "    X_train, y_train\n",
+ "        Training data passed to ``model.fit``.\n",
+ "    X_test, y_test\n",
+ "        Held-out data used for prediction and scoring.\n",
+ "    class_labels : sequence\n",
+ "        Tick labels for the confusion-matrix plot. The matrix is built\n",
+ "        without an explicit label list, so these must be given in the order\n",
+ "        scikit-learn uses by default (sorted label values).\n",
+ "    cm_normalize : bool, default True\n",
+ "        Whether the plotted confusion matrix is row-normalised.\n",
+ "    print_cm : bool, default True\n",
+ "        Whether to print the raw confusion matrix.\n",
+ "    cm_cmap : matplotlib colormap\n",
+ "        Colormap of the confusion-matrix plot.\n",
+ "\n",
+ "    Returns\n",
+ "    -------\n",
+ "    dict\n",
+ "        Keys: 'training_time', 'testing_time', 'predicted', 'accuracy',\n",
+ "        'confusion_matrix', 'classification_report' and 'model'.\n",
+ "    \"\"\"\n",
+ "    # to store results at various phases\n",
+ "    results = dict()\n",
+ "\n",
+ "    # train the model; take the end time right after fit() so the\n",
+ "    # measurement covers only the training call\n",
+ "    print('training the model..')\n",
+ "    train_start_time = datetime.now()\n",
+ "    model.fit(X_train, y_train)\n",
+ "    train_end_time = datetime.now()\n",
+ "    print('Done....!\\n')\n",
+ "    results['training_time'] = train_end_time - train_start_time\n",
+ "    print('==> training time:- {}\\n'.format(results['training_time']))\n",
+ "\n",
+ "    # predict test data\n",
+ "    print('Predicting test data')\n",
+ "    test_start_time = datetime.now()\n",
+ "    y_pred = model.predict(X_test)\n",
+ "    test_end_time = datetime.now()\n",
+ "    print('Done....!\\n')\n",
+ "    results['testing_time'] = test_end_time - test_start_time\n",
+ "    print('==> testing time:- {}\\n'.format(results['testing_time']))\n",
+ "    results['predicted'] = y_pred\n",
+ "\n",
+ "    # overall accuracy of the model\n",
+ "    accuracy = metrics.accuracy_score(y_true=y_test, y_pred=y_pred)\n",
+ "    results['accuracy'] = accuracy\n",
+ "    print('==> Accuracy:- {}\\n'.format(accuracy))\n",
+ "\n",
+ "    # confusion matrix\n",
+ "    cm = metrics.confusion_matrix(y_test, y_pred)\n",
+ "    results['confusion_matrix'] = cm\n",
+ "    if print_cm:\n",
+ "        print('\\n ********Confusion Matrix********')\n",
+ "        print('\\n {}'.format(cm))\n",
+ "\n",
+ "    # plot confusion matrix\n",
+ "    plt.figure(figsize=(6,6))\n",
+ "    # Pass the flag positionally: the ``b`` keyword of grid() was renamed to\n",
+ "    # ``visible`` in newer Matplotlib releases, where ``grid(b=False)`` fails.\n",
+ "    plt.grid(False)\n",
+ "    # Honour cm_normalize (it used to be ignored) and keep the title truthful.\n",
+ "    cm_title = 'Normalized confusion matrix' if cm_normalize else 'Confusion matrix'\n",
+ "    plot_confusion_matrix(cm, classes=class_labels, normalize=cm_normalize, title=cm_title, cmap=cm_cmap)\n",
+ "    plt.show()\n",
+ "\n",
+ "    # get classification report\n",
+ "    print('****************| Classifiction Report |****************')\n",
+ "    classification_report = metrics.classification_report(y_test, y_pred)\n",
+ "    results['classification_report'] = classification_report\n",
+ "    print(classification_report)\n",
+ "\n",
+ "    # add the trained model to the results\n",
+ "    results['model'] = model\n",
+ "\n",
+ "    return results"
+ ],
+ "metadata": {
+ "id": "II4zsTIdBeXT"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "**Define function to print the gridsearch Parameters**"
+ ],
+ "metadata": {
+ "id": "NJWTvtZmCiPD"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "\n",
+ "def print_grid_search_attributes(model):\n",
+ "    \"\"\"Print a summary of a fitted grid search.\n",
+ "\n",
+ "    Reads ``best_estimator_``, ``best_params_``, ``n_splits_`` and\n",
+ "    ``best_score_`` from ``model`` and prints each under its own heading.\n",
+ "    \"\"\"\n",
+ "    # (heading, line template, attribute read from the search object)\n",
+ "    sections = (\n",
+ "        # estimator that gave the highest score among all candidates\n",
+ "        ('\\n\\n==> Best Estimator:', '\\t{}\\n', 'best_estimator_'),\n",
+ "        # parameter setting of that estimator\n",
+ "        ('\\n==> Best parameters:', '\\tParameters of best estimator : {}', 'best_params_'),\n",
+ "        # number of cross-validation splits\n",
+ "        ('\\n==> No. of CrossValidation sets:', '\\tTotal numbre of cross validation sets: {}', 'n_splits_'),\n",
+ "        # mean cross-validated score of the best estimator\n",
+ "        ('\\n==> Best Score:', '\\tAverage Cross Validate scores of best estimator : {}', 'best_score_'),\n",
+ "    )\n",
+ "\n",
+ "    # Each attribute is looked up only after its heading has been printed,\n",
+ "    # one section at a time.\n",
+ "    for heading, template, attr_name in sections:\n",
+ "        print(heading)\n",
+ "        print(template.format(getattr(model, attr_name)))"
+ ],
+ "metadata": {
+ "id": "SUmk6iANChqE"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "### 1. Logistic Regression with Grid Search"
+ ],
+ "metadata": {
+ "id": "SED_FTJ9C_0a"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "import warnings\n",
+ "from sklearn.exceptions import ConvergenceWarning\n",
+ "\n",
+ "# Silence only the solver convergence messages. A blanket\n",
+ "# warnings.filterwarnings(\"ignore\") would also hide FitFailedWarning, which is\n",
+ "# how GridSearchCV reports candidates that could not be fitted at all.\n",
+ "warnings.simplefilter(action='ignore', category=ConvergenceWarning)\n",
+ "\n",
+ "#Grid search\n",
+ "# Each penalty is paired with a solver that implements it: scikit-learn's\n",
+ "# default 'lbfgs' solver rejects penalty='l1', so with a single flat grid every\n",
+ "# l1 candidate fails to fit and only the l2 half is actually searched.\n",
+ "C_values = [0.01, 0.1, 1, 10, 20, 30]\n",
+ "parameters = [\n",
+ "    {'C': C_values, 'penalty': ['l2'], 'solver': ['lbfgs']},\n",
+ "    {'C': C_values, 'penalty': ['l1'], 'solver': ['saga']},\n",
+ "]\n",
+ "# random_state fixes the data shuffling of the stochastic 'saga' solver so\n",
+ "# repeated runs give the same l1 results.\n",
+ "log_reg = linear_model.LogisticRegression(random_state=42)\n",
+ "log_reg_grid = GridSearchCV(log_reg, param_grid=parameters, cv=3, verbose=1, n_jobs=-1)\n",
+ "log_reg_grid_results = perform_model(log_reg_grid, X_train, y_train, X_test, y_test, class_labels=labels)\n",
+ "\n",
+ "# observe the attributes of the model \n",
+ "print_grid_search_attributes(log_reg_grid_results['model'])"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 1000
+ },
+ "id": "PB5MBnOqDGpK",
+ "outputId": "75410cc9-e23b-4573-f8ee-d8b73ed679a1"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "training the model..\n",
+ "Fitting 3 folds for each of 12 candidates, totalling 36 fits\n",
+ "Done....!\n",
+ "\n",
+ "==> training time:- 0:00:41.661560\n",
+ "\n",
+ "Predicting test data\n",
+ "Done....!\n",
+ "\n",
+ "==> testing time:- 0:00:00.014502\n",
+ "\n",
+ "==> Accuracy:- 0.9579233118425518\n",
+ "\n",
+ "\n",
+ " ********Confusion Matrix********\n",
+ "\n",
+ " [[537 0 0 0 0 0]\n",
+ " [ 0 430 58 0 0 3]\n",
+ " [ 0 16 516 0 0 0]\n",
+ " [ 0 0 0 492 3 1]\n",
+ " [ 0 0 0 4 403 13]\n",
+ " [ 0 0 0 24 2 445]]\n"
+ ]
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ "