{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "b2e049c7-d5db-45e6-b651-2601c02f4b7d",
   "metadata": {},
   "source": [
    "## Data organisation example - IDRiD"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "16b65740-249b-4eef-9298-1db01f72d050",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import shutil\n",
    "import pandas as pd\n",
    "from sklearn.model_selection import train_test_split"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b12bad44",
   "metadata": {},
   "source": [
    "### Configuration and helpers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7c2f4e9a-3b1d-4f6a-9e58-0d2b6a1c4e37",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Column names read from the grading CSV files.\n",
    "GRADE_COL = 'Retinopathy grade'\n",
    "NAME_COL = 'Image name'\n",
    "\n",
    "# Output sub-folder for each class; the list position equals the grade value (0-4).\n",
    "CLASS_DIRS = ['a_noDR', 'b_mildDR', 'c_moderateDR', 'd_severeDR', 'e_proDR']\n",
    "\n",
    "VAL_FRACTION = 0.2  # share of each grade's training images held out for validation\n",
    "SEED = 1            # fixed random_state so the train/val split is reproducible\n",
    "\n",
    "\n",
    "def images_with_grade(labels, grade):\n",
    "    \"\"\"Return the `NAME_COL` entries of `labels` whose `GRADE_COL` equals `grade`.\"\"\"\n",
    "    return labels.loc[labels[GRADE_COL] == grade, NAME_COL]\n",
    "\n",
    "\n",
    "def split_train_val(names):\n",
    "    \"\"\"Split one grade's image names into `[train, val]` using VAL_FRACTION and SEED.\"\"\"\n",
    "    return train_test_split(names, test_size=VAL_FRACTION, random_state=SEED)\n",
    "\n",
    "\n",
    "def copy_images(names, src_dir, dst_dir):\n",
    "    \"\"\"Copy `<name>.png` for every name from `src_dir` into `dst_dir`.\n",
    "\n",
    "    `dst_dir` is created first if it does not exist, because `shutil.copy`\n",
    "    does not create missing parent folders. A missing source image still\n",
    "    raises, so an incomplete dataset is not silently accepted.\n",
    "    \"\"\"\n",
    "    os.makedirs(dst_dir, exist_ok=True)\n",
    "    for name in names:\n",
    "        filename = '{}.png'.format(name)\n",
    "        shutil.copy(os.path.join(src_dir, filename), os.path.join(dst_dir, filename))"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "ff0bf26e-c657-49de-8761-89d5a94c390d",
   "metadata": {},
   "source": [
    "### Split val set from train data\n",
    "- Download dataset from [official website](https://ieee-dataport.org/open-access/indian-diabetic-retinopathy-image-dataset-idrid) \n",
    "- Images can be processed if necessary, with any processing tools such as [AutoMorph](https://github.com/rmaphoh/AutoMorph)\n",
    "- The cells below read `.png` images from `./train_processed` and `./test_processed` and copy them into per-class folders under `./train`, `./val` and `./test`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4bc1cb67-0adf-4640-8640-d0740a39366b",
   "metadata": {},
   "outputs": [],
   "source": [
    "list_ = pd.read_csv('IDRiD_Disease_Grading_Training_Labels.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b85fc0d1-2049-4550-bdec-76240b1bc759",
   "metadata": {},
   "outputs": [],
   "source": [
    "# One Series of image names per grade, in grade order 0-4.\n",
    "noDR, mildDR, moderateDR, severeDR, proDR = [\n",
    "    images_with_grade(list_, grade) for grade in range(len(CLASS_DIRS))\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d0617e35-8b91-45d3-90d5-d5e5bf2d7762",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Each grade is split on its own, so every class holds out the same fraction for validation.\n",
    "noDR_train, noDR_val = split_train_val(noDR)\n",
    "mildDR_train, mildDR_val = split_train_val(mildDR)\n",
    "moderateDR_train, moderateDR_val = split_train_val(moderateDR)\n",
    "severeDR_train, severeDR_val = split_train_val(severeDR)\n",
    "proDR_train, proDR_val = split_train_val(proDR)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f30ce03f-5730-4e68-b6c5-8e1b6b9167f8",
   "metadata": {},
   "outputs": [],
   "source": [
    "train_sets = [noDR_train, mildDR_train, moderateDR_train, severeDR_train, proDR_train]\n",
    "for class_dir, names in zip(CLASS_DIRS, train_sets):\n",
    "    copy_images(names, './train_processed', os.path.join('./train', class_dir))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "196d1845-3e5e-4d38-82e5-66057a693962",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Validation images also come from ./train_processed; only the destination differs.\n",
    "val_sets = [noDR_val, mildDR_val, moderateDR_val, severeDR_val, proDR_val]\n",
    "for class_dir, names in zip(CLASS_DIRS, val_sets):\n",
    "    copy_images(names, './train_processed', os.path.join('./val', class_dir))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "faf285f4-9079-49ca-9d99-8f3f5718afbf",
   "metadata": {},
   "source": [
    "### Organise test set"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "118d15d0-9e94-4f6e-855d-dfa3796b24d2",
   "metadata": {},
   "outputs": [],
   "source": [
    "list_test = pd.read_csv('IDRiD_Disease_Grading_Testing_Labels.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "89a098fe-0aad-41d4-ab09-476ff0354c77",
   "metadata": {},
   "outputs": [],
   "source": [
    "# One Series of test image names per grade, in grade order 0-4.\n",
    "noDR_test, mildDR_test, moderateDR_test, severeDR_test, proDR_test = [\n",
    "    images_with_grade(list_test, grade) for grade in range(len(CLASS_DIRS))\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "33a207c1-1fef-4e79-8ff2-84329062495b",
   "metadata": {},
   "outputs": [],
   "source": [
    "test_sets = [noDR_test, mildDR_test, moderateDR_test, severeDR_test, proDR_test]\n",
    "for class_dir, names in zip(CLASS_DIRS, test_sets):\n",
    "    copy_images(names, './test_processed', os.path.join('./test', class_dir))"
   ]
  }
 ],
 "metadata": {
  "environment": {
   "kernel": "python3",
   "name": "common-cu110.m91",
   "type": "gcloud",
   "uri": "gcr.io/deeplearning-platform-release/base-cu110:m91"
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}