{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "K6KNj8R5pFOi", "outputId": "73e388e8-294f-438d-ddc2-06ae7132580a" }, "outputs": [], "source": [ "!kaggle competitions download -c jigsaw-toxic-comment-classification-challenge\n", "!unzip jigsaw-toxic-comment-classification-challenge" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "-a6Sx13TqW2h", "outputId": "eb6bb305-7b66-4f59-e1e3-24858c1309c4" }, "outputs": [], "source": [ "!unzip test.csv.zip \n", "!unzip test_labels.csv.zip \n", "!unzip train.csv.zip" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "id": "Jt-aOqhVqavv" }, "outputs": [], "source": [ "import warnings\n", "import pandas as pd\n", "import torch\n", "import numpy as np\n", "from torch.utils.data import Dataset, DataLoader, RandomSampler, SequentialSampler\n", "from sklearn.model_selection import train_test_split\n", "from transformers import Trainer, TrainingArguments\n", "from transformers import AutoTokenizer, AutoModelForSequenceClassification" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "warnings.filterwarnings('ignore')" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "mps:0\n" ] } ], "source": [ "# Use GPU\n", "#device = \"cuda:0\" if torch.cuda.is_available() else \"cpu\"\n", "device = \"mps:0\" if torch.backends.mps.is_available() else \"cpu\"\n", "print(device)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "id": "zMDF7x0H4VFW" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idcomment_texttoxicsevere_toxicobscenethreatinsultidentity_hate
00000997932d777bfExplanation\\nWhy the edits made under my usern...000000
\n", "
" ], "text/plain": [ " id comment_text toxic \\\n", "0 0000997932d777bf Explanation\\nWhy the edits made under my usern... 0 \n", "\n", " severe_toxic obscene threat insult identity_hate \n", "0 0 0 0 0 0 " ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Load training text and label dataset\n", "# Preprocess data\n", "\n", "#test_texts = pd.read_csv(\"test.csv\").values.tolist()\n", "#test_labels = pd.read_csv('test_labels.csv').values.tolist()\n", "\n", "train = pd.read_csv('train.csv')\n", "train.head(1)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "159571 (159571, 8)\n", "id False\n", "comment_text False\n", "toxic False\n", "severe_toxic False\n", "obscene False\n", "threat False\n", "insult False\n", "identity_hate False\n", "dtype: bool\n", "False\n" ] } ], "source": [ "# Any duplicates?\n", "print(len(train['comment_text'].unique()), train.shape)\n", "\n", "# Any missing values?\n", "print(train.isnull().any())\n", "print(train.isnull().values.any())" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idcomment_texttoxicsevere_toxicobscenethreatinsultidentity_hategrouped_labels
00000997932d777bfExplanation\\nWhy the edits made under my usern...000000[0, 0, 0, 0, 0, 0]
\n", "
" ], "text/plain": [ " id comment_text toxic \\\n", "0 0000997932d777bf Explanation\\nWhy the edits made under my usern... 0 \n", "\n", " severe_toxic obscene threat insult identity_hate grouped_labels \n", "0 0 0 0 0 0 [0, 0, 0, 0, 0, 0] " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Group labels to get right format for training\n", "labels = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']\n", "train['grouped_labels'] = train[labels].values.tolist()\n", "train.head(1)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "# Convert to list from dataframe\n", "train_texts = train['comment_text'].values.tolist()\n", "train_labels = train['grouped_labels'].values.tolist()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "id": "vkxJ6NkFlc46", "tags": [] }, "outputs": [], "source": [ "# Use distilbert, a faster model of BERT which keeps 95% of the performance\n", "model_name = \"bert-base-uncased\"\n", "tokenizer = AutoTokenizer.from_pretrained(model_name)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[1, 0, 1, 1, 0, 0] 11\n", "[1, 1, 0, 1, 0, 0] 11\n", "[1, 0, 0, 1, 0, 1] 7\n", "[1, 1, 0, 0, 1, 1] 7\n", "[1, 1, 1, 0, 0, 1] 6\n", "[1, 1, 1, 1, 0, 0] 4\n", "[0, 0, 0, 1, 1, 0] 3\n", "[1, 0, 0, 1, 1, 1] 3\n", "[1, 1, 0, 0, 0, 1] 3\n", "[0, 0, 1, 0, 0, 1] 3\n", "[0, 0, 1, 1, 0, 0] 2\n", "[0, 0, 1, 1, 1, 0] 2\n", "[1, 1, 0, 1, 1, 0] 1\n", "[1, 1, 0, 1, 0, 1] 1\n", "Name: grouped_labels, dtype: int64\n", "df label indices with only one instance: [159029, 158498, 157010, 154553, 149180, 144159, 139501, 138026, 134459, 133505, 127410, 120395, 115766, 113304, 110056, 107881, 107096, 101089, 98699, 86746, 76454, 74607, 68264, 66350, 63687, 61934, 57594, 53408, 45101, 41461, 36141, 31191, 30566, 29445, 23374, 17187, 15977, 9487, 8979, 6316, 6063, 2374]\n" ] } ], "source": [ "# Also do preprocessing to see if there are any unique rows\n", "# with that specfic combination of labels\n", "# If that is the case, we want to include that row in the training data\n", "\n", "# Find unique label combinations\n", "label_counts = train['grouped_labels'].astype(str).value_counts()\n", "print(label_counts[-14:])\n", "\n", "# Take low frequency labels\n", "low_freq = label_counts[label_counts<10].keys()\n", "low_freq_inds = sorted(list(train[train['grouped_labels'].astype(str).isin(low_freq)].index), reverse=True)\n", "print('df label indices with only one instance: ', low_freq_inds)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "low_freq_train_texts = [train_texts.pop(i) for i in low_freq_inds]\n", "low_freq_train_labels = [train_labels.pop(i) for i in low_freq_inds]" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "# Add low freq values to training data\n", "train_texts.extend(low_freq_train_texts)\n", "train_labels.extend(low_freq_train_labels)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "# Split datasets for training\n", "train_texts, val_texts, train_labels, val_labels = train_test_split(train_texts, train_labels, test_size=.1)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "# Shorten token to increase training speed, average is below this\n", "max_length = 100\n", "train_encodings = tokenizer(train_texts, truncation=True, padding=True, return_tensors=\"pt\", max_length=max_length).to(device)\n", "val_encodings = tokenizer(val_texts, truncation=True, padding=True, return_tensors=\"pt\", max_length=max_length).to(device)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "class ToxicDataset(Dataset):\n", " def __init__(self, encodings, labels):\n", " self.encodings = encodings\n", " self.labels = [[float(y) for y in x] for x in labels]\n", "\n", " def __getitem__(self, idx):\n", " item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}\n", " item['labels'] = torch.tensor(self.labels[idx])\n", " return item\n", "\n", " def __len__(self):\n", " return len(self.labels)" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "train_dataset = ToxicDataset(train_encodings, train_labels)\n", "val_dataset = ToxicDataset(val_encodings, val_labels)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias']\n", "- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", "- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']\n", "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" ] } ], "source": [ "model = AutoModelForSequenceClassification.from_pretrained(model_name,\n", " num_labels=6,\n", " ).to(device)" ] }, { "cell_type": "code", "execution_count": 56, "metadata": { "collapsed": true, "id": "CI2B0V5D27gA", "jupyter": { "outputs_hidden": true }, "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).\n", "PyTorch: setting up devices\n", "***** Running training *****\n", " Num examples = 127656\n", " Num Epochs = 1\n", " Instantaneous batch size per device = 16\n", " Total train batch size (w. parallel, distributed & accumulation) = 16\n", " Gradient Accumulation steps = 1\n", " Total optimization steps = 7979\n", " Number of trainable parameters = 109486854\n" ] }, { "data": { "text/html": [ "\n", "
\n", " \n", " \n", " [ 33/7979 00:21 < 1:33:06, 1.42 it/s, Epoch 0.00/1]\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
StepTraining Loss
100.605800
200.590100
300.550200

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "ename": "KeyboardInterrupt", "evalue": "", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn[56], line 28\u001b[0m\n\u001b[1;32m 9\u001b[0m training_args \u001b[38;5;241m=\u001b[39m TrainingArgumentsWithMPSSupport(\n\u001b[1;32m 10\u001b[0m output_dir \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m./results\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[1;32m 11\u001b[0m num_train_epochs\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 18\u001b[0m logging_steps\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m10\u001b[39m,\n\u001b[1;32m 19\u001b[0m )\n\u001b[1;32m 21\u001b[0m trainer \u001b[38;5;241m=\u001b[39m Trainer(\n\u001b[1;32m 22\u001b[0m model\u001b[38;5;241m=\u001b[39mmodel,\n\u001b[1;32m 23\u001b[0m args\u001b[38;5;241m=\u001b[39mtraining_args,\n\u001b[1;32m 24\u001b[0m train_dataset\u001b[38;5;241m=\u001b[39mtrain_dataset,\n\u001b[1;32m 25\u001b[0m eval_dataset\u001b[38;5;241m=\u001b[39mval_dataset,\n\u001b[1;32m 26\u001b[0m )\n\u001b[0;32m---> 28\u001b[0m \u001b[43mtrainer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtrain\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/anaconda3/lib/python3.10/site-packages/transformers/trainer.py:1501\u001b[0m, in \u001b[0;36mTrainer.train\u001b[0;34m(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)\u001b[0m\n\u001b[1;32m 1496\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel_wrapped \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel\n\u001b[1;32m 1498\u001b[0m inner_training_loop \u001b[38;5;241m=\u001b[39m find_executable_batch_size(\n\u001b[1;32m 1499\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_inner_training_loop, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_train_batch_size, args\u001b[38;5;241m.\u001b[39mauto_find_batch_size\n\u001b[1;32m 1500\u001b[0m )\n\u001b[0;32m-> 1501\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43minner_training_loop\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1502\u001b[0m \u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1503\u001b[0m \u001b[43m \u001b[49m\u001b[43mresume_from_checkpoint\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresume_from_checkpoint\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1504\u001b[0m \u001b[43m \u001b[49m\u001b[43mtrial\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtrial\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1505\u001b[0m \u001b[43m \u001b[49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1506\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/anaconda3/lib/python3.10/site-packages/transformers/trainer.py:1749\u001b[0m, in \u001b[0;36mTrainer._inner_training_loop\u001b[0;34m(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)\u001b[0m\n\u001b[1;32m 1747\u001b[0m tr_loss_step \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtraining_step(model, inputs)\n\u001b[1;32m 1748\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1749\u001b[0m tr_loss_step \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtraining_step\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minputs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1751\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[1;32m 1752\u001b[0m args\u001b[38;5;241m.\u001b[39mlogging_nan_inf_filter\n\u001b[1;32m 1753\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m is_torch_tpu_available()\n\u001b[1;32m 1754\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m (torch\u001b[38;5;241m.\u001b[39misnan(tr_loss_step) \u001b[38;5;129;01mor\u001b[39;00m torch\u001b[38;5;241m.\u001b[39misinf(tr_loss_step))\n\u001b[1;32m 1755\u001b[0m ):\n\u001b[1;32m 1756\u001b[0m \u001b[38;5;66;03m# if loss is nan or inf simply add the average of previous logged losses\u001b[39;00m\n\u001b[1;32m 1757\u001b[0m tr_loss \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m tr_loss \u001b[38;5;241m/\u001b[39m (\u001b[38;5;241m1\u001b[39m \u001b[38;5;241m+\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mglobal_step \u001b[38;5;241m-\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_globalstep_last_logged)\n", "File \u001b[0;32m~/anaconda3/lib/python3.10/site-packages/transformers/trainer.py:2526\u001b[0m, in \u001b[0;36mTrainer.training_step\u001b[0;34m(self, model, inputs)\u001b[0m\n\u001b[1;32m 2524\u001b[0m loss \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdeepspeed\u001b[38;5;241m.\u001b[39mbackward(loss)\n\u001b[1;32m 2525\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 2526\u001b[0m \u001b[43mloss\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbackward\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2528\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m loss\u001b[38;5;241m.\u001b[39mdetach()\n", "File \u001b[0;32m~/anaconda3/lib/python3.10/site-packages/torch/_tensor.py:488\u001b[0m, in \u001b[0;36mTensor.backward\u001b[0;34m(self, gradient, retain_graph, create_graph, inputs)\u001b[0m\n\u001b[1;32m 478\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m has_torch_function_unary(\u001b[38;5;28mself\u001b[39m):\n\u001b[1;32m 479\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m handle_torch_function(\n\u001b[1;32m 480\u001b[0m Tensor\u001b[38;5;241m.\u001b[39mbackward,\n\u001b[1;32m 481\u001b[0m (\u001b[38;5;28mself\u001b[39m,),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 486\u001b[0m inputs\u001b[38;5;241m=\u001b[39minputs,\n\u001b[1;32m 487\u001b[0m )\n\u001b[0;32m--> 488\u001b[0m \u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mautograd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbackward\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 489\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mgradient\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mretain_graph\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcreate_graph\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minputs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minputs\u001b[49m\n\u001b[1;32m 490\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/anaconda3/lib/python3.10/site-packages/torch/autograd/__init__.py:204\u001b[0m, in \u001b[0;36mbackward\u001b[0;34m(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)\u001b[0m\n\u001b[1;32m 199\u001b[0m retain_graph \u001b[38;5;241m=\u001b[39m create_graph\n\u001b[1;32m 201\u001b[0m \u001b[38;5;66;03m# The reason we repeat the same comment below is that\u001b[39;00m\n\u001b[1;32m 202\u001b[0m \u001b[38;5;66;03m# some Python versions print out the first line of a multi-line function\u001b[39;00m\n\u001b[1;32m 203\u001b[0m \u001b[38;5;66;03m# calls in the traceback and some print out the last line\u001b[39;00m\n\u001b[0;32m--> 204\u001b[0m \u001b[43mVariable\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_execution_engine\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun_backward\u001b[49m\u001b[43m(\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Calls into the C++ engine to run the backward pass\u001b[39;49;00m\n\u001b[1;32m 205\u001b[0m \u001b[43m \u001b[49m\u001b[43mtensors\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mgrad_tensors_\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mretain_graph\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcreate_graph\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minputs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 206\u001b[0m \u001b[43m \u001b[49m\u001b[43mallow_unreachable\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maccumulate_grad\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n", "\u001b[0;31mKeyboardInterrupt\u001b[0m: " ] } ], "source": [ "class TrainingArgumentsWithMPSSupport(TrainingArguments):\n", " @property\n", " def device(self) -> torch.device:\n", " if torch.backends.mps.is_available():\n", " return torch.device(\"mps\")\n", " else:\n", " return torch.device(\"cpu\")\n", "\n", "training_args = TrainingArgumentsWithMPSSupport(\n", " output_dir = './results',\n", " num_train_epochs=1,\n", " per_device_train_batch_size=16,\n", " per_device_eval_batch_size=16,\n", " warmup_steps=500,\n", " learning_rate=5e-5,\n", " weight_decay=0.01,\n", " logging_dir='./logs',\n", " logging_steps=10,\n", ")\n", "\n", "trainer = Trainer(\n", " model=model,\n", " args=training_args,\n", " train_dataset=train_dataset,\n", " eval_dataset=val_dataset,\n", ")\n", "\n", "trainer.train()" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Saving model checkpoint to ./model_checkpoint/done\n", "Configuration saved in ./model_checkpoint/done/config.json\n", "Model weights saved in ./model_checkpoint/done/pytorch_model.bin\n" ] } ], "source": [ "trainer.save_model('./model_checkpoint/done')" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "tags": [] }, "outputs": [], "source": [ "from transformers import BertTokenizer, BertForSequenceClassification\n", "#saved = DistilBertModel.from_pretrained('./model_checkpoint/trained', num_labels=6, problem_type=\"multi_label_classification\")\n", "saved = BertForSequenceClassification.from_pretrained('./model_checkpoint/fine_tuned')" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "ename": "NameError", "evalue": "name 'trainer' is not defined", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn[19], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mtrainer\u001b[49m\u001b[38;5;241m.\u001b[39mevaluate()\n", "\u001b[0;31mNameError\u001b[0m: name 'trainer' is not defined" ] } ], "source": [ "trainer.evaluate()" ] }, { "cell_type": "code", "execution_count": 59, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[[0.4601849317550659,\n", " 0.0626736581325531,\n", " 0.1962047964334488,\n", " 0.0715285912156105,\n", " 0.1363525241613388,\n", " 0.0730554461479187]]" ] }, "execution_count": 59, "metadata": {}, "output_type": "execute_result" } ], "source": [ "text = \"fun\"\n", "encoded_input = tokenizer(text, return_tensors=\"pt\")\n", "outputs = saved(**encoded_input)\n", "predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)\n", "predictions = predictions.cpu().detach().numpy()\n", "predictions.tolist()" ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [], "source": [ "res = [1 if x >= 0.5 else 0 for x in predictions[0]]" ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[1, 0, 0, 0, 0, 0]" ] }, "execution_count": 49, "metadata": {}, "output_type": "execute_result" } ], "source": [ "res" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "colab": { "provenance": [] }, "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.9" } }, "nbformat": 4, "nbformat_minor": 4 }