{"cells":[{"cell_type":"markdown","metadata":{"id":"3p7vLrb-57Hq"},"source":["# Data Preparation"]},{"cell_type":"code","source":["from google.colab import drive\n","drive.mount('/content/drive')"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"df8tUqsfBPhr","executionInfo":{"status":"ok","timestamp":1735137420705,"user_tz":-330,"elapsed":4671,"user":{"displayName":"Malavika Nair Pambadiyil","userId":"11866192100661929468"}},"outputId":"a6ac71ec-9aa9-4c5c-f181-e08f9fd679da"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n"]}]},{"cell_type":"code","source":["%cd /content/drive/MyDrive/MisinformationTruthTell/"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"thclhHRNBPeY","executionInfo":{"status":"ok","timestamp":1735137424623,"user_tz":-330,"elapsed":600,"user":{"displayName":"Malavika Nair Pambadiyil","userId":"11866192100661929468"}},"outputId":"2dbb8e86-be50-44fa-f23b-c37aace1f8b4"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["/content/drive/MyDrive/MisinformationTruthTell\n"]}]},{"cell_type":"code","source":["# %pip (not !pip) installs into the environment of the running kernel\n","%pip install -q kaggle"],"metadata":{"id":"x-WfCbUXBPbW"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["from google.colab import files\n","\n","# Bind the result instead of leaving the call as the cell's last expression:\n","# files.upload() returns the uploaded file contents, which would otherwise be\n","# echoed into the saved cell output (presumably kaggle.json, i.e. API credentials).\n","uploaded_files = files.upload()"],"metadata":{"id":"3qM2-SIEBPQB"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# Put the Kaggle API token where the kaggle CLI looks for it, readable by the owner only\n","!mkdir -p ~/.kaggle\n","!cp kaggle.json ~/.kaggle/\n","!chmod 600 ~/.kaggle/kaggle.json"],"metadata":{"id":"J1w8LiZQDqCF"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["%cd /content/drive/MyDrive/MisinformationTruthTell/"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"SeGe2Pr_Dp-n","executionInfo":{"status":"ok","timestamp":1735137457638,"user_tz":-330,"elapsed":569,"user":{"displayName":"Malavika Nair 
Pambadiyil","userId":"11866192100661929468"}},"outputId":"c9c9ae84-c9f2-41f0-8895-ecf39c0f99ef"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["/content/drive/MyDrive/MisinformationTruthTell\n"]}]},{"cell_type":"code","source":["! kaggle datasets download clmentbisaillon/fake-and-real-news-dataset"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"83M9FkN8Dp5V","executionInfo":{"status":"ok","timestamp":1735137463110,"user_tz":-330,"elapsed":2849,"user":{"displayName":"Malavika Nair Pambadiyil","userId":"11866192100661929468"}},"outputId":"e2758b57-2aff-4211-e315-27b13b44db21"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["Dataset URL: https://www.kaggle.com/datasets/clmentbisaillon/fake-and-real-news-dataset\n","License(s): CC-BY-NC-SA-4.0\n","fake-and-real-news-dataset.zip: Skipping, found more recently modified local copy (use --force to force download)\n"]}]},{"cell_type":"code","source":["path = '/content/drive/MyDrive/MisinformationTruthTell'"],"metadata":{"id":"VEwwd6gXDp2Y"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# -o overwrites already-extracted files without prompting, so a re-run\n","# (Restart & Run All) never stops at the interactive replace-file question.\n","!unzip -o fake-and-real-news-dataset.zip"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"f11AayAgGw6N","executionInfo":{"status":"ok","timestamp":1735137494582,"user_tz":-330,"elapsed":22923,"user":{"displayName":"Malavika Nair Pambadiyil","userId":"11866192100661929468"}},"outputId":"7b627453-5f12-4ab3-93e0-54ae5e6982ac"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["Archive: fake-and-real-news-dataset.zip\n","replace Fake.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: y\n"," inflating: Fake.csv \n","replace True.csv? 
[y]es, [n]o, [A]ll, [N]one, [r]ename: y\n"," inflating: True.csv \n"]}]},{"cell_type":"code","source":["# Load Dataset\n","import pandas as pd\n","true_data = pd.read_csv('True.csv')\n","fake_data = pd.read_csv('Fake.csv')"],"metadata":{"id":"qgDU2JgpHDRF"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# Generate labels True/Fake under new Target Column in 'true_data' and 'fake_data'\n","# (a scalar is broadcast to every row, so no per-row list has to be built)\n","true_data['Target'] = 'True'\n","fake_data['Target'] = 'Fake'\n"],"metadata":{"id":"MODGntIcHDN6"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# Merge 'true_data' and 'fake_data' into a single shuffled df called 'data'.\n","# random_state pins the shuffle so a fresh Restart & Run All yields the same row order.\n","data = pd.concat([true_data, fake_data]).sample(frac=1, random_state=42).reset_index(drop=True)\n","\n","# See what the data looks like\n","print(data.shape)\n","data.head()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":224},"id":"LT9pUbAIHLr0","executionInfo":{"status":"ok","timestamp":1735137545461,"user_tz":-330,"elapsed":1225,"user":{"displayName":"Malavika Nair Pambadiyil","userId":"11866192100661929468"}},"outputId":"79497039-ea80-4c7b-8fb6-f990ff1855ff"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["(44898, 5)\n"]},{"output_type":"execute_result","data":{"text/plain":[" title \\\n","0 THE LOST VIDEO: Watch MSNBC’S Mika Shamelessly... \n","1 Trump Is Now Threatening A Lawsuit Over Copyr... \n","2 Oklahoma Gun Range Refused Service To Muslim ... \n","3 Republican debate without Trump draws 12.5 mil... \n","4 SHOCKING REPORT: 50% of Babies in 24 States Bo... \n","\n"," text subject \\\n","0 With all the anger and nasty comments coming f... left-news \n","1 The artist who painted a nude portrait of Dona... News \n","2 You d think in the year 2016 we d no longer ha... News \n","3 LOS ANGELES (Reuters) - The Republican preside... politicsNews \n","4 New Mexico led all states with 72 percent of t... 
Government News \n","\n"," date Target \n","0 Jun 30, 2017 Fake \n","1 April 17, 2016 Fake \n","2 February 19, 2016 Fake \n","3 January 29, 2016 True \n","4 Mar 24, 2017 Fake "],"text/html":["\n","
\n"," | title | \n","text | \n","subject | \n","date | \n","Target | \n","
---|---|---|---|---|---|
0 | \n","THE LOST VIDEO: Watch MSNBC’S Mika Shamelessly... | \n","With all the anger and nasty comments coming f... | \n","left-news | \n","Jun 30, 2017 | \n","Fake | \n","
1 | \n","Trump Is Now Threatening A Lawsuit Over Copyr... | \n","The artist who painted a nude portrait of Dona... | \n","News | \n","April 17, 2016 | \n","Fake | \n","
2 | \n","Oklahoma Gun Range Refused Service To Muslim ... | \n","You d think in the year 2016 we d no longer ha... | \n","News | \n","February 19, 2016 | \n","Fake | \n","
3 | \n","Republican debate without Trump draws 12.5 mil... | \n","LOS ANGELES (Reuters) - The Republican preside... | \n","politicsNews | \n","January 29, 2016 | \n","True | \n","
4 | \n","SHOCKING REPORT: 50% of Babies in 24 States Bo... | \n","New Mexico led all states with 72 percent of t... | \n","Government News | \n","Mar 24, 2017 | \n","Fake | \n","
\n"," | text | \n","Target | \n","
---|---|---|
0 | \n","THE LOST VIDEO: Watch MSNBC’S Mika Shamelessly... | \n","Fake | \n","
1 | \n","Trump Is Now Threatening A Lawsuit Over Copyr... | \n","Fake | \n","
2 | \n","Oklahoma Gun Range Refused Service To Muslim ... | \n","Fake | \n","
3 | \n","Republican debate without Trump draws 12.5 mil... | \n","True | \n","
4 | \n","SHOCKING REPORT: 50% of Babies in 24 States Bo... | \n","Fake | \n","
... | \n","... | \n","... | \n","
44893 | \n","Syrian Kurdish YPG accuses Turkey of Afrin agg... | \n","True | \n","
44894 | \n","Turkey's military says two Turkish soldiers ki... | \n","True | \n","
44895 | \n","On Election Eve for five states, Trump rips Cr... | \n","True | \n","
44896 | \n","EU's Juncker hails Macron speech as 'very Euro... | \n","True | \n","
44897 | \n","Jewish groups in Germany urge crackdown on ant... | \n","True | \n","
44898 rows × 2 columns
\n","\n"," | text | \n","Target | \n","
---|---|---|
0 | \n","THE LOST VIDEO: Watch MSNBC’S Mika Shamelessly... | \n","1 | \n","
1 | \n","Trump Is Now Threatening A Lawsuit Over Copyr... | \n","1 | \n","
2 | \n","Oklahoma Gun Range Refused Service To Muslim ... | \n","1 | \n","
3 | \n","Republican debate without Trump draws 12.5 mil... | \n","0 | \n","
4 | \n","SHOCKING REPORT: 50% of Babies in 24 States Bo... | \n","1 | \n","
... | \n","... | \n","... | \n","
44893 | \n","Syrian Kurdish YPG accuses Turkey of Afrin agg... | \n","0 | \n","
44894 | \n","Turkey's military says two Turkish soldiers ki... | \n","0 | \n","
44895 | \n","On Election Eve for five states, Trump rips Cr... | \n","0 | \n","
44896 | \n","EU's Juncker hails Macron speech as 'very Euro... | \n","0 | \n","
44897 | \n","Jewish groups in Germany urge crackdown on ant... | \n","0 | \n","
44898 rows × 2 columns
\n","Epoch | \n","Training Loss | \n","Validation Loss | \n","Accuracy | \n","
---|---|---|---|
1 | \n","1.123000 | \n","0.627089 | \n","0.688196 | \n","
2 | \n","0.618600 | \n","0.555046 | \n","0.838307 | \n","
3 | \n","0.567200 | \n","0.507800 | \n","0.847216 | \n","
4 | \n","0.502300 | \n","0.475175 | \n","0.868820 | \n","
5 | \n","0.481700 | \n","0.451593 | \n","0.874833 | \n","
6 | \n","0.466500 | \n","0.435129 | \n","0.879287 | \n","
7 | \n","0.456700 | \n","0.422820 | \n","0.879733 | \n","
8 | \n","0.440400 | \n","0.415625 | \n","0.883296 | \n","
9 | \n","0.436000 | \n","0.410988 | \n","0.883073 | \n","
10 | \n","0.430900 | \n","0.409735 | \n","0.883296 | \n","
"]},"metadata":{}},{"output_type":"execute_result","data":{"text/plain":["TrainOutput(global_step=6320, training_loss=0.5362479149540769, metrics={'train_runtime': 7283.4335, 'train_samples_per_second': 55.479, 'train_steps_per_second': 0.868, 'total_flos': 5.352742644891648e+16, 'train_loss': 0.5362479149540769, 'epoch': 10.0})"]},"metadata":{},"execution_count":39}],"source":["from transformers import Trainer, TrainingArguments\n","\n","# Train `model` on dataset_splitted['train'], evaluating on dataset_splitted['test']\n","# and writing a checkpoint at the end of every epoch.\n","trainer = Trainer(\n","    model=model,\n","    args=TrainingArguments(\n","        output_dir=\"./my_saved_model\",\n","        overwrite_output_dir=True,\n","        num_train_epochs=10,\n","        per_device_train_batch_size=64,\n","        per_device_eval_batch_size=64,\n","        # Evaluation and checkpointing follow the same per-epoch schedule.\n","        eval_strategy=\"epoch\",\n","        save_strategy=\"epoch\",\n","        save_steps=10_000,\n","        save_total_limit=2,\n","        load_best_model_at_end=True,\n","    ),\n","    train_dataset=dataset_splitted['train'],\n","    eval_dataset=dataset_splitted['test'],\n","    compute_metrics=compute_metrics,\n",")\n","\n","trainer.train()"]},{"cell_type":"code","source":[],"metadata":{"id":"Z3JPGFryJyjA"},"execution_count":null,"outputs":[]}],"metadata":{"accelerator":"GPU","colab":{"provenance":[],"gpuType":"T4"},"kernelspec":{"display_name":"Python 
3","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.7.12"},"widgets":{"application/vnd.jupyter.widget-state+json":{"8f2a9c284f3f4cd292f2cf82d9ac1058":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_12a29ededa3f4574aa23094b855b4170","IPY_MODEL_8659bef2fe2d470389267d5ac2058855","IPY_MODEL_922929a976374a91a65a4461fdd24184"],"layout":"IPY_MODEL_78837b15b48e4637a37098980fad1114"}},"12a29ededa3f4574aa23094b855b4170":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_7285cb5c22e74cdb8fb3301f74cedfd6","placeholder":"","style":"IPY_MODEL_7d41d0ca750a4838bf1d58c6cf5d2e38","value":"Map: 
100%"}},"8659bef2fe2d470389267d5ac2058855":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_ea4f30d4f46143b5bf5310519866b93f","max":44898,"min":0,"orientation":"horizontal","style":"IPY_MODEL_28b841ebcc084555bd01481d2d8542cd","value":44898}},"922929a976374a91a65a4461fdd24184":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_325dab348bf8461ea6ae818535bd5d27","placeholder":"","style":"IPY_MODEL_9cac6f2fbc2b41228769dd1a118ec603","value":" 44898/44898 [01:27<00:00, 487.66 
examples/s]"}},"78837b15b48e4637a37098980fad1114":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"7285cb5c22e74cdb8fb3301f74cedfd6":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null
,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"7d41d0ca750a4838bf1d58c6cf5d2e38":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"ea4f30d4f46143b5bf5310519866b93f":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"28b841ebcc084555bd01481d2d8542cd":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"325dab348bf8461ea6ae818535bd5d27":{"model
_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"9cac6f2fbc2b41228769dd1a118ec603":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}}}}},"nbformat":4,"nbformat_minor":0}