Spaces:

armansakif
/

BenFake

Sleeping

App Files Files Community

BenFake / app.py

armansakif

Hugging Face does not have CUDA

6f75d60 about 1 year ago

raw

history blame contribute delete

No virus

9.1 kB

	import numpy as np
	import torch

	# Select the compute device once at import time: CUDA when PyTorch
	# reports one, otherwise the CPU. The choice is also logged to stdout.
	device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
	if device.type == "cuda":
	    print('We will use the GPU:', torch.cuda.get_device_name(0))
	else:
	    print('No GPU available, using the CPU instead.')

	import numpy as np
	import gradio as gr
	from transformers import BertTokenizer, AutoTokenizer
	from torch.utils.data import TensorDataset, random_split
	from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
	from transformers import BertForSequenceClassification, AdamW, BertConfig
	import random
	# Tokenizer and classifier are both loaded from the same Hub checkpoint id.
	tokenizer = AutoTokenizer.from_pretrained('armansakif/bengali-fake-news')

	model = BertForSequenceClassification.from_pretrained(
	    "armansakif/bengali-fake-news",  # Checkpoint id (same one the tokenizer uses).
	    num_labels=2,  # Two output labels: binary classification.
	    output_attentions=False,  # Do not return attention weights.
	    output_hidden_states=False,  # Do not return all hidden states.
	)
	# Put the weights on the device selected earlier in this file so that the
	# model and any tensors sent to `device` live in the same place. On a
	# CPU-only host this is a no-op.
	model.to(device)

	def classify_news(news):
	    """Return the fake/authentic probabilities for one news text.

	    The text is tokenized with the module-level ``tokenizer`` (truncated or
	    padded to 512 tokens), run through the module-level ``model`` without
	    gradient tracking, and the logits are converted to probabilities with a
	    softmax over the class dimension.

	    Args:
	        news: The news text to classify.

	    Returns:
	        A dict mapping ``'fake'`` (class index 0) and ``'authentic'``
	        (class index 1) to their predicted probabilities as floats.
	    """
	    class_names = ('fake', 'authentic')

	    encoded = tokenizer(
	        news,
	        add_special_tokens=True,  # Add '[CLS]' and '[SEP]'.
	        max_length=512,
	        padding='max_length',  # Replaces the deprecated pad_to_max_length=True.
	        truncation=True,
	        return_attention_mask=True,
	        return_tensors='pt',  # PyTorch tensors with a leading batch dimension.
	    )

	    # Send the inputs to wherever the model's weights actually are, so the
	    # forward pass cannot fail on a model/input device mismatch.
	    target_device = model.device
	    input_ids = encoded['input_ids'].to(target_device)
	    attention_mask = encoded['attention_mask'].to(target_device)

	    model.eval()
	    with torch.no_grad():
	        # No labels are passed: only the logits are needed, not a loss.
	        outputs = model(
	            input_ids,
	            token_type_ids=None,
	            attention_mask=attention_mask,
	        )

	    # Without labels the first output element is the logits tensor.
	    logits = outputs[0]
	    # Explicit dim=1 normalizes across the classes of each batch row.
	    probabilities = torch.nn.functional.softmax(logits, dim=1)[0].cpu().tolist()

	    return {name: float(p) for name, p in zip(class_names, probabilities)}
	# Gradio UI: a multi-line textbox feeds classify_news, and the returned
	# label -> probability dict is rendered by a Label component.
	demo = gr.Interface(
	    fn=classify_news,
	    inputs=gr.Textbox(lines=10, placeholder="News here..."),
	    # Top-level gr.Label replaces the legacy gr.outputs.Label namespace and
	    # matches the top-level gr.Textbox used for the input.
	    outputs=gr.Label(num_top_classes=2),
	    examples=[
	        ['খেলা হবে - বাংলাদেশের এক বিশেষ ডায়লগ। এই ডায়লগ সবার আগে কে বলেছিলেন তার নাম বাংলার সবাই জানে। তবু যারা জানেন না তাদের সুবিধার্থে, নারায়ণগঞ্জের সংসদ সদস্য শামীম ওসমান একবার তার ভাষণে এই খেলা হবে ডায়ালগটা ব্যবহার করেন। তার ভাইরাল হওয়া ভাষণ ছিল, ২ মিনিট ১১ সেকেন্ডের, সেই ভাষণে তিনি ‘খেলা হবে’ শব্দ ব্যবহার করেছেন মোট ৩ বার! তাও শরীর ঝাঁকিয়ে এবং গলার সর্বস্বক্তি দিয়ে। তার বলা এই ডায়লগ বাংলাদেশের বুকেই থেকে যায়নি। উড়তে উড়তে গিয়ে ঠেকেছিল ভারতের বুকেও। বলা যায়, বাংলাদেশের থেকে বেশি জনপ্রিয়তাই পেয়ে বসেছিল ভারতে। ভারতের পশ্চিমবঙ্গের সবচেয়ে জনপ্রিয় স্লোগান হচ্ছে এই খেলা হবে। শুধু স্লোগানেই থেমে থাকেনি আমাদের -খেলা হবে। ছড়িয়ে গেছে মুভিতেও। আলিয়া ভাট ও রনবীর সিং অভিনীত রকি ওর রাণী মুভিতেও ব্যবহার করা হয়েছে খেলা হবে ডায়লগ। মুভির নায়িকা স্বয়ং আলিয়া ভাটই একটা সিনে বলেছেন, খেলা হবে। এমনকি তিনি এটা বাংলাতেই বলেছেন!' ],
	        [ " সারা দেশে ডেঙ্গু পরিস্থিতি দিন দিন আরও ভয়াবহ রূপ নিচ্ছে। ডেঙ্গু জ্বরে আক্রান্ত হয়ে গত ২৪ ঘণ্টায় সারাদেশে ৮ জনের মৃত্যু হয়েছে। এ নিয়ে চলতি বছর ডেঙ্গু আক্রান্ত হয়ে মৃতের সংখ্যা দাঁড়িয়েছে ২৪৭ জনে। এছাড়া গত ২৪ ঘণ্টায় নতুন করে হাসপাতালে ভর্তি হয়েছেন ২ হাজার ৭৩১ জন, যা একদিনে এ বছরের মধ্যে সর্বোচ্চ। \n স্বাস্থ্য অধিদপ্তর জানায়, ডেঙ্গু আক্রান্ত হয়ে দেশের বিভিন্ন সরকারি-বেসরকারি হাসপাতালে ভর্তি হয়েছেন দুই হাজার ৭৩১ জন। তাদের মধ্যে ঢাকার বাসিন্দা এক হাজার ১৮৪ জন ও ঢাকার বাইরের এক হাজার ৫৪৭ জন। ২৪ ঘণ্টায় মৃত আটজনের মধ্যে চারজন ঢাকার ও চারজন ঢাকার বাইরের বাসিন্দা বলে জানায় স্বাস্থ্য অধিদপ্তর। \n চলতি বছরের ১ জানুয়ারি থেকে ৩০ জুলাই পর্যন্ত ডেঙ্গু আক্রান্ত হয়েছেন ৪৯ হাজার ১৩৮ জন। তাদের মধ্যে ঢাকার বাসিন্দা ২৮ হাজার ৩২ জন। ঢাকার বাইরের হাসপাতালগুলোতে ভর্তি হয়েছেন ২১ হাজার ১০৬ জন। একই সময়ে হাসপাতাল থেকে ছাড়পত্র পেয়েছেন ৩৯ হাজার ৪৭৩ জন। তাদের মধ্যে ঢাকার বাসিন্দা ২২ হাজার ৬৯৩ জন এবং ঢাকার বাইরের ১৬ হাজার ৭৮০ জন। \nউল্লেখ্য, ২০২২ সালে ডেঙ্গুতে দেশের ইতিহাসের সর্বোচ্চ ২৮১ জন মারা যান। একই সঙ্গে আলোচ্য বছরে ডেঙ্গু আক্রান্ত হয়ে হাসপাতালে ভর্তি হন ৬২ হাজার ৩৮২ জন। ২০২১ সালে সারাদেশে ডেঙ্গু আক্রান্ত হন ২৮ হাজার ৪২৯ জন। একই বছর দেশব্যাপী ডেঙ্গু আক্রান্ত হয়ে ১০৫ জনের মৃত্যু হয়েছিল। " ],
	    ],
	)
	demo.launch(inline=False)