# Spaces: Runtime error
from flask import Flask, request | |
from transformers import RobertaForSequenceClassification, RobertaTokenizer, RobertaConfig | |
import torch | |
import gradio as gr | |
import os | |
import re | |
import pdfplumber | |
# Flask application object.
# NOTE(review): nothing in the visible part of this file registers a route on
# it or runs it; it is kept so that any code importing `app` keeps working.
app = Flask(__name__)

# Hugging Face access token used to download the classifier weights.
# `.get` yields None when the variable is unset, which makes the downloads
# below fall back to anonymous access instead of crashing at import time
# with a KeyError.
ACCESS_TOKEN = os.environ.get("ACCESS_TOKEN")

# Sequence-classification model whose two output classes are read by
# `predict` as (fake, real).
config = RobertaConfig.from_pretrained(
    "PirateXX/ChatGPT-Text-Detector", use_auth_token=ACCESS_TOKEN
)
model = RobertaForSequenceClassification.from_pretrained(
    "PirateXX/ChatGPT-Text-Detector", use_auth_token=ACCESS_TOKEN, config=config
)

# The tokenizer comes from the base checkpoint rather than the detector repo.
# The former `map_location` argument was dropped: it is a `torch.load` option
# and has no meaning for a tokenizer.
model_name = "roberta-base"
tokenizer = RobertaTokenizer.from_pretrained(model_name)
# function to break text into an array of sentences
def text_to_sentences(text):
    """Split ``text`` into sentence fragments on ``.``, ``!`` and ``?``.

    A space is first inserted after any terminator that is immediately
    followed by a non-whitespace character, so that fragments do not fuse
    into one word when they are concatenated again later.

    The terminators themselves are removed by the split.  A text that ends
    with a terminator therefore yields a trailing empty string.

    Args:
        text: The text to split.

    Returns:
        A list of fragments, in their original order.
    """
    # re.sub returns a new string; the result has to be kept (it used to be
    # discarded, which made this step a no-op).
    text = re.sub(r'(?<=[.!?])(?=[^\s])', r' ', text)
    return re.split(r'[.!?]', text)
# function to concatenate sentences into chunks of size 600 or less
def chunks_of_600(text, chunk_size=600):
    """Group the sentences of ``text`` into chunks of at most ``chunk_size`` chars.

    Sentences come from ``text_to_sentences`` and are appended to the current
    chunk for as long as the result still fits.  A single sentence longer than
    ``chunk_size`` is not split; it becomes a chunk of its own that exceeds
    the limit.

    Empty chunks are never emitted, so an empty ``text`` produces an empty
    list.

    Args:
        text: The text to chunk.
        chunk_size: Maximum number of characters per chunk.

    Returns:
        A list of non-empty chunk strings, in document order.
    """
    chunks = []
    current_chunk = ""
    for sentence in text_to_sentences(text):
        if len(current_chunk) + len(sentence) <= chunk_size:
            current_chunk += sentence
            continue
        # The sentence does not fit: close the current chunk.  The guard
        # avoids emitting "" when the very first sentence is over the limit.
        if current_chunk:
            chunks.append(current_chunk)
        current_chunk = sentence
    if current_chunk:
        chunks.append(current_chunk)
    return chunks
def predict(query, device="cpu"):
    """Return the model's probability that ``query`` is "real" text.

    The text is tokenized, truncated so that it fits the tokenizer's maximum
    length once the begin/end tokens are added, and passed through the
    module-level ``model`` as a batch of one.

    Args:
        query: The text to classify.
        device: Torch device the input tensors are moved to.  The model itself
            is not moved here, so it must already live on that device.

    Returns:
        The softmax probability of class index 1, which this code treats as
        the "real" class (index 0 being "fake").
    """
    # encode() adds the <s>/</s> special tokens by default.  They are added
    # explicitly below, so they are disabled here to avoid doubling them.
    token_ids = tokenizer.encode(query, add_special_tokens=False)
    # Leave room for the two special tokens.
    token_ids = token_ids[:tokenizer.model_max_length - 2]
    tokens = torch.tensor(
        [tokenizer.bos_token_id] + token_ids + [tokenizer.eos_token_id]
    ).unsqueeze(0)
    mask = torch.ones_like(tokens)
    with torch.no_grad():
        logits = model(tokens.to(device), attention_mask=mask.to(device))[0]
        probs = logits.softmax(dim=-1)
    _fake, real = probs.detach().cpu().flatten().numpy().tolist()
    return real
def findRealProb(text):
    """Score ``text`` as real vs. fake, chunk by chunk.

    The text is cut into chunks with ``chunks_of_600``, each non-blank chunk
    is scored with ``predict``, and the per-chunk probabilities are averaged
    using the chunk lengths as weights.

    Args:
        text: The text to score.

    Returns:
        A dict with keys ``"Real"`` and ``"Fake"`` (which sum to 1),
        ``"results"`` (a list of ``[probability, chunk_length]`` pairs) and
        ``"text"``.  When the text contains nothing to score, the dict holds
        an ``"error"`` message in place of ``"Real"`` / ``"Fake"``.
    """
    results = []
    for chunk in chunks_of_600(text):
        # Blank chunks carry no signal and would only cost a model call.
        if not chunk.strip():
            continue
        output = predict(chunk)
        print(chunk)
        print("-----------------------------------")
        results.append([output, len(chunk)])

    # Normalise by the length that was actually scored.  Sentence terminators
    # are dropped during splitting, so dividing by len(text) would bias the
    # average towards "Fake"; it would also divide by zero on empty text.
    total_length = sum(length for _, length in results)
    if total_length == 0:
        return {"error": "No text found to analyse", "results": results, "text": text}

    weighted_sum = sum(prob * length for prob, length in results)
    realProb = weighted_sum / total_length
    return {"Real": realProb, "Fake": 1 - realProb, "results": results, "text": text}
def upload_file(file):
    """Extract text from the first pages of an uploaded PDF and score it.

    Args:
        file: The uploaded file object; its ``name`` attribute is used as the
            path of the PDF to open.  A falsy value means nothing was
            uploaded.

    Returns:
        The dict produced by ``findRealProb`` for the extracted text, or
        ``{"error": ...}`` when no file was supplied.
    """
    if not file:
        return {"error":'No PDF file found in request'}

    pdf_file = file.name
    print(file, pdf_file)

    # The original loop stopped once its counter exceeded 5, i.e. after
    # reading six pages; that limit is kept.
    max_pages = 6
    with pdfplumber.open(pdf_file) as pdf:
        # extract_text() can return None for a page without a text layer, so
        # fall back to an empty string instead of failing on concatenation.
        page_texts = [
            page.extract_text(x_tolerance=1) or ""
            for page in pdf.pages[:max_pages]
        ]

    # Join pages with a space so the last word of one page does not fuse with
    # the first word of the next, then flatten line breaks.
    text = " ".join(page_texts).replace('\n', ' ')
    return findRealProb(text)
# Gradio UI: a single file-upload input whose result is rendered as JSON.
demo = gr.Interface(
    fn=upload_file,
    inputs=gr.File(),
    article = "Visit <a href = \"https://ai-content-detector.online/\">AI Content Detector</a> for better user experience!",
    # Top-level component, consistent with `gr.File()` above; the
    # `gr.outputs` namespace is deprecated.
    outputs=gr.JSON(),
    # NOTE(review): `interpretation` is kept as in the original; confirm the
    # installed Gradio version still accepts this argument.
    interpretation="default",
)
demo.launch(show_api=False)