Spaces:

PirateXX
/

ChatGPT-Detector

Runtime error

App Files Community

PirateXX committed on Apr 9, 2023

Commit

e8af4f9

1 Parent(s): 386c521

Update app.py

Browse files

Files changed (1) hide show

app.py +53 -2

app.py CHANGED Viewed

@@ -1,7 +1,7 @@
 from flask import Flask, request
 from transformers import RobertaForSequenceClassification, RobertaTokenizer, RobertaConfig
 import torch
-import gradio as gr
 import os
 import re
 app = Flask(__name__)
@@ -13,6 +13,11 @@ model = RobertaForSequenceClassification.from_pretrained("PirateXX/ChatGPT_Detec
 model_name = "roberta-base"
 tokenizer = RobertaTokenizer.from_pretrained(model_name, map_location=torch.device('cpu'))
 def text_to_sentences(text):
     clean_text = text.replace('\n', ' ')
     return re.split(r'(?<=[^A-Z].[.?]) +(?=[A-Z])', clean_text)
@@ -64,8 +69,54 @@ def findRealProb(text):
     realProb = ans/cnt
     return {"Real": realProb, "Fake": 1-realProb}, results
 demo = gr.Interface(
-        fn=[findRealProb, predict],
         inputs=gr.Textbox(placeholder="Copy and paste here..."),
          article = "Visit <a href = \"https://ai-content-detector.online/\">AI Content Detector</a> for better user experience!",
         outputs=gr.outputs.JSON(),

+import gradio as gr
 from flask import Flask, request
 from transformers import RobertaForSequenceClassification, RobertaTokenizer, RobertaConfig
 import torch
 import os
 import re
 app = Flask(__name__)
 model_name = "roberta-base"
 tokenizer = RobertaTokenizer.from_pretrained(model_name, map_location=torch.device('cpu'))
+# GPT-2 classes used by the perplexity scorer; imported next to their only use.
+from transformers import GPT2LMHeadModel, GPT2TokenizerFast
+# Use the GPU when one is available, otherwise fall back to the CPU.
+device = 'cuda' if torch.cuda.is_available() else 'cpu'
+model_id = "gpt2"
+# Load the language model onto the selected device, plus its matching tokenizer.
+modelgpt2 = GPT2LMHeadModel.from_pretrained(model_id).to(device)
+tokenizergpt2 = GPT2TokenizerFast.from_pretrained(model_id)
 def text_to_sentences(text):
     clean_text = text.replace('\n', ' ')
     return re.split(r'(?<=[^A-Z].[.?]) +(?=[A-Z])', clean_text)
     realProb = ans/cnt
     return {"Real": realProb, "Fake": 1-realProb}, results
+def text_to_sentences(text):
+    """Split ``text`` into sentences.
+
+    Newlines are first turned into spaces. The text is then split on runs of
+    spaces that follow a ``.`` or ``?`` and precede an uppercase ASCII letter,
+    provided the character two positions before the punctuation mark is not
+    an uppercase ASCII letter.
+
+    Returns:
+        The list of sentence strings, punctuation retained.
+    """
+    sentence_boundary = r'(?<=[^A-Z].[.?]) +(?=[A-Z])'
+    single_line = ' '.join(text.split('\n'))
+    return re.split(sentence_boundary, single_line)
+def calculatePerplexity(text):
+    """Return the GPT-2 perplexity of ``text`` using a strided sliding window.
+
+    The text is tokenized once and scored in windows of at most
+    ``modelgpt2.config.n_positions`` tokens that advance by ``stride`` tokens.
+    Within each window only the tokens that the previous window did not
+    already score keep their labels; the others are masked with -100.
+
+    Args:
+        text: The string to score.
+
+    Returns:
+        The perplexity as a Python float, or ``nan`` when the text tokenizes
+        to zero tokens and the perplexity is therefore undefined.
+    """
+    encodings = tokenizergpt2(text, return_tensors="pt")
+    max_length = modelgpt2.config.n_positions
+    stride = 512
+    seq_len = encodings.input_ids.size(1)
+    # With no tokens the loop below would never run, leaving nothing to
+    # stack and no end position to divide by.
+    if seq_len == 0:
+        return float("nan")
+    nlls = []
+    prev_end_loc = 0
+    for begin_loc in range(0, seq_len, stride):
+        end_loc = min(begin_loc + max_length, seq_len)
+        # Number of tokens in this window that have not been scored yet.
+        trg_len = end_loc - prev_end_loc
+        input_ids = encodings.input_ids[:, begin_loc:end_loc].to(device)
+        target_ids = input_ids.clone()
+        # Tokens before the last trg_len positions serve as context only.
+        target_ids[:, :-trg_len] = -100
+        with torch.no_grad():
+            outputs = modelgpt2(input_ids, labels=target_ids)
+            # The loss is scaled by the number of newly scored tokens so the
+            # per-window values can be summed.
+            neg_log_likelihood = outputs.loss * trg_len
+        nlls.append(neg_log_likelihood)
+        prev_end_loc = end_loc
+        if end_loc == seq_len:
+            break
+    ppl = torch.exp(torch.stack(nlls).sum() / end_loc)
+    return ppl.item()
+@app.get("/getPerplexities")
+def calculatePerplexities(text=None):
+    """Score each sentence of a text by GPT-2 perplexity and label it.
+
+    Args:
+        text: The text to analyse. When omitted (as when Flask dispatches a
+            GET request to this view, because the route declares no URL
+            variables) it is read from the ``text`` query-string parameter.
+
+    Returns:
+        A list with one dict per non-blank sentence, holding the
+        ``sentence``, its ``perplexity`` and a ``label`` that is ``"AI"`` when
+        the perplexity is below 25 and ``"Human"`` otherwise.
+    """
+    if text is None:
+        text = request.args.get("text", "")
+    perplexities = []
+    for sentence in text_to_sentences(text):
+        # Blank fragments carry nothing to score, so leave them out.
+        if not sentence.strip():
+            continue
+        perplexity = calculatePerplexity(sentence)
+        label = "AI" if perplexity < 25 else "Human"
+        perplexities.append({"sentence": sentence, "perplexity": perplexity, "label": label})
+    return perplexities
 demo = gr.Interface(
+        fn=findRealProb,
         inputs=gr.Textbox(placeholder="Copy and paste here..."),
          article = "Visit <a href = \"https://ai-content-detector.online/\">AI Content Detector</a> for better user experience!",
         outputs=gr.outputs.JSON(),