PirateXX committed on
Commit
e8af4f9
·
1 Parent(s): 386c521

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -2
app.py CHANGED
@@ -1,7 +1,7 @@
 
1
  from flask import Flask, request
2
  from transformers import RobertaForSequenceClassification, RobertaTokenizer, RobertaConfig
3
  import torch
4
- import gradio as gr
5
  import os
6
  import re
7
  app = Flask(__name__)
@@ -13,6 +13,11 @@ model = RobertaForSequenceClassification.from_pretrained("PirateXX/ChatGPT_Detec
13
  model_name = "roberta-base"
14
  tokenizer = RobertaTokenizer.from_pretrained(model_name, map_location=torch.device('cpu'))
15
 
 
 
 
 
 
16
  def text_to_sentences(text):
17
  clean_text = text.replace('\n', ' ')
18
  return re.split(r'(?<=[^A-Z].[.?]) +(?=[A-Z])', clean_text)
@@ -64,8 +69,54 @@ def findRealProb(text):
64
  realProb = ans/cnt
65
  return {"Real": realProb, "Fake": 1-realProb}, results
66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  demo = gr.Interface(
68
- fn=[findRealProb, predict],
69
  inputs=gr.Textbox(placeholder="Copy and paste here..."),
70
  article = "Visit <a href = \"https://ai-content-detector.online/\">AI Content Detector</a> for better user experience!",
71
  outputs=gr.outputs.JSON(),
 
1
import os
import re

import gradio as gr
import torch
from flask import Flask, request
from transformers import GPT2LMHeadModel, GPT2TokenizerFast
from transformers import RobertaForSequenceClassification, RobertaTokenizer, RobertaConfig
7
  app = Flask(__name__)
 
13
  model_name = "roberta-base"
14
  tokenizer = RobertaTokenizer.from_pretrained(model_name, map_location=torch.device('cpu'))
15
 
16
# GPT-2 language model and tokenizer used by the perplexity scoring below.
# The device check goes through `torch.cuda` because the visible imports only
# bring in `torch` itself; no bare `cuda` name is imported in the lines shown.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model_id = "gpt2"
# The model is moved to the selected device once, at import time.
modelgpt2 = GPT2LMHeadModel.from_pretrained(model_id).to(device)
tokenizergpt2 = GPT2TokenizerFast.from_pretrained(model_id)
20
+
21
  def text_to_sentences(text):
22
  clean_text = text.replace('\n', ' ')
23
  return re.split(r'(?<=[^A-Z].[.?]) +(?=[A-Z])', clean_text)
 
69
  realProb = ans/cnt
70
  return {"Real": realProb, "Fake": 1-realProb}, results
71
 
72
+
73
def text_to_sentences(text):
    """Return the sentence-like chunks of ``text``.

    Newlines are flattened to spaces first. A chunk boundary is a run of
    spaces preceded by ``.`` or ``?`` and followed by an uppercase ``A-Z``
    letter, provided the character two positions before the punctuation is
    not itself an uppercase ``A-Z`` letter (so ``"Dr. Who"`` stays whole).

    Returns:
        A list of strings; input with no boundary yields a one-item list.
    """
    splitter = re.compile(r'(?<=[^A-Z].[.?]) +(?=[A-Z])')
    return splitter.split(text.replace('\n', ' '))
76
+
77
def calculatePerplexity(text):
    """Return the GPT-2 perplexity of ``text`` using overlapping windows.

    The text is tokenized with ``tokenizergpt2`` and scored by ``modelgpt2``
    in windows of at most ``modelgpt2.config.n_positions`` tokens that
    advance by 512 tokens. Within each window only the tokens not covered by
    the previous window are scored; the earlier ones serve as context.

    Args:
        text: The string to score.

    Returns:
        The perplexity as a Python float. ``float("nan")`` is returned when
        the text encodes to fewer than two tokens, because there is then no
        next-token prediction to score.
    """
    encodings = tokenizergpt2(text, return_tensors="pt")
    max_length = modelgpt2.config.n_positions
    stride = 512
    seq_len = encodings.input_ids.size(1)

    # With zero tokens the loop below never runs and torch.stack() would be
    # handed an empty list; with one token nothing is left to predict.
    # Report both cases as "undefined" instead of failing.
    if seq_len < 2:
        return float("nan")

    nlls = []
    prev_end_loc = 0
    for begin_loc in range(0, seq_len, stride):
        end_loc = min(begin_loc + max_length, seq_len)
        # Number of tokens in this window not already scored by the last one.
        trg_len = end_loc - prev_end_loc
        input_ids = encodings.input_ids[:, begin_loc:end_loc].to(device)
        target_ids = input_ids.clone()
        # -100 marks labels the loss should ignore: everything except the
        # final trg_len tokens is context only.
        target_ids[:, :-trg_len] = -100

        with torch.no_grad():
            outputs = modelgpt2(input_ids, labels=target_ids)
            # Scale the window loss by the number of newly scored tokens so
            # the windows can be summed and normalised together below.
            neg_log_likelihood = outputs.loss * trg_len

        nlls.append(neg_log_likelihood)

        prev_end_loc = end_loc
        if end_loc == seq_len:
            break

    # Normalise by the end of the last window processed.
    ppl = torch.exp(torch.stack(nlls).sum() / end_loc)

    return ppl.item()
105
+
106
@app.get("/getPerplexities")
def calculatePerplexities(text=None):
    """Score each sentence of ``text`` and label it "AI" or "Human".

    Args:
        text: The text to analyse. When omitted, as happens when Flask
            invokes this function as the ``/getPerplexities`` view, the
            text is read from the ``text`` query-string parameter.

    Returns:
        A list with one dict per non-blank sentence, holding the keys
        ``"sentence"``, ``"perplexity"`` and ``"label"``. The label is
        ``"AI"`` when the perplexity is below 25 and ``"Human"`` otherwise.
        NOTE(review): returning a bare list from a view relies on Flask
        converting lists to JSON responses (Flask 2.2+) - confirm the
        deployed version.
    """
    if text is None:
        # Flask passes only URL-rule variables to a view, and this rule
        # declares none, so the input has to come from the request itself.
        text = request.args.get("text", "")

    perplexities = []
    for sentence in text_to_sentences(text):
        # Blank fragments (e.g. the single "" produced for empty input)
        # carry nothing to score.
        if not sentence.strip():
            continue
        perplexity = calculatePerplexity(sentence)
        label = "AI" if perplexity < 25 else "Human"
        perplexities.append({"sentence": sentence, "perplexity": perplexity, "label": label})
    return perplexities
117
+
118
  demo = gr.Interface(
119
+ fn=findRealProb,
120
  inputs=gr.Textbox(placeholder="Copy and paste here..."),
121
  article = "Visit <a href = \"https://ai-content-detector.online/\">AI Content Detector</a> for better user experience!",
122
  outputs=gr.outputs.JSON(),