Commit e2a5271
Parent(s): ba55d6f

Add application file

Files changed:
- main.py (+220 -0)
- requirements.txt (+11 -0)

main.py
ADDED
@@ -0,0 +1,220 @@
import functools

import streamlit as st
from streamlit_option_menu import option_menu
import streamlit.components.v1 as html
import pandas as pd
import numpy as np
from pathlib import Path
import altair as alt
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import RobertaConfig
from transformers import RobertaForSequenceClassification, RobertaTokenizer, RobertaConfig
import torch
# from torch import cuda
import gradio as gr
import os
import re
import torch, gc


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# device

tokenizer = AutoTokenizer.from_pretrained("devloverumar/chatgpt-content-detector")
model = AutoModelForSequenceClassification.from_pretrained("devloverumar/chatgpt-content-detector", num_labels=2)
# from PIL import Image
# gc.collect()
# torch.cuda.empty_cache()


def text_to_sentences(text):
    clean_text = text.replace('\n', ' ')
    return re.split(r'(?<=[^A-Z].[.?]) +(?=[A-Z])', clean_text)


# function to concatenate sentences into chunks of size 900 or less
def chunks_of_900(text, chunk_size=900):
    sentences = text_to_sentences(text)
    chunks = []
    current_chunk = ""
    for sentence in sentences:
        if len(current_chunk + sentence) <= chunk_size:
            if len(current_chunk) != 0:
                current_chunk += " " + sentence
            else:
                current_chunk += sentence
        else:
            chunks.append(current_chunk)
            current_chunk = sentence
    chunks.append(current_chunk)
    return chunks


def predict(query):
    tokens = tokenizer.encode(query)
    all_tokens = len(tokens)
    tokens = tokens[:tokenizer.model_max_length - 2]
    used_tokens = len(tokens)
    tokens = torch.tensor([tokenizer.bos_token_id] + tokens + [tokenizer.eos_token_id]).unsqueeze(0)
    mask = torch.ones_like(tokens)

    with torch.no_grad():
        logits = model(tokens.to(device), attention_mask=mask.to(device))[0]
        probs = logits.softmax(dim=-1)

    real, fake = probs.detach().cpu().flatten().numpy().tolist()  # Hello-SimpleAI/chatgpt-detector-roberta
    # fake, real = probs.detach().cpu().flatten().numpy().tolist()  # PirateXX/AI-Content-Detector-V2

    return real


def findRealProb(text):
    chunksOfText = chunks_of_900(text)
    results = []
    for chunk in chunksOfText:
        output = predict(chunk)
        results.append([output, len(chunk)])

    ans = 0
    cnt = 0
    for prob, length in results:
        cnt += length
        ans = ans + prob * length
    realProb = ans / cnt
    return {"Real": realProb, "Fake": 1 - realProb}, results


TXT_TO_INSPECT = None
def inspect_content(text):
    TXT_TO_INSPECT = text


st.markdown(""" <style> .appview-container .main .block-container {
        max-width: 100%;
        padding-top: 1rem;
        padding-right: {1}rem;
        padding-left: {1}rem;
        padding-bottom: {1}rem;
    }</style> """, unsafe_allow_html=True)
# Add a logo (optional) in the sidebar
# logo = Image.open(r'C:\Users\13525\Desktop\Insights_Bees_logo.png')
with st.sidebar:
    choose = option_menu("Forensic Examiner", ["Inspect Content", "Generate Content", "About", "Contact"],
                         icons=['camera fill', 'kanban', 'book', 'person lines fill'],
                         menu_icon="app-indicator", default_index=0,
                         styles={
                             "container": {"padding": "0 5 5 5 !important", "background-color": "#fafafa"},
                             "icon": {"color": "orange", "font-size": "25px"},
                             "nav-link": {"font-size": "16px", "text-align": "left", "margin": "0px", "--hover-color": "#eee"},
                             "nav-link-selected": {"background-color": "#02ab21"},
                         }
                         )


if choose == "Inspect Content":
    # Add the cover image for the cover page. Used a little trick to center the image
    st.markdown(""" <style> .font {
        font-size:25px ; font-family: 'Cooper Black'; color: #FF9633;}
        </style> """, unsafe_allow_html=True)
    col1, col2 = st.columns([0.8, 0.2])
    with col1:  # To display the header text using css style
        st.markdown('<p class="font">Inspect Content</p>', unsafe_allow_html=True)

    with col2:  # To display brand logo
        st.image('./media/inspection-1.jpg', width=100)

    txt = st.text_area('Add Text here', height=300, max_chars=2000, value='''
        Cristiano Ronaldo is a Portuguese professional soccer player who currently plays
        as a forward for Manchester United and the Portugal national team. He is widely
        considered one of the greatest soccer players of all time, having won numerous
        awards and accolades throughout his career. Ronaldo began his professional career
        with Sporting CP in Portugal before moving to Manchester United in 2003.
        He spent six seasons with the club, winning three Premier League titles
        and one UEFA Champions League title. In 2009, he transferred to Real Madrid
        for a then-world record transfer fee of $131 million. He spent nine seasons with
        the club, winning four UEFA Champions League titles, two La Liga titles,
        and two Copa del Rey titles. In 2018, he transferred to Juventus, where he spent
        three seasons before returning to Manchester United in 2021. He has also had
        a successful international career with the Portugal national team, having won
        the UEFA European Championship in 2016 and the UEFA Nations League in 2019.
        ''', on_change=inspect_content)

    if TXT_TO_INSPECT is not None:
        with st.spinner('Loading the model..'):
            model.to(device)

        st.success(f'Model Loaded!', icon="✅")
        # st.success(f'Reported EER for the selected model {reported_eer}%')
        with st.spinner("Getting prediction..."):
            # print(audio.shape)
            predictions = findRealProb(txt)
            print('prediction_value', predictions)
            if predictions[0]['Fake'] > 0.5:
                # st.error(f"The Sample is spoof: \n Confidence {(prediction_value)}%", icon="🚨")
                st.error(f"This text is AI generated", icon="🚨")
            else:
                st.success(f"This text is real", icon="✅")


# if choose == "Generate Content":
#     st.markdown(""" <style> .font {
#     font-size:25px ; font-family: 'Cooper Black'; color: #FF9633;}
#     </style> """, unsafe_allow_html=True)
#     st.markdown('<p class="font">Comparison of Models</p>', unsafe_allow_html=True)
#     data_frame = get_data()
#     tab1, tab2 = st.tabs(["EER", "min-TDCF"])
#     with tab1:
#         data_frame["EER ASVS 2019"] = data_frame["EER ASVS 2019"].astype('float64')
#         data_frame["EER ASVS 2021"] = data_frame["EER ASVS 2021"].astype('float64')
#         data_frame["Cross-dataset 19-21"] = data_frame["Cross-dataset 19-21"].astype('float64')
#
#         data = data_frame[["Model Name", "EER ASVS 2019", "EER ASVS 2021", "Cross-dataset 19-21"]].reset_index(drop=True).melt('Model Name')
#         chart = alt.Chart(data).mark_line().encode(
#             x='Model Name',
#             y='value',
#             color='variable'
#         )
#         st.altair_chart(chart, theme=None, use_container_width=True)
#     with tab2:
#         data_frame["min-TDCF ASVS 2019"] = data_frame["EER ASVS 2019"].astype('float64')
#         data_frame["min-TDCF ASVS 2021"] = data_frame["EER ASVS 2021"].astype('float64')
#         data_frame["min-TDCF Cross-dataset"] = data_frame["Cross-dataset 19-21"].astype('float64')
#
#         data = data_frame[["Model Name", "min-TDCF ASVS 2019", "min-TDCF ASVS 2021", "min-TDCF Cross-dataset"]].reset_index(drop=True).melt('Model Name')
#         chart = alt.Chart(data).mark_line().encode(
#             x='Model Name',
#             y='value',
#             color='variable'
#         )
#         st.altair_chart(chart, theme=None, use_container_width=True)
#     # Data table
#     st.markdown(""" <style> .appview-container .main .block-container {
#         max-width: 100%;
#         padding-top: {1}rem;
#         padding-right: {1}rem;
#         padding-left: {1}rem;
#         padding-bottom: {1}rem;
#     }</style> """, unsafe_allow_html=True)
#     st.dataframe(data_frame, use_container_width=True)


if choose == "About":
    st.markdown(""" <style> .font {
        font-size:35px ; font-family: 'Cooper Black'; color: #FF9633;}
        </style> """, unsafe_allow_html=True)
    st.markdown('<p class="font">About</p>', unsafe_allow_html=True)

if choose == "Contact":
    st.markdown(""" <style> .font {
        font-size:35px ; font-family: 'Cooper Black'; color: #FF9633;}
        </style> """, unsafe_allow_html=True)
    st.markdown('<p class="font">Contact Us</p>', unsafe_allow_html=True)
    with st.form(key='columns_in_form2', clear_on_submit=True):  # set clear_on_submit=True so that the form will be reset/cleared once it's submitted
        # st.write('Please help us improve!')
        Name = st.text_input(label='Please Enter Your Name')  # Collect user feedback
        Email = st.text_input(label='Please Enter Your Email')  # Collect user feedback
        Message = st.text_input(label='Please Enter Your Message')  # Collect user feedback
        submitted = st.form_submit_button('Submit')
        if submitted:
            st.write('Thanks for contacting us. We will respond to your questions or inquiries as soon as possible!')
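A note on the trigger logic in main.py: Streamlit re-executes the whole script on every interaction and invokes on_change callbacks with no positional arguments, so inspect_content(text) would fail when called, and the module-level TXT_TO_INSPECT (which the function only assigns to a local name of the same spelling) is reset on each rerun. Below is a minimal sketch, not part of the commit, of one way to route the trigger through st.session_state instead; the key names are illustrative, and the st.write call stands in for the real findRealProb scoring.

    # Sketch (assumption): wiring the "inspect" trigger through st.session_state,
    # since Streamlit reruns the script per interaction and calls on_change
    # callbacks without arguments. Key names are illustrative only.
    import streamlit as st

    def inspect_content():
        # Just flag that the text area changed; read the value via its key later.
        st.session_state["inspect_requested"] = True

    txt = st.text_area('Add Text here', height=300, max_chars=2000,
                       key="inspect_text", on_change=inspect_content)

    if st.session_state.get("inspect_requested"):
        st.write(f"{len(txt)} characters queued for scoring")  # stand-in for findRealProb(txt)
        st.session_state["inspect_requested"] = False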
requirements.txt
ADDED
@@ -0,0 +1,11 @@
transformers
flask
torch
gradio
datasets
evaluate
scikit-learn
scipy
matplotlib
accelerate
nvidia-ml-py3
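For a quick sanity check outside the Streamlit UI, the same checkpoint can be loaded directly with only the transformers and torch packages listed above. This is a minimal sketch, not part of the commit: the example string is arbitrary, and the Real/Fake label order is an assumption mirroring the comment in main.py's predict(), so it should be confirmed against the model card.

    import torch
    from transformers import AutoTokenizer, AutoModelForSequenceClassification

    tokenizer = AutoTokenizer.from_pretrained("devloverumar/chatgpt-content-detector")
    model = AutoModelForSequenceClassification.from_pretrained("devloverumar/chatgpt-content-detector", num_labels=2)
    model.eval()

    text = "Cristiano Ronaldo is a Portuguese professional soccer player."
    inputs = tokenizer(text, return_tensors="pt", truncation=True)
    with torch.no_grad():
        probs = model(**inputs).logits.softmax(dim=-1).flatten().tolist()

    real, fake = probs  # label order assumed, per the comment in main.py's predict()
    print({"Real": real, "Fake": fake})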