devloverumar committed on
Commit
e2a5271
·
1 Parent(s): ba55d6f

Add application file

Files changed (2)
  1. main.py +220 -0
  2. requirements.txt +11 -0
main.py ADDED
@@ -0,0 +1,220 @@
+
+ import functools
+
+ import streamlit as st
+ from streamlit_option_menu import option_menu
+ import streamlit.components.v1 as html
+ import pandas as pd
+ import numpy as np
+ from pathlib import Path
+ import altair as alt
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
+ from transformers import RobertaConfig
+ from transformers import RobertaForSequenceClassification, RobertaTokenizer, RobertaConfig
+ import torch
+ # from torch import cuda
+ import gradio as gr
+ import os
+ import re
+ import torch, gc
+
+
+
+ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+ # device
+
+ tokenizer = AutoTokenizer.from_pretrained("devloverumar/chatgpt-content-detector")
+ model = AutoModelForSequenceClassification.from_pretrained("devloverumar/chatgpt-content-detector", num_labels=2)
+ # from PIL import Image
+ # gc.collect()
+ # torch.cuda.empty_cache()
+
+
+
+ # Heuristic sentence splitter: break on '.' or '?' followed by a space and a capital letter.
+ def text_to_sentences(text):
+     clean_text = text.replace('\n', ' ')
+     return re.split(r'(?<=[^A-Z].[.?]) +(?=[A-Z])', clean_text)
+
+ # function to concatenate sentences into chunks of size 900 or less
+ def chunks_of_900(text, chunk_size=900):
+     sentences = text_to_sentences(text)
+     chunks = []
+     current_chunk = ""
+     for sentence in sentences:
+         if len(current_chunk + sentence) <= chunk_size:
+             if len(current_chunk) != 0:
+                 current_chunk += " " + sentence
+             else:
+                 current_chunk += sentence
+         else:
+             chunks.append(current_chunk)
+             current_chunk = sentence
+     chunks.append(current_chunk)
+     return chunks
+
+ # Score a single chunk with the detector; returns the probability of the 'Real' (human-written) class.
+ def predict(query):
+     tokens = tokenizer.encode(query)
+     all_tokens = len(tokens)
+     tokens = tokens[:tokenizer.model_max_length - 2]
+     used_tokens = len(tokens)
+     tokens = torch.tensor([tokenizer.bos_token_id] + tokens + [tokenizer.eos_token_id]).unsqueeze(0)
+     mask = torch.ones_like(tokens)
+
+     with torch.no_grad():
+         logits = model(tokens.to(device), attention_mask=mask.to(device))[0]
+         probs = logits.softmax(dim=-1)
+
+     real, fake = probs.detach().cpu().flatten().numpy().tolist()  # Hello-SimpleAI/chatgpt-detector-roberta
+     # fake, real = probs.detach().cpu().flatten().numpy().tolist()  # PirateXX/AI-Content-Detector-V2
+
+     return real
+
+ # Chunk the text, score each chunk, and return the length-weighted average
+ # 'Real' probability together with the per-chunk results.
+ def findRealProb(text):
+     chunksOfText = chunks_of_900(text)
+     results = []
+     for chunk in chunksOfText:
+         output = predict(chunk)
+         results.append([output, len(chunk)])
+
+     ans = 0
+     cnt = 0
+     for prob, length in results:
+         cnt += length
+         ans = ans + prob * length
+     realProb = ans / cnt
+     return {"Real": realProb, "Fake": 1 - realProb}, results
+
+ def inspect_content():
+     # Streamlit calls on_change callbacks with no arguments, and module-level globals
+     # are reset on every script rerun, so persist the edited text in session state.
+     # The text_area below registers its value under key='txt_to_inspect'.
+     st.session_state['TXT_TO_INSPECT'] = st.session_state.get('txt_to_inspect')
+
+ st.markdown(""" <style> .appview-container .main .block-container {
+     max-width: 100%;
+     padding-top: 1rem;
+     padding-right: 1rem;
+     padding-left: 1rem;
+     padding-bottom: 1rem;
+ }</style> """, unsafe_allow_html=True)
+ # Add a logo (optional) in the sidebar
+ # logo = Image.open(r'C:\Users\13525\Desktop\Insights_Bees_logo.png')
+ with st.sidebar:
+     choose = option_menu("Forensic Examiner", ["Inspect Content", "Generate Content", "About", "Contact"],
+                          icons=['camera fill', 'kanban', 'book', 'person lines fill'],
+                          menu_icon="app-indicator", default_index=0,
+                          styles={
+                              "container": {"padding": "0 5 5 5 !important", "background-color": "#fafafa"},
+                              "icon": {"color": "orange", "font-size": "25px"},
+                              "nav-link": {"font-size": "16px", "text-align": "left", "margin": "0px", "--hover-color": "#eee"},
+                              "nav-link-selected": {"background-color": "#02ab21"},
+                          }
+                          )
+
+
+ if choose == "Inspect Content":
+     # Add the cover image for the cover page. Used a little trick to center the image
+     st.markdown(""" <style> .font {
+     font-size:25px ; font-family: 'Cooper Black'; color: #FF9633;}
+     </style> """, unsafe_allow_html=True)
+     col1, col2 = st.columns([0.8, 0.2])
+     with col1:  # To display the header text using css style
+         st.markdown('<p class="font">Inspect Content</p>', unsafe_allow_html=True)
+
+     with col2:  # To display brand logo
+         st.image('./media/inspection-1.jpg', width=100)
+
+     # key='txt_to_inspect' lets the on_change callback read the edited text from session state
+     txt = st.text_area('Add Text here', height=300, max_chars=2000, key='txt_to_inspect', value='''
+     Cristiano Ronaldo is a Portuguese professional soccer player who currently plays
+     as a forward for Manchester United and the Portugal national team. He is widely
+     considered one of the greatest soccer players of all time, having won numerous
+     awards and accolades throughout his career. Ronaldo began his professional career
+     with Sporting CP in Portugal before moving to Manchester United in 2003.
+     He spent six seasons with the club, winning three Premier League titles
+     and one UEFA Champions League title. In 2009, he transferred to Real Madrid
+     for a then-world record transfer fee of $131 million. He spent nine seasons with
+     the club, winning four UEFA Champions League titles, two La Liga titles,
+     and two Copa del Rey titles. In 2018, he transferred to Juventus, where he spent
+     three seasons before returning to Manchester United in 2021. He has also had
+     a successful international career with the Portugal national team, having won
+     the UEFA European Championship in 2016 and the UEFA Nations League in 2019.
+     ''', on_change=inspect_content)
+
+     if st.session_state.get('TXT_TO_INSPECT') is not None:
+         with st.spinner('Loading the model..'):
+             model.to(device)
+
+         st.success('Model Loaded!', icon="✅")
+         # st.success(f'Reported EER for the selected model {reported_eer}%')
+         with st.spinner("Getting prediction..."):
+             # print(audio.shape)
+             predictions = findRealProb(txt)
+             print('prediction_value', predictions)
+             if predictions[0]['Fake'] > 0.5:
+                 # st.error(f"The Sample is spoof: \n Confidence {(prediction_value) }%", icon="🚨")
+                 st.error("This text is AI generated", icon="🚨")
+             else:
+                 st.success("This text is real", icon="✅")
+
+
+ # if choose == "Generate Content":
+ #     st.markdown(""" <style> .font {
+ #     font-size:25px ; font-family: 'Cooper Black'; color: #FF9633;}
+ #     </style> """, unsafe_allow_html=True)
+ #     st.markdown('<p class="font">Comparison of Models</p>', unsafe_allow_html=True)
+ #     data_frame = get_data()
+ #     tab1, tab2 = st.tabs(["EER", "min-TDCF"])
+ #     with tab1:
+ #         data_frame["EER ASVS 2019"] = data_frame["EER ASVS 2019"].astype('float64')
+ #         data_frame["EER ASVS 2021"] = data_frame["EER ASVS 2021"].astype('float64')
+ #         data_frame["Cross-dataset 19-21"] = data_frame["Cross-dataset 19-21"].astype('float64')
+
+ #         data = data_frame[["Model Name","EER ASVS 2019","EER ASVS 2021","Cross-dataset 19-21"]].reset_index(drop=True).melt('Model Name')
+ #         chart = alt.Chart(data).mark_line().encode(
+ #             x='Model Name',
+ #             y='value',
+ #             color='variable'
+ #         )
+ #         st.altair_chart(chart, theme=None, use_container_width=True)
+ #     with tab2:
+ #         data_frame["min-TDCF ASVS 2019"] = data_frame["EER ASVS 2019"].astype('float64')
+ #         data_frame["min-TDCF ASVS 2021"] = data_frame["EER ASVS 2021"].astype('float64')
+ #         data_frame["min-TDCF Cross-dataset"] = data_frame["Cross-dataset 19-21"].astype('float64')
+
+ #         data = data_frame[["Model Name","min-TDCF ASVS 2019","min-TDCF ASVS 2021","min-TDCF Cross-dataset"]].reset_index(drop=True).melt('Model Name')
+ #         chart = alt.Chart(data).mark_line().encode(
+ #             x='Model Name',
+ #             y='value',
+ #             color='variable'
+ #         )
+ #         st.altair_chart(chart, theme=None, use_container_width=True)
+ #     # Data table
+ #     st.markdown(""" <style> .appview-container .main .block-container {
+ #         max-width: 100%;
+ #         padding-top: {1}rem;
+ #         padding-right: {1}rem;
+ #         padding-left: {1}rem;
+ #         padding-bottom: {1}rem;
+ #     }</style> """, unsafe_allow_html=True)
+ #     st.dataframe(data_frame, use_container_width=True)
+
+
+
+ if choose == "About":
+     st.markdown(""" <style> .font {
+     font-size:35px ; font-family: 'Cooper Black'; color: #FF9633;}
+     </style> """, unsafe_allow_html=True)
+     st.markdown('<p class="font">About</p>', unsafe_allow_html=True)
+ if choose == "Contact":
+     st.markdown(""" <style> .font {
+     font-size:35px ; font-family: 'Cooper Black'; color: #FF9633;}
+     </style> """, unsafe_allow_html=True)
+     st.markdown('<p class="font">Contact Us</p>', unsafe_allow_html=True)
+     with st.form(key='columns_in_form2', clear_on_submit=True):  # set clear_on_submit=True so that the form will be reset/cleared once it's submitted
+         # st.write('Please help us improve!')
+         Name = st.text_input(label='Please Enter Your Name')  # Collect user feedback
+         Email = st.text_input(label='Please Enter Your Email')  # Collect user feedback
+         Message = st.text_input(label='Please Enter Your Message')  # Collect user feedback
+         submitted = st.form_submit_button('Submit')
+         if submitted:
+             st.write('Thanks for contacting us. We will respond to your questions or inquiries as soon as possible!')
+
+
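For quick reference, the snippet below (not part of this commit) is a minimal standalone sketch of the per-chunk scoring path that predict() in main.py follows: encode the text, truncate to the model window, wrap with BOS/EOS tokens, and softmax the two logits. The checkpoint name is taken from the code above, and the Real/Fake label order follows the comment inside predict(); in the app, findRealProb() applies this per chunk of at most 900 characters and length-weights the results.

# Standalone sketch of main.py's per-chunk scoring (assumes the
# "devloverumar/chatgpt-content-detector" checkpoint downloads successfully).
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

tokenizer = AutoTokenizer.from_pretrained("devloverumar/chatgpt-content-detector")
model = AutoModelForSequenceClassification.from_pretrained("devloverumar/chatgpt-content-detector", num_labels=2)

text = "Some passage to inspect."  # placeholder input
tokens = tokenizer.encode(text)[: tokenizer.model_max_length - 2]
ids = torch.tensor([tokenizer.bos_token_id] + tokens + [tokenizer.eos_token_id]).unsqueeze(0)
with torch.no_grad():
    probs = model(ids, attention_mask=torch.ones_like(ids))[0].softmax(dim=-1)
real, fake = probs.flatten().tolist()  # label order as noted in predict()
print({"Real": real, "Fake": fake})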
requirements.txt ADDED
@@ -0,0 +1,11 @@
+ transformers
+ flask
+ torch
+ gradio
+ datasets
+ evaluate
+ scikit-learn
+ scipy
+ matplotlib
+ accelerate
+ nvidia-ml-py3