kjgkbj committed on
Commit
0b766f0
·
verified ·
1 Parent(s): 745b11b

Upload 4 files

Browse files
Files changed (3) hide show
  1. app.py +419 -0
  2. requirements.txt +6 -0
  3. static/logo.png +0 -0
app.py ADDED
@@ -0,0 +1,419 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Flask front end for a T5 paraphraser with adequacy/fluency/diversity filters."""
import re

import nltk
import torch
from flask import Flask, request, render_template_string
from nltk import sent_tokenize
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# sent_tokenize needs the Punkt sentence model. Recent NLTK releases load it
# from the "punkt_tab" resource while older ones use "punkt", so fetch both;
# an unknown resource name is reported by nltk.download without raising.
for _resource in ("punkt", "punkt_tab"):
    nltk.download(_resource, quiet=True)
8
+
9
class Adequacy:
    """Filter paraphrases by how well they match the input phrase.

    A sequence-classification model scores each (input, paraphrase) pair;
    the softmax probability at label index 1 is used as the adequacy score.
    """

    def __init__(self, model_tag='prithivida/parrot_adequacy_model'):
        """Load the classifier and its tokenizer for ``model_tag``."""
        from transformers import AutoModelForSequenceClassification, AutoTokenizer
        self.adequacy_model = AutoModelForSequenceClassification.from_pretrained(model_tag)
        self.tokenizer = AutoTokenizer.from_pretrained(model_tag)

    def filter(self, input_phrase, para_phrases, adequacy_threshold, device="cpu"):
        """Return the paraphrases whose adequacy score reaches the threshold.

        Args:
            input_phrase: The original phrase.
            para_phrases: Iterable of candidate paraphrases.
            adequacy_threshold: Minimum score (inclusive) a candidate needs.
            device: Torch device the model and inputs are moved to.

        Returns:
            A list of the accepted candidates, in iteration order.
        """
        top_adequacy_phrases = []
        # Moving the model is loop-invariant, so do it once up front.
        self.adequacy_model = self.adequacy_model.to(device)
        # Pure inference: skip autograd bookkeeping.
        with torch.no_grad():
            for para_phrase in para_phrases:
                x = self.tokenizer(
                    input_phrase,
                    para_phrase,
                    return_tensors='pt',
                    max_length=128,
                    truncation=True,
                ).to(device)
                logits = self.adequacy_model(**x).logits
                adequacy_score = logits.softmax(dim=1)[:, 1].item()
                if adequacy_score >= adequacy_threshold:
                    top_adequacy_phrases.append(para_phrase)
        return top_adequacy_phrases
28
+
29
class Fluency:
    """Filter paraphrases by a fluency classifier.

    The classifier has two labels: LABEL_0 = Bad Fluency, LABEL_1 = Good
    Fluency. The softmax probability of LABEL_1 is the fluency score.
    """

    def __init__(self, model_tag='prithivida/parrot_fluency_model'):
        """Load the two-label classifier and its tokenizer for ``model_tag``."""
        from transformers import AutoModelForSequenceClassification, AutoTokenizer
        self.fluency_model = AutoModelForSequenceClassification.from_pretrained(model_tag, num_labels=2)
        self.fluency_tokenizer = AutoTokenizer.from_pretrained(model_tag)

    def filter(self, para_phrases, fluency_threshold, device="cpu"):
        """Return the paraphrases whose fluency score reaches the threshold.

        Args:
            para_phrases: Iterable of candidate paraphrases.
            fluency_threshold: Minimum score (inclusive) a candidate needs.
            device: Torch device the model and inputs are moved to.

        Returns:
            A list of the accepted candidates, in iteration order.
        """
        self.fluency_model = self.fluency_model.to(device)
        top_fluent_phrases = []
        # Pure inference: skip autograd bookkeeping.
        with torch.no_grad():
            for para_phrase in para_phrases:
                encoded = self.fluency_tokenizer(
                    "Sentence: " + para_phrase,
                    return_tensors='pt',
                    truncation=True,
                ).to(device)
                logits = self.fluency_model(**encoded).logits
                # Row 0 is the single input; column 1 is LABEL_1 (good fluency).
                fluency_score = logits.softmax(dim=1)[0, 1].item()
                if fluency_score >= fluency_threshold:
                    top_fluent_phrases.append(para_phrase)
        return top_fluent_phrases
50
+
51
class Diversity:
    """Score how different each paraphrase is from the input phrase.

    Every ranker returns a dict mapping paraphrase -> score, where a larger
    score means the paraphrase differs more from the input. All rankers
    compare case-insensitively.
    """

    def __init__(self, model_tag='paraphrase-distilroberta-base-v2'):
        """Load the sentence-embedding model used by the euclidean ranker."""
        from sentence_transformers import SentenceTransformer
        self.diversity_model = SentenceTransformer(model_tag)

    def rank(self, input_phrase, para_phrases, diversity_ranker='levenshtein'):
        """Dispatch to the ranker named by ``diversity_ranker``.

        Args:
            input_phrase: The original phrase.
            para_phrases: Iterable of candidate paraphrases.
            diversity_ranker: One of "levenshtein", "euclidean" or "diff".

        Returns:
            Dict mapping each paraphrase to its diversity score.

        Raises:
            ValueError: If ``diversity_ranker`` is not a known ranker name
                (previously this fell through and returned ``None``).
        """
        rankers = {
            "levenshtein": self.levenshtein_ranker,
            "euclidean": self.euclidean_ranker,
            "diff": self.diff_ranker,
        }
        try:
            ranker = rankers[diversity_ranker]
        except KeyError:
            raise ValueError(
                f"Unknown diversity_ranker {diversity_ranker!r}; "
                f"expected one of {sorted(rankers)}"
            ) from None
        return ranker(input_phrase, para_phrases)

    def euclidean_ranker(self, input_phrase, para_phrases):
        """Score by embedding distance, min-max scaled to the range [0, 1].

        The scaling is computed directly so this ranker needs nothing beyond
        scipy. An empty candidate list yields an empty dict, and identical
        distances all scale to 0.0.
        """
        from scipy import spatial

        input_enc = self.diversity_model.encode(input_phrase.lower())
        distances = {}
        for para_phrase in para_phrases:
            paraphrase_enc = self.diversity_model.encode(para_phrase.lower())
            distances[para_phrase] = float(
                spatial.distance.euclidean(input_enc, paraphrase_enc)
            )
        if not distances:
            return {}

        lowest = min(distances.values())
        spread = max(distances.values()) - lowest
        if spread == 0:
            # A single candidate, or all candidates equally distant.
            return {phrase: 0.0 for phrase in distances}
        return {
            phrase: (distance - lowest) / spread
            for phrase, distance in distances.items()
        }

    def levenshtein_ranker(self, input_phrase, para_phrases):
        """Score by character-level Levenshtein distance to the input."""
        import Levenshtein

        reference = input_phrase.lower()
        return {
            para_phrase: Levenshtein.distance(reference, para_phrase.lower())
            for para_phrase in para_phrases
        }

    def diff_ranker(self, input_phrase, para_phrases):
        """Score by the number of words added or removed relative to the input.

        Only diff lines starting with "+ " or "- " are counted. Unchanged
        words that merely contain a hyphen or plus sign, and difflib's "? "
        hint lines, are not edits.
        """
        import difflib

        differ = difflib.Differ()
        reference_words = input_phrase.lower().split()
        diversity_scores = {}
        for para_phrase in para_phrases:
            diff = differ.compare(reference_words, para_phrase.lower().split())
            diversity_scores[para_phrase] = sum(
                1 for line in diff if line.startswith(("+ ", "- "))
            )
        return diversity_scores
111
+
112
class Parrot:
    """Paraphrase generator with adequacy/fluency filtering and diversity ranking.

    Candidates come from a seq2seq model prompted with ``"paraphrase: "``.
    They are filtered by ``Adequacy`` and ``Fluency``, then ordered by the
    score from ``Diversity`` (most diverse first).
    """

    # Anything that is not a letter, digit, space or one of ? ' - / : . is
    # stripped by _clean_text.
    _DISALLOWED_CHARS = re.compile(r"[^a-zA-Z0-9 ?'\-/:.]")

    def __init__(self, model_tag="prithivida/parrot_paraphraser_on_T5", use_gpu=False):
        """Load the paraphraser and the three scoring helpers.

        Args:
            model_tag: Hugging Face model id of the seq2seq paraphraser.
            use_gpu: Use "cuda:0" when requested and available, otherwise CPU.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(model_tag, use_auth_token=False)
        self.model = AutoModelForSeq2SeqLM.from_pretrained(model_tag, use_auth_token=False)
        self.adequacy_score = Adequacy()
        self.fluency_score = Fluency()
        self.diversity_score = Diversity()
        self.device = "cuda:0" if use_gpu and torch.cuda.is_available() else "cpu"
        self.model.to(self.device)

    def _clean_text(self, text):
        """Remove characters outside the allowed set and lowercase the text."""
        return self._DISALLOWED_CHARS.sub('', text).lower()

    @staticmethod
    def _beam_group_count(num_beams):
        """Pick a ``num_beam_groups`` value that evenly divides ``num_beams``.

        Group beam search requires the beam count to be divisible by the
        group count. Prefer the smallest divisor in 2..8; if there is none
        (e.g. 11 beams) use one beam per group. Returns 1 when fewer than
        two beams are requested, meaning grouping is not possible.
        """
        for groups in range(2, 9):
            if num_beams % groups == 0:
                return groups
        return num_beams if num_beams > 1 else 1

    def _generate_paraphrases(self, input_phrase, max_length, max_return_phrases, do_diverse):
        """Generate a set of cleaned candidate paraphrases for ``input_phrase``.

        With ``do_diverse`` the model uses diverse (group) beam search;
        otherwise it samples with top-k/top-p.
        """
        input_phrase = self._clean_text(input_phrase)
        input_ids = self.tokenizer.encode(
            "paraphrase: " + input_phrase, return_tensors='pt'
        ).to(self.device)

        if do_diverse:
            beam_groups = self._beam_group_count(max_return_phrases)
            if beam_groups > 1:
                preds = self.model.generate(
                    input_ids,
                    do_sample=False,
                    max_length=max_length,
                    num_beams=max_return_phrases,
                    num_beam_groups=beam_groups,
                    diversity_penalty=2.0,
                    early_stopping=True,
                    num_return_sequences=max_return_phrases)
            else:
                # A single beam cannot be split into groups, so decode
                # deterministically without the diversity penalty.
                preds = self.model.generate(
                    input_ids,
                    do_sample=False,
                    max_length=max_length,
                    num_beams=max_return_phrases,
                    num_return_sequences=max_return_phrases)
        else:
            preds = self.model.generate(
                input_ids,
                do_sample=True,
                max_length=max_length,
                top_k=50,
                top_p=0.95,
                early_stopping=True,
                num_return_sequences=max_return_phrases)

        paraphrases = {self.tokenizer.decode(pred, skip_special_tokens=True) for pred in preds}
        return self._clean_paraphrases(paraphrases)

    def _clean_paraphrases(self, paraphrases):
        """Return the set of cleaned paraphrases (duplicates collapse)."""
        return {self._clean_text(phrase) for phrase in paraphrases}

    def _filter_and_rank_paraphrases(self, input_phrase, paraphrases, adequacy_threshold, fluency_threshold, diversity_ranker):
        """Filter by adequacy then fluency, and rank survivors by diversity.

        Returns:
            A list of ``(paraphrase, diversity_score)`` tuples sorted by
            descending score, or ``[]`` when nothing passes a filter.
        """
        adequacy_filtered_phrases = self.adequacy_score.filter(
            input_phrase, paraphrases, adequacy_threshold, self.device)
        if not adequacy_filtered_phrases:
            return []

        fluency_filtered_phrases = self.fluency_score.filter(
            adequacy_filtered_phrases, fluency_threshold, self.device)
        if not fluency_filtered_phrases:
            return []

        diversity_scored_phrases = self.diversity_score.rank(
            input_phrase, fluency_filtered_phrases, diversity_ranker)
        return sorted(diversity_scored_phrases.items(), key=lambda item: item[1], reverse=True)

    def paraphrase_sentence(self, sentence, diversity_ranker="levenshtein", do_diverse=False, max_length=512, adequacy_threshold=0.90, fluency_threshold=0.90, max_return_phrases=10):
        """Paraphrase one sentence.

        Returns:
            Ranked ``(paraphrase, diversity_score)`` tuples; may be empty.
        """
        paraphrases = self._generate_paraphrases(sentence, max_length, max_return_phrases, do_diverse)
        return self._filter_and_rank_paraphrases(
            sentence, paraphrases, adequacy_threshold, fluency_threshold, diversity_ranker)

    def paraphrase_essay(self, essay, diversity_ranker="levenshtein", do_diverse=False, max_length=512, adequacy_threshold=0.90, fluency_threshold=0.90, max_return_phrases=10):
        """Paraphrase an essay sentence by sentence.

        Each sentence is replaced by its top-ranked paraphrase. A sentence
        with no paraphrase that passes the filters is kept unchanged, so
        content is never silently dropped from the result.

        Returns:
            The rewritten sentences joined with single spaces.
        """
        paraphrased_sentences = []
        for sentence in sent_tokenize(essay):
            paraphrased = self.paraphrase_sentence(
                sentence, diversity_ranker, do_diverse, max_length,
                adequacy_threshold, fluency_threshold, max_return_phrases)
            paraphrased_sentences.append(paraphrased[0][0] if paraphrased else sentence)

        return ' '.join(paraphrased_sentences)
192
+
193
# Flask app setup
app = Flask(__name__)
parrot_instance = Parrot(use_gpu=False)

# Form defaults and the ranker names accepted from the client.
DEFAULT_DIVERSITY_RANKER = 'levenshtein'
DIVERSITY_RANKERS = ('levenshtein', 'euclidean', 'diff')
DEFAULT_FLUENCY_THRESHOLD = 0.9

# Single-page UI. Rendered with render_template_string, so the {{ ... }}
# values are HTML-escaped by Jinja autoescaping.
HOME_TEMPLATE = """\
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Paraphrase Your Essay</title>
    <link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.3/css/all.min.css" rel="stylesheet">
    <style>
        body {
            font-family: Arial, sans-serif; margin: 0; background-color: #f4f4f4;
            color: #333; padding: 0;
        }
        .container {
            max-width: 900px; margin: auto; padding: 40px; background-color: #fff;
            box-shadow: 0 4px 8px rgba(0,0,0,0.1); border-radius: 8px; margin-top: 50px;
            text-align: center;
        }
        header {
            margin-bottom: 20px;
        }
        header img {
            height: 80px; margin-right: 10px;
        }
        h1 {
            text-align: center; color: #333;
            font-size: 2.5em;
            margin-bottom: 10px;
        }
        .subtitle {
            color: #666;
            font-size: 1.2em;
            margin-bottom: 40px;
        }
        textarea {
            width: 100%; height: 200px; margin-bottom: 20px; padding: 15px;
            border: 1px solid #ddd; border-radius: 4px; font-family: Arial, sans-serif;
            resize: vertical; font-size: 1em;
        }
        input[type='submit'] {
            padding: 15px 30px; background-color: #007BFF; color: white;
            border: none; border-radius: 4px; cursor: pointer;
            font-size: 1.2em;
            transition: background-color 0.3s;
        }
        input[type='submit']:hover {
            background-color: #0056b3;
        }
        select, input[type='range'], label {
            margin-bottom: 10px; padding: 10px; border-radius: 4px;
            display: block;
            width: 100%;
            font-size: 1em;
        }
        .settings {
            display: flex; flex-direction: column; align-items: center; margin-bottom: 20px;
        }
        .settings div {
            width: 100%; max-width: 400px; margin-bottom: 20px;
        }
        .slider-container {
            text-align: left;
            width: 100%; max-width: 400px;
        }
        .slider-label {
            font-size: 1em;
            margin-bottom: 5px;
        }
        .slider {
            width: 100%;
        }
        h2, pre {
            margin-top: 20px; color: #333;
        }
        pre {
            background-color: #f9f9f9; padding: 15px; border: 1px solid #ddd; border-radius: 4px;
            white-space: pre-wrap; word-wrap: break-word;
            text-align: left;
            font-size: 1em;
        }
        .footer {
            text-align: center; margin-top: 30px; font-size: 14px; color: #777;
        }
        .cta-buttons {
            display: flex; justify-content: space-around; margin-top: 20px;
        }
        .cta-buttons a {
            padding: 10px 20px; background-color: #28a745; color: white;
            border: none; border-radius: 4px; cursor: pointer;
            text-decoration: none;
            transition: background-color 0.3s;
            font-size: 1.2em;
        }
        .cta-buttons a:hover {
            background-color: #218838;
        }
        .cta-buttons a.secondary {
            background-color: #ffc107;
        }
        .cta-buttons a.secondary:hover {
            background-color: #e0a800;
        }
        .description {
            text-align: left;
            margin-top: 50px;
        }
        .description h3 {
            margin-top: 30px;
            color: #007BFF;
        }
        .description p {
            margin-bottom: 20px;
            line-height: 1.6;
        }
        .description ul {
            list-style: none;
            padding-left: 0;
        }
        .description ul li {
            margin-bottom: 10px;
            line-height: 1.6;
        }
    </style>
</head>
<body>
    <div class="container">
        <header>
            <img src="/static/logo.png" alt="NexByte Logo">
            <h1>Paraphrase Your Essay</h1>
            <p class="subtitle">Humanize AI Text with the Best Paraphrasing Tool</p>
        </header>
        <form method="post">
            <textarea name="essay" rows="10" cols="50" placeholder="Paste your essay here...">{{ essay }}</textarea>
            <div class="settings">
                <div>
                    <label for="diversity_ranker">Diversity Ranking Method:</label>
                    <select id="diversity_ranker" name="diversity_ranker">
                        <option value="levenshtein" {% if diversity_ranker == 'levenshtein' %}selected{% endif %}>Levenshtein</option>
                        <option value="euclidean" {% if diversity_ranker == 'euclidean' %}selected{% endif %}>Euclidean</option>
                        <option value="diff" {% if diversity_ranker == 'diff' %}selected{% endif %}>Diff</option>
                    </select>
                </div>
                <div class="slider-container">
                    <label class="slider-label" for="fluency_threshold">Fluency Threshold:</label>
                    <input type="range" id="fluency_threshold" name="fluency_threshold" class="slider" min="0.5" max="1.0" step="0.01" value="{{ fluency_threshold }}" oninput="this.nextElementSibling.value = this.value">
                    <output>{{ fluency_threshold }}</output>
                </div>
            </div>
            <input type="submit" value="Paraphrase">
        </form>
        {% if paraphrased_essay %}
        <h2>Paraphrased Essay</h2>
        <pre>{{ paraphrased_essay }}</pre>
        {% endif %}
        <div class="cta-buttons">
            <a href="/subscribe">Subscribe Now</a>
            <a href="/contact" class="secondary">Contact Us</a>
        </div>
        <div class="description">
            <h3>What is NexByte's Paraphrasing Tool?</h3>
            <p>The NexByte Paraphrasing Tool is an innovative online tool for converting AI-generated content into human-like writing. This programme, also known as the NexByte AI Text Converter, efficiently rewrites content written by AI writers such as ChatGPT, Google Bard, Microsoft Bing, Claude, QuillBot, Grammarly, Jasper.ai, Copy.ai, and any other AI text generator. It ensures that the text is free of robotic tones, rendering it indistinguishable from human writing.</p>
            <p>Our application employs advanced proprietary algorithms to preserve the original content and context of the text while improving readability and Search Engine Optimisation (SEO) potential. The content created with NexByte Paraphrasing Tool is completely plagiarism-free and undetectable by all existing AI detectors on the market.</p>

            <h3>What Does "Paraphrasing AI Text" Mean?</h3>
            <p>Paraphrasing AI text entails transforming AI-generated content into writing that appears more naturally human. This technique entails making the language more interesting, accessible, and clear to human readers while removing any robotic tones.</p>
            <p>NexByte's method for humanising AI text includes:</p>
            <ul>
                <li><strong>Natural Language Use:</strong> Ensure that the material flows organically and reads easily.</li>
                <li><strong>Empathy and Understanding:</strong> Adding a human element to make things more relatable.</li>
                <li><strong>Personalisation:</strong> Tailoring the text to individual audiences and settings.</li>
                <li><strong>Engagement:</strong> Making the information more intriguing and interactive.</li>
                <li><strong>Clarity and Simplicity:</strong> Ensure that the text is easy to read and understand.</li>
                <li><strong>Ethical and Cultural Sensitivity:</strong> Ensure that the content adheres to all cultural and ethical norms.</li>
            </ul>

            <h3>How Can We Paraphrase AI Text Online for Free?</h3>
            <p>Using the NexByte Paraphrasing Tool is simple and intuitive. Follow these easy steps to turn your AI-generated writing into human-like content:</p>
            <ul>
                <li><strong>Open the NexByte Paraphrasing Tool:</strong> Navigate to NexByte Paraphrasing Tool using your choice web browser. Our programme works with all major browsers.</li>
                <li><strong>To enter AI-generated text:</strong> Simply paste it into the webpage's input text form.</li>
                <li><strong>To customise the paraphrasing process:</strong> Adjust preferences such as the diversity ranking method (Levenshtein, Euclidean, or Diff) and fluency threshold.</li>
                <li><strong>To start the paraphrasing process:</strong> Click the "Paraphrase" button. The tool will begin to convert the AI-generated text into human-like text. Please be patient; this may take some time.</li>
                <li><strong>After reviewing and editing:</strong> The final output text will be presented. Review the text, and if required, alter the settings before repeating the process until you are satisfied with the outcome.</li>
                <li><strong>Use the Text:</strong> Copy the relevant text, make any necessary adjustments, and use it in your projects.</li>
                <li><strong>Click the "Paraphrase Again" button:</strong> To begin a new session with different AI-generated input.</li>
            </ul>
            <p>Voila! You now have content that reads naturally, is free of robotic tones, and is undetectable by AI detection software.</p>

            <h3>Why Should I Use NexByte Paraphrasing Tool?</h3>
            <p>NexByte Paraphrasing Tool stands apart because:</p>
            <ul>
                <li><strong>Advanced Algorithms:</strong> Uses cutting-edge technologies to assure high-quality paraphrase.</li>
                <li><strong>User-Friendly Interface:</strong> The design is simple and intuitive, allowing for easy use.</li>
                <li><strong>Customisable Settings:</strong> Users can fine-tune the paraphrasing process to match unique requirements.</li>
                <li><strong>Reliable Output:</strong> Generates plagiarism-free, SEO-optimized, and human-like text.</li>
                <li><strong>Free to Use:</strong> Advanced paraphrasing is available online for free, making it accessible to anyone.</li>
            </ul>
            <p>Experience the future of content creation with the NexByte Paraphrasing Tool, which effortlessly transforms AI-generated prose into human-like masterpieces.</p>
        </div>
        <div class="footer">
            &copy; 2024 NexByte. All rights reserved.
        </div>
    </div>
</body>
</html>
"""


def _parse_fluency_threshold(raw_value):
    """Convert a submitted threshold to a float in [0, 1], else the default.

    Malformed, missing, NaN or out-of-range values fall back to
    DEFAULT_FLUENCY_THRESHOLD instead of raising and producing a 500.
    """
    try:
        value = float(raw_value)
    except (TypeError, ValueError):
        return DEFAULT_FLUENCY_THRESHOLD
    # NaN fails this chained comparison too, so it also takes the default.
    return value if 0.0 <= value <= 1.0 else DEFAULT_FLUENCY_THRESHOLD


@app.route('/', methods=['GET', 'POST'])
def home():
    """Render the paraphrasing page; on POST, paraphrase the submitted essay.

    Form fields read: ``essay``, ``diversity_ranker`` and
    ``fluency_threshold``. Unknown ranker names and invalid thresholds are
    replaced by their defaults. The submitted essay is echoed back into the
    textarea so settings can be adjusted and the request repeated.
    """
    paraphrased_essay = ""
    essay = request.form.get('essay', '')

    diversity_ranker = request.form.get('diversity_ranker', DEFAULT_DIVERSITY_RANKER)
    if diversity_ranker not in DIVERSITY_RANKERS:
        diversity_ranker = DEFAULT_DIVERSITY_RANKER
    fluency_threshold = _parse_fluency_threshold(
        request.form.get('fluency_threshold', DEFAULT_FLUENCY_THRESHOLD)
    )

    if request.method == 'POST' and essay:
        paraphrased_essay = parrot_instance.paraphrase_essay(
            essay,
            diversity_ranker=diversity_ranker,
            fluency_threshold=fluency_threshold
        )

    return render_template_string(
        HOME_TEMPLATE,
        paraphrased_essay=paraphrased_essay,
        diversity_ranker=diversity_ranker,
        fluency_threshold=fluency_threshold,
        essay=essay,
    )
417
+
418
if __name__ == '__main__':
    import os

    # The Werkzeug debugger allows arbitrary code execution from the browser,
    # so it is opt-in via FLASK_DEBUG=1 rather than always on. Host and port
    # default to Flask's own defaults and can be overridden for deployment.
    app.run(
        host=os.environ.get('HOST', '127.0.0.1'),
        port=int(os.environ.get('PORT', '5000')),
        debug=os.environ.get('FLASK_DEBUG') == '1',
    )
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
# Runtime dependencies for app.py (every third-party module it imports).
flask
transformers[torchhub]
torch
sentencepiece
nltk
numpy
scipy
python-Levenshtein
fuzzywuzzy
sentence-transformers
pandas
scikit-learn
sklearn-pandas
static/logo.png ADDED