NoaiGPT committed on
Commit 0f91720
1 Parent(s): c9c0d41
Files changed (1):
  1. app.py +237 -72
app.py CHANGED
@@ -1,9 +1,192 @@
+# # import os
+# # import json
+# # import gradio as gr
+# # import spaces
+# # import torch
+# # from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForSequenceClassification
+# # from sentence_splitter import SentenceSplitter
+# # from itertools import product
+
+# # # Get the Hugging Face token from environment variable
+# # hf_token = os.getenv('HF_TOKEN')
+
+# # cuda_available = torch.cuda.is_available()
+# # device = torch.device("cpu" if cuda_available else "cpu")
+# # print(f"Using device: {device}")
+
+# # # Initialize paraphraser model and tokenizer
+# # paraphraser_model_name = "NoaiGPT/777"
+# # paraphraser_tokenizer = AutoTokenizer.from_pretrained(paraphraser_model_name, use_auth_token=hf_token)
+# # paraphraser_model = AutoModelForSeq2SeqLM.from_pretrained(paraphraser_model_name, use_auth_token=hf_token).to(device)
+
+# # # Initialize classifier model and tokenizer
+# # classifier_model_name = "andreas122001/roberta-mixed-detector"
+# # classifier_tokenizer = AutoTokenizer.from_pretrained(classifier_model_name)
+# # classifier_model = AutoModelForSequenceClassification.from_pretrained(classifier_model_name).to(device)
+
+# # # Initialize sentence splitter
+# # splitter = SentenceSplitter(language='en')
+
+# # def classify_text(text):
+# #     inputs = classifier_tokenizer(text, return_tensors="pt", truncation=True, max_length=512).to(device)
+# #     with torch.no_grad():
+# #         outputs = classifier_model(**inputs)
+# #     probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
+# #     predicted_class = torch.argmax(probabilities, dim=-1).item()
+# #     main_label = classifier_model.config.id2label[predicted_class]
+# #     main_score = probabilities[0][predicted_class].item()
+# #     return main_label, main_score
+
+# # # @spaces.GPU
+# # def generate_paraphrases(text, setting, output_format):
+# #     sentences = splitter.split(text)
+# #     all_sentence_paraphrases = []
+
+# #     if setting == 1:
+# #         num_return_sequences = 5
+# #         repetition_penalty = 1.1
+# #         no_repeat_ngram_size = 2
+# #         temperature = 1.0
+# #         max_length = 128
+# #     elif setting == 2:
+# #         num_return_sequences = 10
+# #         repetition_penalty = 1.2
+# #         no_repeat_ngram_size = 3
+# #         temperature = 1.2
+# #         max_length = 192
+# #     elif setting == 3:
+# #         num_return_sequences = 15
+# #         repetition_penalty = 1.3
+# #         no_repeat_ngram_size = 4
+# #         temperature = 1.4
+# #         max_length = 256
+# #     elif setting == 4:
+# #         num_return_sequences = 20
+# #         repetition_penalty = 1.4
+# #         no_repeat_ngram_size = 5
+# #         temperature = 1.6
+# #         max_length = 320
+# #     else:
+# #         num_return_sequences = 25
+# #         repetition_penalty = 1.5
+# #         no_repeat_ngram_size = 6
+# #         temperature = 1.8
+# #         max_length = 384
+
+# #     top_k = 50
+# #     top_p = 0.95
+# #     length_penalty = 1.0
+
+# #     formatted_output = "Original text:\n" + text + "\n\n"
+# #     formatted_output += "Paraphrased versions:\n"
+
+# #     json_output = {
+# #         "original_text": text,
+# #         "paraphrased_versions": [],
+# #         "combined_versions": [],
+# #         "human_like_versions": []
+# #     }
+
+# #     for i, sentence in enumerate(sentences):
+# #         inputs = paraphraser_tokenizer(f'paraphraser: {sentence}', return_tensors="pt", padding="longest", truncation=True, max_length=max_length).to(device)
+
+# #         # Generate paraphrases using the specified parameters
+# #         outputs = paraphraser_model.generate(
+# #             inputs.input_ids,
+# #             attention_mask=inputs.attention_mask,
+# #             num_return_sequences=num_return_sequences,
+# #             repetition_penalty=repetition_penalty,
+# #             no_repeat_ngram_size=no_repeat_ngram_size,
+# #             temperature=temperature,
+# #             max_length=max_length,
+# #             top_k=top_k,
+# #             top_p=top_p,
+# #             do_sample=True,
+# #             early_stopping=False,
+# #             length_penalty=length_penalty
+# #         )
+
+# #         paraphrases = paraphraser_tokenizer.batch_decode(outputs, skip_special_tokens=True)
+
+# #         formatted_output += f"Original sentence {i+1}: {sentence}\n"
+# #         for j, paraphrase in enumerate(paraphrases, 1):
+# #             formatted_output += f"  Paraphrase {j}: {paraphrase}\n"
+
+# #         json_output["paraphrased_versions"].append({
+# #             f"original_sentence_{i+1}": sentence,
+# #             "paraphrases": paraphrases
+# #         })
+
+# #         all_sentence_paraphrases.append(paraphrases)
+# #         formatted_output += "\n"
+
+# #     all_combinations = list(product(*all_sentence_paraphrases))
+
+# #     formatted_output += "\nCombined paraphrased versions:\n"
+# #     combined_versions = []
+# #     for i, combination in enumerate(all_combinations[:50], 1):  # Limit to 50 combinations
+# #         combined_paraphrase = " ".join(combination)
+# #         combined_versions.append(combined_paraphrase)
+
+# #     json_output["combined_versions"] = combined_versions
+
+# #     # Classify combined versions
+# #     human_versions = []
+# #     for i, version in enumerate(combined_versions, 1):
+# #         label, score = classify_text(version)
+# #         formatted_output += f"Version {i}:\n{version}\n"
+# #         formatted_output += f"Classification: {label} (confidence: {score:.2%})\n\n"
+# #         if label == "human-produced" or (label == "machine-generated" and score < 0.98):
+# #             human_versions.append((version, label, score))
+
+# #     formatted_output += "\nHuman-like or Less Confident Machine-generated versions:\n"
+# #     for i, (version, label, score) in enumerate(human_versions, 1):
+# #         formatted_output += f"Version {i}:\n{version}\n"
+# #         formatted_output += f"Classification: {label} (confidence: {score:.2%})\n\n"
+
+# #     json_output["human_like_versions"] = [
+# #         {"version": version, "label": label, "confidence_score": score}
+# #         for version, label, score in human_versions
+# #     ]
+
+# #     # If no human-like versions, include the top 5 least confident machine-generated versions
+# #     if not human_versions:
+# #         human_versions = sorted([(v, l, s) for v, l, s in zip(combined_versions, [classify_text(v)[0] for v in combined_versions], [classify_text(v)[1] for v in combined_versions])], key=lambda x: x[2])[:5]
+# #         formatted_output += "\nNo human-like versions found. Showing top 5 least confident machine-generated versions:\n"
+# #         for i, (version, label, score) in enumerate(human_versions, 1):
+# #             formatted_output += f"Version {i}:\n{version}\n"
+# #             formatted_output += f"Classification: {label} (confidence: {score:.2%})\n\n"
+
+# #     if output_format == "text":
+# #         return formatted_output, "\n\n".join([v[0] for v in human_versions])
+# #     else:
+# #         return json.dumps(json_output, indent=2), "\n\n".join([v[0] for v in human_versions])
+
+# # # Define the Gradio interface
+# # iface = gr.Interface(
+# #     fn=generate_paraphrases,
+# #     inputs=[
+# #         gr.Textbox(lines=5, label="Input Text"),
+# #         gr.Slider(minimum=1, maximum=5, step=1, label="Readability to Human-like Setting"),
+# #         gr.Radio(["text", "json"], label="Output Format")
+# #     ],
+# #     outputs=[
+# #         gr.Textbox(lines=20, label="Detailed Paraphrases and Classifications"),
+# #         gr.Textbox(lines=10, label="Human-like or Less Confident Machine-generated Paraphrases")
+# #     ],
+# #     title="Advanced Diverse Paraphraser with Human-like Filter",
+# #     description="Enter a text, select a setting from readable to human-like, and choose the output format to generate diverse paraphrased versions. Combined versions are classified, and those detected as human-produced or less confidently machine-generated are presented in the final output."
+# # )
+
+# # # Launch the interface
+# # iface.launch()
+
 # import os
 # import json
 # import gradio as gr
 # import spaces
 # import torch
-# from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForSequenceClassification
+# from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForSequenceClassification
 # from sentence_splitter import SentenceSplitter
 # from itertools import product
 
@@ -11,13 +194,14 @@
 # hf_token = os.getenv('HF_TOKEN')
 
 # cuda_available = torch.cuda.is_available()
-# device = torch.device("cpu" if cuda_available else "cpu")
+# device = torch.device("cuda" if cuda_available else "cpu")
 # print(f"Using device: {device}")
 
 # # Initialize paraphraser model and tokenizer
-# paraphraser_model_name = "NoaiGPT/777"
-# paraphraser_tokenizer = AutoTokenizer.from_pretrained(paraphraser_model_name, use_auth_token=hf_token)
-# paraphraser_model = AutoModelForSeq2SeqLM.from_pretrained(paraphraser_model_name, use_auth_token=hf_token).to(device)
+# paraphraser_model_name = "sharad/ParaphraseGPT"
+# paraphraser_tokenizer = AutoTokenizer.from_pretrained("humarin/chatgpt_paraphraser_on_T5_base")
+# paraphraser_model = AutoModelForSeq2SeqLM.from_pretrained(paraphraser_model_name).to(device)
+# paraphrase_pipeline = pipeline("text2text-generation", model=paraphraser_model, tokenizer=paraphraser_tokenizer, device=0 if cuda_available else -1)
 
 # # Initialize classifier model and tokenizer
 # classifier_model_name = "andreas122001/roberta-mixed-detector"
@@ -37,7 +221,7 @@
 #     main_score = probabilities[0][predicted_class].item()
 #     return main_label, main_score
 
-# # @spaces.GPU
+# @spaces.GPU
 # def generate_paraphrases(text, setting, output_format):
 #     sentences = splitter.split(text)
 #     all_sentence_paraphrases = []
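For context on the @spaces.GPU decorator toggled in this hunk: on ZeroGPU Spaces, spaces.GPU attaches a GPU only for the duration of the decorated call, so CUDA work has to happen inside it. A minimal sketch of the pattern; gpu_probe is a hypothetical name, not part of this commit:

import spaces
import torch

@spaces.GPU  # ZeroGPU attaches a GPU only while this function runs
def gpu_probe() -> str:
    # torch.cuda.is_available() can be False at module import time on ZeroGPU,
    # but should be True inside the decorated call.
    return f"cuda available: {torch.cuda.is_available()}"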
@@ -46,31 +230,31 @@
 #         num_return_sequences = 5
 #         repetition_penalty = 1.1
 #         no_repeat_ngram_size = 2
-#         temperature = 1.0
+#         temperature = 0.9
 #         max_length = 128
 #     elif setting == 2:
-#         num_return_sequences = 10
+#         num_return_sequences = 5
 #         repetition_penalty = 1.2
 #         no_repeat_ngram_size = 3
-#         temperature = 1.2
+#         temperature = 0.95
 #         max_length = 192
 #     elif setting == 3:
-#         num_return_sequences = 15
+#         num_return_sequences = 5
 #         repetition_penalty = 1.3
 #         no_repeat_ngram_size = 4
-#         temperature = 1.4
+#         temperature = 1.0
 #         max_length = 256
 #     elif setting == 4:
-#         num_return_sequences = 20
+#         num_return_sequences = 5
 #         repetition_penalty = 1.4
 #         no_repeat_ngram_size = 5
-#         temperature = 1.6
+#         temperature = 1.05
 #         max_length = 320
 #     else:
-#         num_return_sequences = 25
+#         num_return_sequences = 5
 #         repetition_penalty = 1.5
 #         no_repeat_ngram_size = 6
-#         temperature = 1.8
+#         temperature = 1.1
 #         max_length = 384
 
 #     top_k = 50
@@ -88,36 +272,30 @@
 #     }
 
 #     for i, sentence in enumerate(sentences):
-#         inputs = paraphraser_tokenizer(f'paraphraser: {sentence}', return_tensors="pt", padding="longest", truncation=True, max_length=max_length).to(device)
-
-#         # Generate paraphrases using the specified parameters
-#         outputs = paraphraser_model.generate(
-#             inputs.input_ids,
-#             attention_mask=inputs.attention_mask,
+#         paraphrases = paraphrase_pipeline(
+#             sentence,
 #             num_return_sequences=num_return_sequences,
-#             repetition_penalty=repetition_penalty,
-#             no_repeat_ngram_size=no_repeat_ngram_size,
-#             temperature=temperature,
-#             max_length=max_length,
+#             do_sample=True,
 #             top_k=top_k,
 #             top_p=top_p,
-#             do_sample=True,
-#             early_stopping=False,
-#             length_penalty=length_penalty
+#             temperature=temperature,
+#             no_repeat_ngram_size=no_repeat_ngram_size,
+#             repetition_penalty=repetition_penalty,
+#             max_length=max_length
 #         )
 
-#         paraphrases = paraphraser_tokenizer.batch_decode(outputs, skip_special_tokens=True)
+#         paraphrases_texts = [p['generated_text'] for p in paraphrases]
 
 #         formatted_output += f"Original sentence {i+1}: {sentence}\n"
-#         for j, paraphrase in enumerate(paraphrases, 1):
+#         for j, paraphrase in enumerate(paraphrases_texts, 1):
 #             formatted_output += f"  Paraphrase {j}: {paraphrase}\n"
 
 #         json_output["paraphrased_versions"].append({
 #             f"original_sentence_{i+1}": sentence,
-#             "paraphrases": paraphrases
+#             "paraphrases": paraphrases_texts
 #         })
 
-#         all_sentence_paraphrases.append(paraphrases)
+#         all_sentence_paraphrases.append(paraphrases_texts)
 #         formatted_output += "\n"
 
 #     all_combinations = list(product(*all_sentence_paraphrases))
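A note on the product(*all_sentence_paraphrases) line above: the Cartesian product across sentences grows exponentially (k paraphrases per sentence gives k**n combinations for n sentences), which is why the code slices the result to 50. A minimal sketch with illustrative data; note that itertools.islice caps the product lazily, whereas list(product(...))[:50] materialises every combination first:

from itertools import islice, product

# Two sentences with three paraphrases each -> 3**2 = 9 combinations.
per_sentence = [["a1", "a2", "a3"], ["b1", "b2", "b3"]]

# Take at most 50 combinations without building the full product in memory.
capped = [" ".join(combo) for combo in islice(product(*per_sentence), 50)]
print(len(capped))  # 9 here: min(50, 3**2)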
@@ -186,7 +364,7 @@ import json
 import gradio as gr
 import spaces
 import torch
-from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForSequenceClassification
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForSequenceClassification
 from sentence_splitter import SentenceSplitter
 from itertools import product
 
@@ -198,10 +376,9 @@ device = torch.device("cuda" if cuda_available else "cpu")
 print(f"Using device: {device}")
 
 # Initialize paraphraser model and tokenizer
-paraphraser_model_name = "sharad/ParaphraseGPT"
-paraphraser_tokenizer = AutoTokenizer.from_pretrained("humarin/chatgpt_paraphraser_on_T5_base")
+paraphraser_model_name = "ramsrigouthamg/t5-large-paraphraser-diverse-high-quality"
+paraphraser_tokenizer = AutoTokenizer.from_pretrained(paraphraser_model_name)
 paraphraser_model = AutoModelForSeq2SeqLM.from_pretrained(paraphraser_model_name).to(device)
-paraphrase_pipeline = pipeline("text2text-generation", model=paraphraser_model, tokenizer=paraphraser_tokenizer, device=0 if cuda_available else -1)
 
 # Initialize classifier model and tokenizer
 classifier_model_name = "andreas122001/roberta-mixed-detector"
@@ -227,40 +404,26 @@ def generate_paraphrases(text, setting, output_format):
     all_sentence_paraphrases = []
 
     if setting == 1:
-        num_return_sequences = 5
-        repetition_penalty = 1.1
-        no_repeat_ngram_size = 2
-        temperature = 0.9
+        num_return_sequences = 3
+        num_beams = 5
         max_length = 128
     elif setting == 2:
-        num_return_sequences = 5
-        repetition_penalty = 1.2
-        no_repeat_ngram_size = 3
-        temperature = 0.95
+        num_return_sequences = 3
+        num_beams = 7
         max_length = 192
     elif setting == 3:
-        num_return_sequences = 5
-        repetition_penalty = 1.3
-        no_repeat_ngram_size = 4
-        temperature = 1.0
+        num_return_sequences = 3
+        num_beams = 9
        max_length = 256
     elif setting == 4:
-        num_return_sequences = 5
-        repetition_penalty = 1.4
-        no_repeat_ngram_size = 5
-        temperature = 1.05
+        num_return_sequences = 3
+        num_beams = 11
         max_length = 320
     else:
-        num_return_sequences = 5
-        repetition_penalty = 1.5
-        no_repeat_ngram_size = 6
-        temperature = 1.1
+        num_return_sequences = 3
+        num_beams = 15
         max_length = 384
 
-    top_k = 50
-    top_p = 0.95
-    length_penalty = 1.0
-
     formatted_output = "Original text:\n" + text + "\n\n"
     formatted_output += "Paraphrased versions:\n"
 
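The new branches above fix num_return_sequences at 3 and scale only num_beams and max_length with the setting; beam search requires num_return_sequences <= num_beams, which every branch satisfies. A minimal sketch of the same mapping as a lookup table; BEAM_SETTINGS and params_for_setting are hypothetical names, not part of the commit:

# Hypothetical restatement of the if/elif ladder above as a table.
BEAM_SETTINGS = {
    1: dict(num_return_sequences=3, num_beams=5, max_length=128),
    2: dict(num_return_sequences=3, num_beams=7, max_length=192),
    3: dict(num_return_sequences=3, num_beams=9, max_length=256),
    4: dict(num_return_sequences=3, num_beams=11, max_length=320),
    5: dict(num_return_sequences=3, num_beams=15, max_length=384),
}

def params_for_setting(setting: int) -> dict:
    # The `else` branch above maps any other value to the strongest setting.
    params = BEAM_SETTINGS.get(setting, BEAM_SETTINGS[5])
    # Beam search requires num_return_sequences <= num_beams.
    assert params["num_return_sequences"] <= params["num_beams"]
    return params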
@@ -272,19 +435,21 @@ def generate_paraphrases(text, setting, output_format):
     }
 
     for i, sentence in enumerate(sentences):
-        paraphrases = paraphrase_pipeline(
-            sentence,
-            num_return_sequences=num_return_sequences,
-            do_sample=True,
-            top_k=top_k,
-            top_p=top_p,
-            temperature=temperature,
-            no_repeat_ngram_size=no_repeat_ngram_size,
-            repetition_penalty=repetition_penalty,
-            max_length=max_length
+        text = "paraphrase: " + sentence + " </s>"
+        encoding = paraphraser_tokenizer.encode_plus(text, max_length=max_length, padding=True, return_tensors="pt")
+        input_ids, attention_mask = encoding["input_ids"].to(device), encoding["attention_mask"].to(device)
+
+        paraphraser_model.eval()
+        beam_outputs = paraphraser_model.generate(
+            input_ids=input_ids,
+            attention_mask=attention_mask,
+            max_length=max_length,
+            early_stopping=True,
+            num_beams=num_beams,
+            num_return_sequences=num_return_sequences
         )
 
-        paraphrases_texts = [p['generated_text'] for p in paraphrases]
+        paraphrases_texts = [paraphraser_tokenizer.decode(beam_output, skip_special_tokens=True, clean_up_tokenization_spaces=True) for beam_output in beam_outputs]
 
         formatted_output += f"Original sentence {i+1}: {sentence}\n"
         for j, paraphrase in enumerate(paraphrases_texts, 1):
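A minimal standalone sketch of the beam-search generation path this hunk switches to, assuming the "paraphrase: ... </s>" prompt format used above; the example sentence is illustrative:

import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

model_name = "ramsrigouthamg/t5-large-paraphraser-diverse-high-quality"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
model.eval()

sentence = "The weather was too hot to play outside."  # illustrative input
encoding = tokenizer("paraphrase: " + sentence + " </s>", return_tensors="pt")

with torch.no_grad():
    beam_outputs = model.generate(
        input_ids=encoding["input_ids"],
        attention_mask=encoding["attention_mask"],
        max_length=128,
        early_stopping=True,
        num_beams=5,
        num_return_sequences=3,  # must not exceed num_beams
    )

for out in beam_outputs:
    print(tokenizer.decode(out, skip_special_tokens=True, clean_up_tokenization_spaces=True))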
@@ -314,7 +479,7 @@ def generate_paraphrases(text, setting, output_format):
         label, score = classify_text(version)
         formatted_output += f"Version {i}:\n{version}\n"
         formatted_output += f"Classification: {label} (confidence: {score:.2%})\n\n"
-        if label == "human-produced" or (label == "machine-generated" and score < 0.98):
+        if label == "human-produced" or (label == "machine-generated" and score < 0.90):  # Adjusted threshold
             human_versions.append((version, label, score))
 
     formatted_output += "\nHuman-like or Less Confident Machine-generated versions:\n"
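The relaxed cut-off in this hunk keeps any version labelled human-produced, plus machine-generated versions the detector is less than 90% sure about. A minimal sketch of that predicate against the same detector; keeps_version is a hypothetical name, and the label strings are the ones the Space checks for:

import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

detector_name = "andreas122001/roberta-mixed-detector"
detector_tokenizer = AutoTokenizer.from_pretrained(detector_name)
detector_model = AutoModelForSequenceClassification.from_pretrained(detector_name)

def keeps_version(text: str, threshold: float = 0.90) -> bool:
    # Same logic as classify_text above: softmax over logits, argmax label.
    inputs = detector_tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    with torch.no_grad():
        logits = detector_model(**inputs).logits
    probs = torch.nn.functional.softmax(logits, dim=-1)
    idx = torch.argmax(probs, dim=-1).item()
    label = detector_model.config.id2label[idx]
    score = probs[0][idx].item()
    return label == "human-produced" or (label == "machine-generated" and score < threshold)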
 