Spaces:
Runtime error
Runtime error
Kartikeyssj2
committed on
Commit
·
73cfd62
1
Parent(s):
704855e
updates
Browse files- Dockerfile +14 -0
- Whisper_Word2Vec_Deployment +1 -0
- __pycache__/fast_api.cpython-312.pyc +0 -0
- download_models.py +17 -0
- fast_api.py +524 -0
- pronunciation_fluency_v2.pkl +3 -0
- requirements.txt +12 -0
- trasncribe.py +0 -0
- whisper_tiny_model.pt +3 -0
Dockerfile
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
FROM python:3.12.3-slim

# System packages the slim image lacks:
#   git    - pip needs it to install "git+https://..." requirements.
#   ffmpeg - Whisper shells out to ffmpeg to decode uploaded audio.
RUN apt-get update \
    && apt-get install -y --no-install-recommends git ffmpeg \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

COPY requirements.txt .

RUN pip install --upgrade pip \
    && pip install --no-cache-dir -r requirements.txt

COPY . .

# Hugging Face Docker Spaces route traffic to port 7860 unless app_port says otherwise.
EXPOSE 7860

# Use 4 worker processes to handle requests efficiently.
# -b 0.0.0.0:7860: gunicorn's default bind (127.0.0.1:8000) is unreachable from outside the container.
# --timeout 300: each worker loads the Word2Vec and Whisper models at import time, which can
# exceed gunicorn's default 30 s worker timeout.
CMD ["gunicorn", "-w", "4", "-k", "uvicorn.workers.UvicornWorker", "-b", "0.0.0.0:7860", "--timeout", "300", "fast_api:app"]
|
Whisper_Word2Vec_Deployment
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
Subproject commit 40c6120d1ba5b73520a1c80ad84c09377663b28f
|
__pycache__/fast_api.cpython-312.pyc
ADDED
Binary file (18.7 kB). View file
|
|
download_models.py
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Download the Word2Vec and Whisper models and store them next to the app.

Run as a script (``python download_models.py``). Importing the module no
longer triggers any download.
"""
import gensim.downloader as api
import os
import whisper
import torch

WORD2VEC_NAME = 'word2vec-google-news-300'
WORD2VEC_PATH = "word2vec-google-news-300.model"
WHISPER_SIZE = "tiny"
WHISPER_PATH = "whisper_tiny_model.pt"  # CHOOSE YOUR DESIRED FILE NAME


def download_word2vec(name: str = WORD2VEC_NAME, path: str = WORD2VEC_PATH) -> None:
    """Fetch the gensim vectors called *name* and save them to *path*.

    Does nothing when *path* already exists, so repeated runs do not
    re-download the (very large) vector file.
    """
    if os.path.exists(path):
        return
    # LOAD THE WORD2VEC MODEL, THEN SAVE IT LOCALLY
    word_2_vec = api.load(name)
    word_2_vec.save(path)


def download_whisper(size: str = WHISPER_SIZE, path: str = WHISPER_PATH) -> None:
    """Load the Whisper model of the given *size* and save its state dict to *path*.

    Does nothing when *path* already exists.
    """
    if os.path.exists(path):
        return
    # LOAD THE WHISPER MODEL, THEN SAVE ITS STATE DICTIONARY USING TORCH
    model = whisper.load_model(size)
    torch.save(model.state_dict(), path)


def main() -> None:
    """Download every model artefact the API expects on disk."""
    download_word2vec()
    download_whisper()


if __name__ == "__main__":
    main()
|
fast_api.py
ADDED
@@ -0,0 +1,524 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import whisper
|
2 |
+
from fastapi import FastAPI, UploadFile, File, Form
|
3 |
+
from pydantic import BaseModel
|
4 |
+
import gensim.downloader as api
|
5 |
+
from gensim.models import KeyedVectors
|
6 |
+
import torch
|
7 |
+
import pickle
|
8 |
+
import numpy as np
|
9 |
+
from gensim.models import KeyedVectors
|
10 |
+
|
11 |
+
# Load the saved Word2Vec model
word2vec_model = KeyedVectors.load("word2vec-google-news-300.model")

# Build the Whisper "tiny" model; its weights are then overwritten from the local checkpoint below.
model = whisper.load_model("tiny")

# Load the saved state dictionary.
# map_location="cpu" makes the checkpoint loadable on a CPU-only host even if it was
# saved from a GPU; load_state_dict copies the values onto whatever device `model` uses.
model_state = torch.load("whisper_tiny_model.pt", map_location="cpu")

# Load the state dictionary into the model
model.load_state_dict(model_state)
|
21 |
+
|
22 |
+
def load_model(pickle_file_path: str):
    """Unpickle and return the object stored at ``pickle_file_path``.

    NOTE: unpickling executes code embedded in the file; only use this on
    files that ship with the application.
    """
    with open(pickle_file_path, 'rb') as handle:
        return pickle.load(handle)
|
27 |
+
|
28 |
+
|
29 |
+
# Pickled predictor used by Get_P_F_Score to rate pronunciation and fluency.
pronunciation_fluency_model = load_model(
    "pronunciation_fluency_v2.pkl"
)

# ASGI application object that the route decorators below register against.
app = FastAPI()
|
32 |
+
|
33 |
+
|
34 |
+
def transcribe(audio_file_path: str, model):
    """Run ``model.transcribe`` on the audio file and return its ``"text"`` entry."""
    return model.transcribe(audio_file_path)["text"]
|
38 |
+
|
39 |
+
@app.post("/transcribe")
async def transcribe_audio(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file and return ``{"transcription": text}``.

    The upload is spooled to a private temporary file that is removed once
    transcription finishes (or fails).
    """
    import os
    import tempfile

    # Never use the client-supplied filename as a path: it can contain
    # directory components ("../../x") and collides between concurrent
    # requests. Only its extension is kept, in case the decoder uses it.
    suffix = os.path.splitext(os.path.basename(file.filename or ""))[1]

    # SAVE THE UPLOAD FILE TEMPORARILY
    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as buffer:
        buffer.write(await file.read())
        temp_path = buffer.name

    try:
        # TRANSCRIBE THE AUDIO
        transcription = transcribe(temp_path, model)
    finally:
        # Always clean up, even when transcription raises.
        os.remove(temp_path)

    return {"transcription": transcription}
|
51 |
+
|
52 |
+
|
53 |
+
def Get_P_F_Score( transcription : str ):
    """Predict pronunciation/fluency from the summed word vectors of a transcription.

    Each whitespace-separated token found in ``word2vec_model`` contributes
    its vector to an element-wise sum. That single vector is passed to
    ``pronunciation_fluency_model.predict`` and the raw prediction is returned.
    """
    words = transcription.split()

    # Accumulate in a NumPy array. With a plain list, `+=` *extends* the list
    # with the vector's elements instead of adding element-wise, so the
    # feature vector would grow past the model's dimensionality.
    cumulative_vector_representation = np.zeros(word2vec_model.vector_size)
    for word in words:
        if word in word2vec_model:
            cumulative_vector_representation += word2vec_model[word]

    print( cumulative_vector_representation[ 0 : 5] )

    print( len( cumulative_vector_representation) )

    if np.any(np.isnan(cumulative_vector_representation)):
        print("Input contains NaN values, handle missing values before prediction.")

    print("\n\n")

    # predict() expects a 2-D batch; wrap the single sample in a list.
    output = pronunciation_fluency_model.predict( [ cumulative_vector_representation] )

    print( output )

    return output
|
76 |
+
|
77 |
+
|
78 |
+
def get_average_vector(sentence):
    """Return the mean Word2Vec vector of the lower-cased words in ``sentence``.

    Words missing from ``word2vec_model`` are ignored. When none of the words
    are known, a zero vector of the model's dimensionality is returned.
    """
    known_vectors = []
    for token in sentence.lower().split():
        # SKIP WORDS NOT IN THE WORD2VEC VOCABULARY
        if token in word2vec_model:
            known_vectors.append(word2vec_model[token])

    if known_vectors:
        # AVERAGE ACROSS WORDS, KEEPING THE VECTOR DIMENSION
        return np.mean(known_vectors, axis=0)

    return np.zeros(word2vec_model.vector_size)
|
91 |
+
|
92 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
93 |
+
|
94 |
+
def get_similarity_score(topic, transcription ):
    """Return the cosine similarity of two texts' average word vectors, in [0, 1].

    Both texts are embedded with ``get_average_vector``. Negative similarities
    are floored at 0, and the result is returned as a built-in ``float`` so it
    can be placed directly in a JSON response.
    """
    # GET AVERAGE VECTORS FOR BOTH STRINGS
    topic_vector = get_average_vector(topic)
    transcription_vector = get_average_vector(transcription)

    print("topic vector: " , topic_vector)

    print(" transcription vector: " , transcription_vector )

    # RESHAPE VECTORS FOR COSINE SIMILARITY (expects 2-D inputs)
    topic_vector = topic_vector.reshape(1, -1)
    transcription_vector = transcription_vector.reshape(1, -1)

    print(" reshaping done ")

    # COMPUTE COSINE SIMILARITY
    similarity = cosine_similarity(topic_vector, transcription_vector)

    print(" Similarity: " , similarity )

    # Cosine similarity lies in [-1, 1] and callers scale it by 100, so the
    # upper clamp is 1 (it also absorbs floating-point overshoot). float()
    # drops the NumPy scalar type.
    output = float( similarity[ 0 ][ 0 ] )
    output = min( 1.0 , max( 0.0 , output ) )

    # RETURN SIMILARITY SCORE (IT'S A SINGLE VALUE)
    return output
|
122 |
+
|
123 |
+
|
124 |
+
|
125 |
+
@app.post("/pronunciation_fluency_score")
async def pronunciation_fluency_scoring(
    file: UploadFile = File(...),
    topic: str = Form(...)
):
    """Score an uploaded recording for pronunciation, fluency and topical content.

    The audio is transcribed, the transcription is scored by
    ``Get_P_F_Score``, and its similarity to ``topic`` becomes the content score.

    ``topic`` is a plain text form field, hence ``Form`` rather than ``File``;
    this matches the other endpoints in this file.
    """
    import os
    import tempfile

    # Never use the client-supplied filename as a path: it can contain
    # directory components and collides between concurrent requests.
    suffix = os.path.splitext(os.path.basename(file.filename or ""))[1]

    # SAVE THE UPLOAD FILE TEMPORARILY
    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as buffer:
        buffer.write(await file.read())
        temp_path = buffer.name

    try:
        # TRANSCRIBE THE AUDIO
        transcription = transcribe(temp_path, model)
    finally:
        # Always clean up, even when transcription raises.
        os.remove(temp_path)

    pronunciation_fluency_score = Get_P_F_Score( transcription )

    print( pronunciation_fluency_score)

    print( type( pronunciation_fluency_score ) )

    # float(): NumPy scalars (notably float32) are not JSON-serialisable by FastAPI.
    content_score = float( get_similarity_score( topic , transcription) ) * 100

    # NOTE(review): assumes the predictor returns one row of
    # [pronunciation, fluency] - confirm against the trained model.
    return {
        "pronunciation score" : float( pronunciation_fluency_score[ 0 ][ 0 ] ) * 10 ,
        "fluency score" : float( pronunciation_fluency_score[ 0 ][ 1 ] ) * 10 ,
        "content score" : content_score
    }
|
156 |
+
|
157 |
+
|
158 |
+
|
159 |
+
import string
|
160 |
+
import asyncio
|
161 |
+
import re
|
162 |
+
from textblob import TextBlob
|
163 |
+
import nltk
|
164 |
+
|
165 |
+
def is_valid_summary_format(summary: str) -> bool:
    """Decide whether ``summary`` has an acceptable format.

    A summary containing ``-`` or ``*`` is accepted immediately. Otherwise the
    text is split on ``.``, ``!`` and ``?``; it is accepted when the number of
    non-blank fragments of at most 70 words is at least half of all fragments
    (blank ones included in the total).
    """
    if any(marker in summary for marker in ('-', '*')):
        return True

    fragments = re.split(r'[.!?]', summary)

    short_sentences = 0
    for fragment in fragments:
        if fragment.strip() and len(fragment.split()) <= 70:
            short_sentences += 1

    print(" Short Sentences: " , short_sentences )

    # Same test as "short >= total / 2", kept in integer arithmetic.
    return 2 * short_sentences >= len(fragments)
|
178 |
+
|
179 |
+
def form_score_summary(summary: str) -> float:
    """Score the form of a summary from its word count and format, 0-100.

    Scoring (only when ``is_valid_summary_format`` accepts the text):
      * 45-49 words: rises from 50 to 90 in steps of 10.
      * 50-70 words: 100.
      * 71-75 words: falls from 90 to 50 in steps of 10.
      * anything else: 0.
    An invalid format always scores 0.
    """
    # REMOVE PUNCTUATION, THEN COUNT THE WORDS
    summary_clean = re.sub(r'[^\w\s]', '', summary)
    word_count = len(summary_clean.split())

    # CHECK IF THE SUMMARY FORMAT IS VALID
    valid_format = is_valid_summary_format(summary)

    print("\n\n word count: ", word_count, " valid_format: ", valid_format)

    if not valid_format or not 45 <= word_count <= 75:
        score = 0.0  # Invalid format or word count out of the acceptable range
    elif word_count < 50:
        score = 50 + (word_count - 45) * (50 / 5)  # Gradual increase from 50
    elif word_count <= 70:
        # Upper bound is 70, not 75: with 75 the decreasing branch below
        # could never run and 71-75 words wrongly scored 100.
        score = 100.0  # Best score range
    else:
        score = 100 - (word_count - 70) * (50 / 5)  # Gradual decrease from 100

    # CLAMP SCORE BETWEEN 0 AND 100
    return max(0.0, min(100.0, float(score)))
|
213 |
+
|
214 |
+
|
215 |
+
|
216 |
+
|
217 |
+
def grammar_score(text: str) -> int:
    """Return 100 minus 5 points per sentence that TextBlob would alter, floored at 0.

    A sentence counts as erroneous when ``sentence.correct()`` differs from
    the sentence itself.
    """
    errors = sum(
        1
        for sentence in TextBlob(text).sentences
        if sentence.correct() != sentence
    )

    print(" \n\n Number of grammatical errors: " , errors )

    # Each flagged sentence costs 5 points.
    return max( 0 , 100 - errors * 5 )
|
234 |
+
|
235 |
+
|
236 |
+
def vocabulary_score(text: str) -> float:
    """Return the percentage of words TextBlob leaves unchanged when correcting.

    The result is clamped to [0, 100] and rounded to two decimals; a text with
    no words scores 0.0.
    """
    print(" Performing vocabulary score \n\n")

    tokens = TextBlob(text).words

    # A word counts as correctly spelled when correction leaves it as it is.
    correctly_spelled = 0
    for token in tokens:
        if token == TextBlob(token).correct():
            correctly_spelled += 1

    total_words = len(tokens)
    if total_words == 0:
        return 0.0  # Avoid division by zero if there are no words

    percentage_correct = (correctly_spelled / total_words) * 100
    percentage_correct = round( max( 0 , min( percentage_correct , 100) ) , 2 )

    print(" Percentage Correct: " , percentage_correct )

    return percentage_correct
|
266 |
+
|
267 |
+
|
268 |
+
@app.post("/summarization_scoring/")
def summarization_score( essay : str = Form() , summarization : str = Form() ):
    """Score a summary of ``essay`` on content, form, grammar and vocabulary.

    Returns the four component scores plus their mean rounded to two decimals.
    """
    # Component scores, computed in a fixed order (each one logs as it runs).
    content_score_result = float( get_similarity_score(essay, summarization) ) * 100
    form_score_result = float( form_score_summary(summarization) )
    grammar_score_result = float( grammar_score(summarization) )
    vocabulary_score_result = float( vocabulary_score(summarization) )

    print(" Completed \n\n\n ")

    overall = round(
        (content_score_result + form_score_result + grammar_score_result + vocabulary_score_result) / 4,
        2,
    )

    response = {
        "Content Score: " : content_score_result ,
        "Form Score: " : form_score_result ,
        "Grammar Score: " : grammar_score_result ,
        "Vocabulary Score: " : vocabulary_score_result ,
        "Overall Summarization Score: " : overall
    }

    print( response )

    return response
|
292 |
+
|
293 |
+
|
294 |
+
|
295 |
+
'''
|
296 |
+
transitional words can significantly contribute to the development, structure, and coherence of a text.
|
297 |
+
|
298 |
+
Development: Transitional words help to show how ideas build upon each other and progress
|
299 |
+
throughout the essay. They can introduce new points, provide examples, or signal a shift in focus.
|
300 |
+
|
301 |
+
Structure: Transitional words help to organize the text by indicating relationships between
|
302 |
+
ideas. They can show cause and effect, compare and contrast, or signal a sequence of events.
|
303 |
+
|
304 |
+
Coherence: Transitional words help to create a smooth flow between sentences and paragraphs,
|
305 |
+
making the text easier to understand and follow. They can clarify connections between
|
306 |
+
ideas and prevent the text from feeling disjointed.
|
307 |
+
'''
|
308 |
+
|
309 |
+
|
310 |
+
addition_transitional_words = [
|
311 |
+
"and", "also", "too", "in addition", "furthermore", "moreover", "besides", "likewise",
|
312 |
+
"similarly", "equally important", "not to mention", "as well as", "what's more",
|
313 |
+
"on top of that", "to boot", "in the same way", "by the same token", "similarly",
|
314 |
+
"likewise", "in a similar vein", "correspondingly", "at the same time", "concurrently",
|
315 |
+
"simultaneously", "not only... but also", "both... and", "as well", "and then",
|
316 |
+
"and so forth", "and so on"
|
317 |
+
]
|
318 |
+
contrast_transitional_words = [
|
319 |
+
"but", "however", "nevertheless", "nonetheless", "on the other hand", "on the contrary",
|
320 |
+
"in contrast", "conversely", "although", "though", "even though", "despite", "in spite of",
|
321 |
+
"regardless of", "while", "whereas", "yet", "still", "even so", "even if", "at the same time",
|
322 |
+
"by the same token", "equally", "in common", "similarly", "just like", "just as", "as well as",
|
323 |
+
"resemble", "equally", "in common", "by the same token"
|
324 |
+
]
|
325 |
+
cause_effect_transitional_words = [
|
326 |
+
"because", "since", "as", "due to", "owing to", "thanks to", "on account of",
|
327 |
+
"as a result", "consequently", "therefore", "hence", "thus", "so", "accordingly",
|
328 |
+
"for this reason", "as a consequence", "in consequence", "in that case",
|
329 |
+
"that being the case", "for that reason", "as a result of", "because of",
|
330 |
+
"on account of", "owing to", "due to", "thanks to"
|
331 |
+
]
|
332 |
+
time_transitional_words = [
|
333 |
+
"first", "second", "third", "next", "then", "after", "before", "later", "earlier",
|
334 |
+
"previously", "subsequently", "following", "meanwhile", "simultaneously",
|
335 |
+
"at the same time", "concurrently", "in the meantime", "in the interim", "afterwards",
|
336 |
+
"thereafter", "finally", "lastly", "ultimately", "in conclusion", "to conclude",
|
337 |
+
"in summary", "to sum up"
|
338 |
+
]
|
339 |
+
emphasis_transitional_words = [
|
340 |
+
"indeed", "in fact", "certainly", "assuredly", "without a doubt", "undoubtedly",
|
341 |
+
"unquestionably", "undeniably", "absolutely", "positively", "emphatically",
|
342 |
+
"decisively", "strongly", "forcefully", "with conviction", "with certainty",
|
343 |
+
"with assurance", "without hesitation", "without question", "without fail", "without doubt"
|
344 |
+
]
|
345 |
+
example_transitional_words = [
|
346 |
+
"for example", "for instance", "such as", "like", "as an illustration", "to illustrate",
|
347 |
+
"to demonstrate", "to exemplify", "namely", "specifically", "in particular",
|
348 |
+
"particularly", "especially"
|
349 |
+
]
|
350 |
+
conclusion_transitional_words = [
|
351 |
+
"in conclusion", "to conclude", "in summary", "to sum up", "finally", "lastly",
|
352 |
+
"ultimately", "therefore", "hence", "thus", "so", "accordingly", "as a result",
|
353 |
+
"consequently"
|
354 |
+
]
|
355 |
+
transition_between_sections_transitional_words = [
|
356 |
+
"in the following section", "moving on to", "now", "let's explore",
|
357 |
+
"turning our attention to", "to delve deeper", "we will now examine",
|
358 |
+
"next", "at this point", "at this juncture", "furthermore", "moreover",
|
359 |
+
"in addition"
|
360 |
+
]
|
361 |
+
miscellaneous_transition_words_list = [
|
362 |
+
# Clarification
|
363 |
+
"in other words", "that is to say", "namely", "to put it another way",
|
364 |
+
"in simpler terms", "to clarify", "to explain further", "to elaborate",
|
365 |
+
"to be more specific", "to be more exact",
|
366 |
+
|
367 |
+
# Concession
|
368 |
+
"admittedly", "granted", "of course", "naturally", "it is true that",
|
369 |
+
"it must be admitted that", "it cannot be denied that", "it goes without saying that",
|
370 |
+
|
371 |
+
# Digression
|
372 |
+
"by the way", "incidentally", "aside from that", "apart from that",
|
373 |
+
|
374 |
+
# Repetition
|
375 |
+
"again", "once again", "still", "further", "furthermore", "moreover", "in addition"
|
376 |
+
]
|
377 |
+
contrast_within_sentence_transitional_words = [
|
378 |
+
"but", "however", "nevertheless", "nonetheless", "on the other hand",
|
379 |
+
"in contrast", "conversely", "although", "though", "even though",
|
380 |
+
"despite", "in spite of", "regardless of", "while", "whereas",
|
381 |
+
"yet", "still", "even so", "even if"
|
382 |
+
]
|
383 |
+
comparison_transitional_words = [
|
384 |
+
"similarly", "likewise", "in the same way", "equally", "in common",
|
385 |
+
"by the same token", "just like", "just as", "as well as", "resemble"
|
386 |
+
]
|
387 |
+
cause_and_effect_within_sentence_transitional_words = [
|
388 |
+
"because", "since", "as", "due to", "owing to", "thanks to",
|
389 |
+
"on account of", "as a result", "consequently", "therefore",
|
390 |
+
"hence", "thus", "so", "accordingly", "for this reason",
|
391 |
+
"as a consequence", "in consequence", "in that case",
|
392 |
+
"that being the case", "for that reason", "as a result of",
|
393 |
+
"because of", "on account of", "owing to", "due to", "thanks to"
|
394 |
+
]
|
395 |
+
emphasis_within_sentence_transitional_words = [
|
396 |
+
"indeed", "in fact", "certainly", "assuredly", "without a doubt",
|
397 |
+
"undoubtedly", "unquestionably", "undeniably", "absolutely",
|
398 |
+
"positively", "emphatically", "decisively", "strongly", "forcefully",
|
399 |
+
"with conviction", "with certainty", "with assurance",
|
400 |
+
"without hesitation", "without question", "without fail", "without doubt"
|
401 |
+
]
|
402 |
+
concession_digression_repetition_transitional_words = [
|
403 |
+
# Concession
|
404 |
+
"admittedly", "granted", "of course", "naturally",
|
405 |
+
"it is true that", "it must be admitted that",
|
406 |
+
"it cannot be denied that", "it goes without saying that",
|
407 |
+
|
408 |
+
# Digression
|
409 |
+
"by the way", "incidentally", "aside from that",
|
410 |
+
"apart from that",
|
411 |
+
|
412 |
+
# Repetition
|
413 |
+
"again", "once again", "still", "further",
|
414 |
+
"furthermore", "moreover", "in addition"
|
415 |
+
]
|
416 |
+
|
417 |
+
def dsc_score( essay: str ):
    """Score Development, Structure and Coherence from transitional-phrase density.

    Counts whole-word, case-insensitive occurrences of every phrase in the
    module-level transitional word lists and expresses the total as a
    percentage of the essay's whitespace-separated word count. The score is
    100 minus the absolute distance of that percentage from 10, floored at 0.
    An essay with no words scores 0.0.
    """
    words = essay.split()
    word_count = len(words)

    # Nothing to score, and the percentage below would divide by zero.
    if word_count == 0:
        return 0.0

    # Normalize the essay
    essay_lower = essay.lower()

    # The category lists share many phrases (some lists are identical), so
    # merge them into a set to count each phrase once, not once per list.
    transitional_phrases = set().union(
        addition_transitional_words,
        contrast_transitional_words,
        cause_effect_transitional_words,
        time_transitional_words,
        emphasis_transitional_words,
        example_transitional_words,
        conclusion_transitional_words,
        transition_between_sections_transitional_words,
        miscellaneous_transition_words_list,
        contrast_within_sentence_transitional_words,
        comparison_transitional_words,
        cause_and_effect_within_sentence_transitional_words,
        emphasis_within_sentence_transitional_words,
        concession_digression_repetition_transitional_words,
    )

    # Helper function to count occurrences of one transitional phrase
    def count_phrase(phrase):
        # Word-boundary guards keep "as" from matching inside "was" or
        # "so" inside "also", which plain substring counting did.
        pattern = r'(?<!\w)' + re.escape(phrase) + r'(?!\w)'
        return len(re.findall(pattern, essay_lower))

    # Calculate total transitional word count
    total_transitional_count = sum(count_phrase(phrase) for phrase in transitional_phrases)

    print("\n\n\n Total Transitional Words Count: " , total_transitional_count )

    transitional_words_percentage = round( ( total_transitional_count / ( word_count * 1.00) ) * 100 , 2 )

    print("\n\n\n transitional_words_percentage: " , transitional_words_percentage)

    # A transitional-word share of 10% is treated as ideal; points are
    # deducted in proportion to the deviation from it. The floor keeps very
    # dense texts from producing a negative score.
    return max( 0.0 , 100 - abs( transitional_words_percentage - 10 ) )
|
468 |
+
|
469 |
+
|
470 |
+
def is_capitalized(text: str) -> bool:
    """Return True when every cased character in ``text`` is upper case.

    Text without any cased character (digits only, empty, ...) yields False.
    """
    return str.isupper(text)
|
473 |
+
|
474 |
+
def contains_punctuation(text: str) -> bool:
    """Return True when ``text`` contains any of ``. , ! ? ; :``."""
    return re.search(r'[.,!?;:]', text) is not None
|
477 |
+
|
478 |
+
def is_bullet_points(text: str) -> bool:
    """Return True when ``text`` looks like a bullet list or mostly tiny lines.

    True if any line starts with ``-`` (after stripping), or if the non-blank
    lines of at most two words outnumber half of all lines.
    """
    lines = text.split('\n')

    for line in lines:
        if line.strip().startswith('-'):
            return True

    terse = [line for line in lines if line.strip() and len(line.split()) <= 2]

    # Same test as "count > total / 2", kept in integer arithmetic.
    return 2 * len(terse) > len(lines)
|
484 |
+
|
485 |
+
|
486 |
+
def form_score_essay(essay: str) -> float:
    """Score essay form from word count and basic formatting checks.

    The essay must contain punctuation, must not be all upper case, and must
    not look like bullet points; otherwise it scores 0.0. A well-formed essay
    scores 100.0 for 200-300 words, 50.0 for 120-199 or 301-380 words, and
    0.0 for any other length.
    """
    # COUNT WORDS, IGNORING PUNCTUATION
    word_count = len(re.findall(r'\b\w+\b', essay))

    # CHECK ESSAY FORMAT
    well_formed = (
        contains_punctuation(essay)
        and not is_capitalized(essay)
        and not is_bullet_points(essay)
    )
    if not well_formed:
        return 0.0  # WORST SCORE

    # CALCULATE SCORE
    if 200 <= word_count <= 300:
        return 100.0  # BEST SCORE
    if 120 <= word_count <= 199 or 301 <= word_count <= 380:
        return 50.0  # AVERAGE SCORE
    return 0.0  # WORST SCORE
|
504 |
+
|
505 |
+
|
506 |
+
@app.post("/essay_scoring/")
async def essay_score( prompt : str = Form() , essay : str = Form() ):
    """Score ``essay`` against ``prompt`` on content, form, DSC and grammar.

    Returns the four component scores and their unrounded mean.
    """
    # Component scores, computed in a fixed order (each one logs as it runs).
    content_score_result = float( get_similarity_score( prompt , essay ) ) * 100
    form_score_result = float( form_score_essay( essay ) )
    dsc_score_result = float( dsc_score( essay ) )
    grammar_score_result = float( grammar_score( essay ) )

    print( essay )

    overall = ( content_score_result + form_score_result + dsc_score_result + grammar_score_result) / 4.0

    return {
        "Content Score: " : content_score_result,
        "Form Score: " : form_score_result,
        "DSC Score: " : dsc_score_result,
        "Grammar Score: " : grammar_score_result,
        "Overall Essay Score" : overall
    }
|
pronunciation_fluency_v2.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:25f2404a15f08d5ff7adc3bfb9721b5d4c2e65a05acbcc808a2d2d9d2bd24d57
|
3 |
+
size 27837151
|
requirements.txt
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# OpenAI Whisper from PyPI. The PyPI package named "whisper" is an unrelated
# project that shares the import name, and the git+https form needs git in the image.
openai-whisper
# Imported directly by fast_api.py and download_models.py.
torch
fastapi
pydantic
uvicorn
python-multipart
gunicorn
gensim
scikit-learn
numpy
textblob
nltk
|
trasncribe.py
ADDED
File without changes
|
whisper_tiny_model.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3c80a3201cc10ca84a80717069768f68fbab09a35bff458f77a120e4aa210dee
|
3 |
+
size 151102205
|