Kartikeyssj2 commited on
Commit
73cfd62
·
1 Parent(s): 704855e
Dockerfile ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.12.3-slim

WORKDIR /app

# System packages the Python dependencies rely on but the slim image lacks:
#   git    - pip needs it to install the git+https:// requirement (openai/whisper)
#   ffmpeg - Whisper shells out to ffmpeg to decode uploaded audio files
RUN apt-get update \
    && apt-get install -y --no-install-recommends git ffmpeg \
    && rm -rf /var/lib/apt/lists/*

COPY requirements.txt .

# TextBlob's sentence/word tokenisation needs the NLTK corpora at runtime,
# so fetch them at build time instead of failing on the first request.
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir -r requirements.txt \
    && python -m textblob.download_corpora

COPY . .

# Use 4 worker processes to handle requests efficiently.
# Bind to 0.0.0.0 explicitly: gunicorn's default (127.0.0.1:8000) is only
# reachable from inside the container.
# NOTE(review): every worker loads its own copy of the Word2Vec and Whisper
# models - confirm the host has enough memory for 4 workers.
CMD ["gunicorn", "-w", "4", "-k", "uvicorn.workers.UvicornWorker", "--bind", "0.0.0.0:8000", "fast_api:app"]
Whisper_Word2Vec_Deployment ADDED
@@ -0,0 +1 @@
 
 
1
+ Subproject commit 40c6120d1ba5b73520a1c80ad84c09377663b28f
__pycache__/fast_api.cpython-312.pyc ADDED
Binary file (18.7 kB). View file
 
download_models.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gensim.downloader as api
2
+ import os
3
+ import whisper
4
+ import torch
5
+
6
# One-off helper script: download the pretrained models and store them next to
# the application so fast_api.py can load them from local files.

# LOAD THE WORD2VEC MODEL (downloaded through gensim's downloader on first use)
word_2_vec = api.load('word2vec-google-news-300')

# SAVE THE WORD2VEC MODEL LOCALLY
word_2_vec.save("word2vec-google-news-300.model")

# LOAD THE WHISPER MODEL
# Pin the model to the CPU so the saved state dict contains CPU tensors and can
# be loaded on any host, whether or not this machine has a GPU.
model = whisper.load_model("tiny", device="cpu")

# SAVE THE WHISPER MODEL LOCALLY USING TORCH
save_path = "whisper_tiny_model.pt"  # CHOOSE YOUR DESIRED FILE NAME
torch.save(model.state_dict(), save_path)  # SAVE MODEL STATE DICTIONARY
fast_api.py ADDED
@@ -0,0 +1,524 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import whisper
2
+ from fastapi import FastAPI, UploadFile, File, Form
3
+ from pydantic import BaseModel
4
+ import gensim.downloader as api
5
+ from gensim.models import KeyedVectors
6
+ import torch
7
+ import pickle
8
+ import numpy as np
9
+ from gensim.models import KeyedVectors
10
+
11
# Load the saved Word2Vec model (written by download_models.py)
word2vec_model = KeyedVectors.load("word2vec-google-news-300.model")

# Build the Whisper "tiny" architecture, then overwrite its weights with the
# locally stored state dictionary.
model = whisper.load_model("tiny")

# map_location="cpu" lets the checkpoint load on CPU-only hosts even if it was
# saved from a GPU; load_state_dict then copies the tensors onto whatever
# device the model itself lives on.
model_state = torch.load("whisper_tiny_model.pt", map_location="cpu")

# Load the state dictionary into the model
model.load_state_dict(model_state)
21
+
22
def load_model(pickle_file_path: str):
    """Unpickle and return the object stored at ``pickle_file_path``.

    NOTE: ``pickle`` can execute arbitrary code while loading, so this must
    only ever be pointed at trusted, locally produced files.
    """
    with open(pickle_file_path, 'rb') as handle:
        return pickle.load(handle)
27
+
28
+
29
+ pronunciation_fluency_model = load_model("pronunciation_fluency_v2.pkl")
30
+
31
+ app = FastAPI()
32
+
33
+
34
def transcribe(audio_file_path: str, model):
    """Run ``model.transcribe`` on the audio file and return the recognised text.

    ``model`` only needs a ``transcribe(path)`` method that returns a mapping
    with a ``"text"`` entry.
    """
    return model.transcribe(audio_file_path)["text"]
38
+
39
@app.post("/transcribe")
async def transcribe_audio(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file and return ``{"transcription": text}``.

    The upload is spooled to a private temporary file, because ``transcribe``
    works on a file path, and that file is removed once transcription finishes.
    """
    import os
    import tempfile

    # Never write to the client-supplied filename: it is untrusted and could
    # contain path components or overwrite application files. Only its
    # extension is reused, as a suffix for the temporary file.
    suffix = os.path.splitext(file.filename or "")[1]

    # SAVE THE UPLOAD FILE TEMPORARILY
    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as buffer:
        buffer.write(await file.read())
        temp_path = buffer.name

    try:
        # TRANSCRIBE THE AUDIO
        transcription = transcribe(temp_path, model)
    finally:
        # Clean up even when transcription raises, so uploads do not accumulate.
        os.remove(temp_path)

    return {"transcription": transcription}
51
+
52
+
53
def Get_P_F_Score( transcription : str ):
    """Predict pronunciation/fluency scores for a transcription.

    The transcription is split on whitespace, the Word2Vec vectors of all
    in-vocabulary words are summed into one feature vector, and that vector is
    passed to ``pronunciation_fluency_model.predict``.

    Returns whatever ``predict`` returns for the single-row input; the caller
    in this file reads ``output[0][0]`` and ``output[0][1]``.
    """
    words = transcription.split()

    # The accumulator must be a NumPy array: with a plain list,
    # `list += ndarray` EXTENDS the list (300 -> 600 -> 900 ... entries)
    # instead of adding element-wise, which breaks the model's feature count.
    cumulative_vector_representation = np.zeros(word2vec_model.vector_size, dtype=np.float64)
    for word in words:
        if word in word2vec_model:
            cumulative_vector_representation += word2vec_model[word]

    print( cumulative_vector_representation[ 0 : 5] )

    print( len( cumulative_vector_representation) )

    if np.any(np.isnan(cumulative_vector_representation)):
        print("Input contains NaN values, handle missing values before prediction.")

    print("\n\n")

    # predict expects a 2-D input, hence the single-row wrapper list.
    output = pronunciation_fluency_model.predict( [ cumulative_vector_representation] )

    print( output )

    return output
76
+
77
+
78
def get_average_vector(sentence):
    """Return the mean Word2Vec vector of the in-vocabulary words of ``sentence``.

    The sentence is lower-cased and split on whitespace. Words missing from
    ``word2vec_model`` are ignored; if none remain, a zero vector of length
    ``word2vec_model.vector_size`` is returned.
    """
    # LOOK UP THE VECTOR OF EVERY TOKEN THE MODEL KNOWS
    known_vectors = [
        word2vec_model[token]
        for token in sentence.lower().split()
        if token in word2vec_model
    ]

    # NOTHING RECOGNISED: FALL BACK TO THE ZERO VECTOR
    if len(known_vectors) == 0:
        return np.zeros(word2vec_model.vector_size)

    # ELEMENT-WISE MEAN ACROSS ALL RECOGNISED TOKENS
    return np.mean(known_vectors, axis=0)
91
+
92
+ from sklearn.metrics.pairwise import cosine_similarity
93
+
94
def get_similarity_score(topic, transcription ):
    """Return the cosine similarity of two texts as a plain float in [0.0, 1.0].

    Both texts are reduced to their average Word2Vec vector through
    ``get_average_vector``. Negative similarities are clamped to 0.0. Callers
    in this file multiply the result by 100 to obtain a percentage.
    """
    # GET AVERAGE VECTORS FOR BOTH STRINGS
    topic_vector = get_average_vector(topic)
    transcription_vector = get_average_vector(transcription)

    print("topic vector: " , topic_vector)

    print(" transcription vector: " , transcription_vector )

    # RESHAPE VECTORS FOR COSINE SIMILARITY (IT EXPECTS 2-D INPUTS)
    topic_vector = topic_vector.reshape(1, -1)
    transcription_vector = transcription_vector.reshape(1, -1)

    print(" reshaping done ")

    # COMPUTE COSINE SIMILARITY
    similarity = cosine_similarity(topic_vector, transcription_vector)

    print(" Similarity: " , similarity )

    # Convert the NumPy scalar to a built-in float so the value is always
    # JSON-serialisable, even when a caller forgets to wrap it.
    output = float(similarity[ 0 ][ 0 ])

    # Cosine similarity lies in [-1, 1]; the meaningful upper bound is 1, not
    # 100 (this also absorbs floating-point overshoot such as 1.0000001).
    output = max( output , 0.0 )
    output = min( 1.0 , output )

    # RETURN SIMILARITY SCORE (IT'S A SINGLE VALUE)
    return output
122
+
123
+
124
+
125
@app.post("/pronunciation_fluency_score")
async def pronunciation_fluency_scoring(
    file: UploadFile = File(...),
    topic: str = Form(...)
):
    """Score an uploaded speech recording against a topic.

    The audio is transcribed, the transcription is scored by the
    pronunciation/fluency model, and its similarity to ``topic`` gives the
    content score. ``topic`` is a plain text form field, so it is declared
    with ``Form`` rather than ``File``.

    Returns a dict with the keys "pronunciation score", "fluency score" and
    "content score".
    """
    import os
    import tempfile

    # Never write to the client-supplied filename: it is untrusted and could
    # contain path components or overwrite application files. Only its
    # extension is reused, as a suffix for the temporary file.
    suffix = os.path.splitext(file.filename or "")[1]

    # SAVE THE UPLOAD FILE TEMPORARILY
    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as buffer:
        buffer.write(await file.read())
        temp_path = buffer.name

    try:
        # TRANSCRIBE THE AUDIO
        transcription = transcribe(temp_path, model)
    finally:
        # Clean up even when transcription raises, so uploads do not accumulate.
        os.remove(temp_path)

    pronunciation_fluency_score = Get_P_F_Score( transcription )

    print( pronunciation_fluency_score)

    print( type( pronunciation_fluency_score ) )

    # float(...) turns NumPy scalars into built-in floats; FastAPI's JSON
    # encoder cannot serialise types such as numpy.float32.
    content_score = float( get_similarity_score( topic , transcription) ) * 100

    return {

        "pronunciation score" : float( pronunciation_fluency_score[ 0 ][ 0 ] ) * 10 ,
        "fluency score" : float( pronunciation_fluency_score[ 0 ][ 1 ] ) * 10 ,
        "content score" : content_score
    }
156
+
157
+
158
+
159
+ import string
160
+ import asyncio
161
+ import re
162
+ from textblob import TextBlob
163
+ import nltk
164
+
165
def is_valid_summary_format(summary: str) -> bool:
    """Heuristically decide whether ``summary`` has an acceptable format.

    Returns True as soon as the text contains a '-' or '*' character.
    Otherwise the text is split on '.', '!' and '?', and the format is
    accepted when the number of non-empty fragments with at most 70 words is
    at least half the total number of fragments (empty fragments included).
    """
    # NOTE(review): any '-' or '*' (even a hyphenated word) makes the summary
    # "valid". The original comment talked about bullet points, so this may be
    # inverted - confirm the intended rule before changing it.
    if '-' in summary or '*' in summary:
        return True

    # COUNT THE SENTENCES THAT ARE SHORT ENOUGH (AT MOST 70 WORDS)
    sentences = re.split(r'[.!?]', summary)
    short_sentences = sum(len(sentence.split()) <= 70 for sentence in sentences if sentence.strip())

    print(" Short Sentences: " , short_sentences )

    # CONSIDER IT A VALID FORMAT IF AT LEAST HALF OF THE SENTENCES ARE SHORT
    return short_sentences >= len(sentences) / 2


def form_score_summary(summary: str) -> float:
    """Score the form of a summary from 0.0 to 100.0.

    A summary with an invalid format (see ``is_valid_summary_format``) or a
    word count outside 45..75 scores 0. Inside that range the score ramps up
    from 50 at 45 words to 100 at 50 words, stays at 100 up to 70 words, and
    ramps back down to 50 at 75 words.
    """
    # CONVERT THE SUMMARY TO UPPERCASE
    summary_upper = summary.upper()

    # REMOVE PUNCTUATION
    summary_clean = re.sub(r'[^\w\s]', '', summary_upper)

    # COUNT THE NUMBER OF WORDS
    word_count = len(summary_clean.split())

    # CHECK IF THE SUMMARY FORMAT IS VALID
    valid_format = is_valid_summary_format(summary)

    print("\n\n word count: ", word_count, " valid_format: ", valid_format)

    # CALCULATE SCORE BASED ON WORD COUNT AND FORMAT
    if valid_format and 45 <= word_count <= 75:
        if word_count < 50:
            score = 50 + (word_count - 45) * (50 / 5)  # Gradual increase from 50
        elif word_count <= 70:
            # The plateau ends at 70 words; the previous bound of 75 made the
            # decreasing branch below unreachable.
            score = 100  # Best score range
        else:
            score = 100 - (word_count - 70) * (50 / 5)  # Gradual decrease from 100
    else:
        score = 0  # Worst score: invalid format or word count out of range

    # CLAMP SCORE BETWEEN 0 AND 100
    return max(0.0, min(100.0, float(score)))
213
+
214
+
215
+
216
+
217
def grammar_score(text: str) -> int:
    """Return a 0-100 score that loses 5 points per flagged sentence.

    A sentence is flagged when TextBlob's ``correct()`` returns something
    different from the sentence itself.
    """
    # NOTE(review): ``correct()`` is TextBlob's spelling correction, so this
    # appears to measure spelling rather than grammar - confirm intent.
    flagged = sum(
        1 for sentence in TextBlob(text).sentences
        if sentence.correct() != sentence
    )

    print(" \n\n Number of grammatical errors: " , flagged )

    # 5 POINTS PER FLAGGED SENTENCE, NEVER BELOW ZERO
    return max( 0 , 100 - flagged * 5 )
234
+
235
+
236
def vocabulary_score(text: str) -> float:
    """Return the percentage (0-100, two decimals) of words TextBlob leaves unchanged.

    Each word of ``text`` is run through TextBlob's ``correct()``; a word
    counts as correctly spelled when the correction equals the word. Text
    without any words scores 0.0.
    """
    print(" Performing vocabulary score \n\n")

    # TOKENISE THE TEXT INTO WORDS
    tokens = TextBlob(text).words
    total_words = len(tokens)

    # COUNT THE WORDS THAT THE SPELLING CORRECTOR DOES NOT CHANGE
    correctly_spelled = 0
    for token in tokens:
        if token == TextBlob(token).correct():
            correctly_spelled += 1

    # NO WORDS AT ALL: AVOID DIVIDING BY ZERO
    if total_words == 0:
        return 0.0

    # PERCENTAGE, CLAMPED TO 0..100 AND ROUNDED TO TWO DECIMALS
    percentage_correct = (correctly_spelled / total_words) * 100
    percentage_correct = round( max( 0 , min( percentage_correct , 100) ) , 2 )

    print(" Percentage Correct: " , percentage_correct )

    return percentage_correct
266
+
267
+
268
@app.post("/summarization_scoring/")
def summarization_score( essay : str = Form() , summarization : str = Form() ):
    """Score a summary of ``essay`` on content, form, grammar and vocabulary.

    Each component is a float; the overall score is their arithmetic mean
    rounded to two decimals. The response keys keep their trailing ": ".
    """
    # COMPUTE THE FOUR COMPONENT SCORES
    content_score_result = float( get_similarity_score(essay, summarization) ) * 100
    form_score_result = float( form_score_summary(summarization) )
    grammar_score_result = float( grammar_score(summarization) )
    vocabulary_score_result = float( vocabulary_score(summarization) )

    print(" Completed \n\n\n ")

    # AVERAGE OF THE FOUR COMPONENTS
    component_total = (
        content_score_result + form_score_result
        + grammar_score_result + vocabulary_score_result
    )

    response = {
        "Content Score: " : content_score_result ,
        "Form Score: " : form_score_result ,
        "Grammar Score: " : grammar_score_result ,
        "Vocabulary Score: " : vocabulary_score_result ,
        "Overall Summarization Score: " : round( component_total / 4 , 2)
    }

    print( response )

    return response
292
+
293
+
294
+
295
+ '''
296
+ Transitional words can significantly contribute to the development, structure, and coherence of a text.
297
+
298
+ Development: Transitional words help to show how ideas build upon each other and progress
299
+ throughout the essay. They can introduce new points, provide examples, or signal a shift in focus.
300
+
301
+ Structure: Transitional words help to organize the text by indicating relationships between
302
+ ideas. They can show cause and effect, compare and contrast, or signal a sequence of events.
303
+
304
+ Coherence: Transitional words help to create a smooth flow between sentences and paragraphs,
305
+ making the text easier to understand and follow. They can clarify connections between
306
+ ideas and prevent the text from feeling disjointed.
307
+ '''
308
+
309
+
310
# Transitional words and phrases, grouped by rhetorical function. Several
# phrases appear in more than one list; dsc_score de-duplicates them before
# counting, so the overlap does not inflate the score.
addition_transitional_words = [
    "and", "also", "too", "in addition", "furthermore", "moreover", "besides", "likewise",
    "similarly", "equally important", "not to mention", "as well as", "what's more",
    "on top of that", "to boot", "in the same way", "by the same token", "similarly",
    "likewise", "in a similar vein", "correspondingly", "at the same time", "concurrently",
    "simultaneously", "not only... but also", "both... and", "as well", "and then",
    "and so forth", "and so on"
]
contrast_transitional_words = [
    "but", "however", "nevertheless", "nonetheless", "on the other hand", "on the contrary",
    "in contrast", "conversely", "although", "though", "even though", "despite", "in spite of",
    "regardless of", "while", "whereas", "yet", "still", "even so", "even if", "at the same time",
    "by the same token", "equally", "in common", "similarly", "just like", "just as", "as well as",
    "resemble", "equally", "in common", "by the same token"
]
cause_effect_transitional_words = [
    "because", "since", "as", "due to", "owing to", "thanks to", "on account of",
    "as a result", "consequently", "therefore", "hence", "thus", "so", "accordingly",
    "for this reason", "as a consequence", "in consequence", "in that case",
    "that being the case", "for that reason", "as a result of", "because of",
    "on account of", "owing to", "due to", "thanks to"
]
time_transitional_words = [
    "first", "second", "third", "next", "then", "after", "before", "later", "earlier",
    "previously", "subsequently", "following", "meanwhile", "simultaneously",
    "at the same time", "concurrently", "in the meantime", "in the interim", "afterwards",
    "thereafter", "finally", "lastly", "ultimately", "in conclusion", "to conclude",
    "in summary", "to sum up"
]
emphasis_transitional_words = [
    "indeed", "in fact", "certainly", "assuredly", "without a doubt", "undoubtedly",
    "unquestionably", "undeniably", "absolutely", "positively", "emphatically",
    "decisively", "strongly", "forcefully", "with conviction", "with certainty",
    "with assurance", "without hesitation", "without question", "without fail", "without doubt"
]
example_transitional_words = [
    "for example", "for instance", "such as", "like", "as an illustration", "to illustrate",
    "to demonstrate", "to exemplify", "namely", "specifically", "in particular",
    "particularly", "especially"
]
conclusion_transitional_words = [
    "in conclusion", "to conclude", "in summary", "to sum up", "finally", "lastly",
    "ultimately", "therefore", "hence", "thus", "so", "accordingly", "as a result",
    "consequently"
]
transition_between_sections_transitional_words = [
    "in the following section", "moving on to", "now", "let's explore",
    "turning our attention to", "to delve deeper", "we will now examine",
    "next", "at this point", "at this juncture", "furthermore", "moreover",
    "in addition"
]
miscellaneous_transition_words_list = [
    # Clarification
    "in other words", "that is to say", "namely", "to put it another way",
    "in simpler terms", "to clarify", "to explain further", "to elaborate",
    "to be more specific", "to be more exact",

    # Concession
    "admittedly", "granted", "of course", "naturally", "it is true that",
    "it must be admitted that", "it cannot be denied that", "it goes without saying that",

    # Digression
    "by the way", "incidentally", "aside from that", "apart from that",

    # Repetition
    "again", "once again", "still", "further", "furthermore", "moreover", "in addition"
]
contrast_within_sentence_transitional_words = [
    "but", "however", "nevertheless", "nonetheless", "on the other hand",
    "in contrast", "conversely", "although", "though", "even though",
    "despite", "in spite of", "regardless of", "while", "whereas",
    "yet", "still", "even so", "even if"
]
comparison_transitional_words = [
    "similarly", "likewise", "in the same way", "equally", "in common",
    "by the same token", "just like", "just as", "as well as", "resemble"
]
cause_and_effect_within_sentence_transitional_words = [
    "because", "since", "as", "due to", "owing to", "thanks to",
    "on account of", "as a result", "consequently", "therefore",
    "hence", "thus", "so", "accordingly", "for this reason",
    "as a consequence", "in consequence", "in that case",
    "that being the case", "for that reason", "as a result of",
    "because of", "on account of", "owing to", "due to", "thanks to"
]
emphasis_within_sentence_transitional_words = [
    "indeed", "in fact", "certainly", "assuredly", "without a doubt",
    "undoubtedly", "unquestionably", "undeniably", "absolutely",
    "positively", "emphatically", "decisively", "strongly", "forcefully",
    "with conviction", "with certainty", "with assurance",
    "without hesitation", "without question", "without fail", "without doubt"
]
concession_digression_repetition_transitional_words = [
    # Concession
    "admittedly", "granted", "of course", "naturally",
    "it is true that", "it must be admitted that",
    "it cannot be denied that", "it goes without saying that",

    # Digression
    "by the way", "incidentally", "aside from that",
    "apart from that",

    # Repetition
    "again", "once again", "still", "further",
    "furthermore", "moreover", "in addition"
]

# Every distinct phrase from the lists above, longest first, so that the regex
# alternation prefers "as a result of" over "as a result" over "as".
_TRANSITION_PHRASES = sorted(
    {
        phrase
        for word_list in (
            addition_transitional_words,
            contrast_transitional_words,
            cause_effect_transitional_words,
            time_transitional_words,
            emphasis_transitional_words,
            example_transitional_words,
            conclusion_transitional_words,
            transition_between_sections_transitional_words,
            miscellaneous_transition_words_list,
            contrast_within_sentence_transitional_words,
            comparison_transitional_words,
            cause_and_effect_within_sentence_transitional_words,
            emphasis_within_sentence_transitional_words,
            concession_digression_repetition_transitional_words,
        )
        for phrase in word_list
    },
    key=lambda phrase: (-len(phrase), phrase),
)

# One compiled pattern for all phrases. \b anchors stop "as" from matching
# inside "was"; \s+ between the words of a phrase tolerates line breaks and
# repeated spaces in the essay.
_TRANSITION_PATTERN = re.compile(
    r"\b(?:"
    + "|".join(
        r"\s+".join(re.escape(part) for part in phrase.split())
        for phrase in _TRANSITION_PHRASES
    )
    + r")\b"
)


def dsc_score( essay: str ):
    """Score Development, Structure and Coherence (DSC) from transition usage.

    The essay is lower-cased and scanned for whole-word, non-overlapping
    occurrences of the transitional phrases defined above. The share of
    transitions relative to the whitespace-separated word count is compared
    with a target of 10%, and one point is deducted per percentage point of
    deviation.

    Returns a float in [0.0, 100.0]; an essay without any words scores 0.0.
    """
    words = essay.split()
    word_count = len(words)

    # An empty essay has nothing to evaluate (and would divide by zero below).
    if word_count == 0:
        return 0.0

    # Each occurrence is counted exactly once: whole words only, the longest
    # phrase wins, and phrases listed in several categories are not recounted.
    total_transitional_count = len(_TRANSITION_PATTERN.findall(essay.lower()))

    print("\n\n\n Total Transitional Words Count: " , total_transitional_count )

    transitional_words_percentage = round( ( total_transitional_count / ( word_count * 1.00) ) * 100 , 2 )

    print("\n\n\n transitional_words_percentage: " , transitional_words_percentage)

    # A transitional-word share of 10% is treated as ideal, so points are
    # deducted in proportion to the deviation from it. The result is clamped
    # so the score always stays within 0..100.
    return max( 0.0 , min( 100.0 , 100 - abs( transitional_words_percentage - 10 ) ) )
468
+
469
+
470
def is_capitalized(text: str) -> bool:
    """Return True when every cased character in ``text`` is uppercase.

    Follows ``str.isupper``: text without any cased character (empty text,
    digits only, ...) yields False.
    """
    all_upper = text.isupper()
    return all_upper
473
+
474
def contains_punctuation(text: str) -> bool:
    """Return True when ``text`` contains at least one of ``. , ! ? ; :``."""
    return re.search(r'[.,!?;:]', text) is not None
477
+
478
def is_bullet_points(text: str) -> bool:
    """Return True when ``text`` looks like a bullet list or a list of fragments.

    The text is split into lines and blank lines are ignored. It is treated
    as bullet points when any line starts with a bullet marker ('-', '*' or
    '•'), or when more than half of the non-blank lines have at most two
    words. Text without any non-blank line yields False.
    """
    lines = [line.strip() for line in text.split('\n')]
    # Blank lines (paragraph breaks, trailing newlines) must not count towards
    # the total, otherwise they dilute the short-line ratio.
    lines = [line for line in lines if line]
    if not lines:
        return False

    # '*' is accepted alongside '-' to stay consistent with the summary format
    # check in this file, which also treats '*' as a bullet marker.
    if any(line.startswith(('-', '*', '•')) for line in lines):
        return True

    short_lines = sum(len(line.split()) <= 2 for line in lines)
    return short_lines > len(lines) / 2
484
+
485
+
486
def form_score_essay(essay: str) -> float:
    """Score the form of an essay as 100.0, 50.0 or 0.0.

    The essay must contain punctuation, must not be written entirely in
    capitals and must not look like bullet points; otherwise it scores 0.0.
    A well-formed essay scores 100.0 with 200-300 words, 50.0 with 120-199 or
    301-380 words, and 0.0 for any other length.
    """
    # COUNT WORDS (RUNS OF WORD CHARACTERS, PUNCTUATION EXCLUDED)
    word_count = len(re.findall(r'\b\w+\b', essay))

    # FORMAT REQUIREMENTS SHARED BY BOTH SCORING TIERS
    well_formed = (
        contains_punctuation(essay)
        and not is_capitalized(essay)
        and not is_bullet_points(essay)
    )
    if not well_formed:
        return 0.0  # WORST SCORE

    if 200 <= word_count <= 300:
        return 100.0  # BEST SCORE
    if 120 <= word_count <= 199 or 301 <= word_count <= 380:
        return 50.0  # AVERAGE SCORE
    return 0.0  # WORST SCORE
504
+
505
+
506
@app.post("/essay_scoring/")
async def essay_score( prompt : str = Form() , essay : str = Form() ):
    """Score an essay written for ``prompt`` on content, form, DSC and grammar.

    Each component is a float; the overall score is their arithmetic mean.
    The response keys are kept exactly as they are, including the trailing
    ": " on the component keys.
    """
    # COMPUTE THE FOUR COMPONENT SCORES
    content_score_result = float( get_similarity_score( prompt , essay ) ) * 100
    form_score_result = float( form_score_essay( essay ) )
    dsc_score_result = float( dsc_score( essay ) )
    grammar_score_result = float( grammar_score( essay ) )

    print( essay )

    # AVERAGE OF THE FOUR COMPONENTS
    component_total = (
        content_score_result + form_score_result
        + dsc_score_result + grammar_score_result
    )

    return {
        "Content Score: " : content_score_result,
        "Form Score: " : form_score_result,
        "DSC Score: " : dsc_score_result,
        "Grammar Score: " : grammar_score_result,
        "Overall Essay Score" : component_total / 4.0
    }
pronunciation_fluency_v2.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25f2404a15f08d5ff7adc3bfb9721b5d4c2e65a05acbcc808a2d2d9d2bd24d57
3
+ size 27837151
requirements.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# OpenAI Whisper, installed from source (requires git at install time).
git+https://github.com/openai/whisper.git
# The PyPI package named "whisper" is NOT OpenAI Whisper: it is Graphite's
# unrelated time-series database library and installs a conflicting top-level
# `whisper` module, so it is intentionally not listed here.
fastapi
pydantic
uvicorn
python-multipart
gunicorn
gensim
scikit-learn
numpy
textblob
nltk
trasncribe.py ADDED
File without changes
whisper_tiny_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c80a3201cc10ca84a80717069768f68fbab09a35bff458f77a120e4aa210dee
3
+ size 151102205