Spaces:

arsath-sm
/

Tamil-spell-checker

Sleeping

App Files Files Community

arsath-sm committed on Oct 16, 2024

Commit

e2b6396

verified ·

1 Parent(s): c0e91ec

Update app.py

Browse files

Files changed (1) hide show

app.py +39 -4

app.py CHANGED Viewed

@@ -1,15 +1,42 @@
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
 # Load the model and tokenizer
 model_name = 'abinayam/gpt-2-tamil'
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForCausalLM.from_pretrained(model_name)
 def correct_text(input_text):
-    # Tokenize the input text
-    input_ids = tokenizer.encode(input_text, return_tensors='pt')
     # Generate corrected text
     with torch.no_grad():
@@ -18,7 +45,10 @@ def correct_text(input_text):
     # Decode the generated text
     corrected_text = tokenizer.decode(output[0], skip_special_tokens=True)
-    return corrected_text
 # Create the Gradio interface
 iface = gr.Interface(
@@ -26,7 +56,12 @@ iface = gr.Interface(
     inputs=gr.Textbox(lines=5, placeholder="Enter Tamil text here..."),
     outputs=gr.Textbox(label="Corrected Text"),
     title="Tamil Spell Corrector and Grammar Checker",
-    description="This app uses the 'abinayam/gpt-2-tamil' model to correct spelling and grammar in Tamil text.",
 )
 # Launch the app

 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
+import re
 # Load the model and tokenizer
 model_name = 'abinayam/gpt-2-tamil'
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForCausalLM.from_pretrained(model_name)
+# Common error corrections
+common_errors = {
+    'பழங்கல்': 'பழங்கள்',
+    # Add more common spelling errors here
+}
# Tamil pulli (virama, U+0BCD): turns a consonant letter into a pure consonant.
_PULLI = '\u0bcd'

# A word ending in "கு" (this also covers the dative ending "க்கு", whose tail
# is "கு"), followed by whitespace and a word starting with one of the hard
# consonants க, ச, த, ப.
_DATIVE_HARD_CONSONANT_RE = re.compile(r'(கு)\s+([கசதப])')


def _double_hard_consonant(match):
    """Build the replacement: repeat the following consonant after "கு"."""
    ending, consonant = match.group(1), match.group(2)
    return f'{ending}{consonant}{_PULLI} {consonant}'


def apply_sandhi_rules(text):
    """Apply Tamil sandhi (consonant-doubling) rules to *text*.

    When a word ending in "கு"/"க்கு" is followed by a word that begins with
    a hard consonant (க, ச, த, ப), that same consonant is appended, with a
    pulli, to the first word, and the whitespace between the two words is
    collapsed to a single space. For example "கடைக்கு போனேன்" becomes
    "கடைக்குப் போனேன்" and "பள்ளிக்கு செல்கிறேன்" becomes
    "பள்ளிக்குச் செல்கிறேன்".

    The inserted consonant follows the next word's initial letter rather
    than always being "ப்", which was only correct before words starting
    with ப.

    Args:
        text: Tamil text to process.

    Returns:
        The text with the doubling rule applied; text that does not match
        the pattern is returned unchanged.
    """
    text = _DATIVE_HARD_CONSONANT_RE.sub(_double_hard_consonant, text)
    # Add more sandhi rules as needed
    return text
def preprocess_text(text):
    """Substitute known misspellings in *text* before it reaches the model.

    Each key of the module-level ``common_errors`` mapping is replaced,
    wherever it occurs in the text, by its mapped correction. Entries are
    applied one after another in the mapping's iteration order.

    Args:
        text: Raw input text.

    Returns:
        The text with every listed misspelling replaced.
    """
    cleaned = text
    for misspelling in common_errors:
        cleaned = cleaned.replace(misspelling, common_errors[misspelling])
    return cleaned
def postprocess_text(text):
    """Run the rule-based clean-up pass over *text*.

    Currently this only delegates to ``apply_sandhi_rules``.

    Args:
        text: Text to post-process.

    Returns:
        The text returned by ``apply_sandhi_rules``.
    """
    return apply_sandhi_rules(text)
 def correct_text(input_text):
+    # Preprocess the input text
+    preprocessed_text = preprocess_text(input_text)
+    # Tokenize the preprocessed text
+    input_ids = tokenizer.encode(preprocessed_text, return_tensors='pt')
     # Generate corrected text
     with torch.no_grad():
     # Decode the generated text
     corrected_text = tokenizer.decode(output[0], skip_special_tokens=True)
+    # Postprocess the corrected text
+    final_text = postprocess_text(corrected_text)
+    return final_text
 # Create the Gradio interface
 iface = gr.Interface(
     inputs=gr.Textbox(lines=5, placeholder="Enter Tamil text here..."),
     outputs=gr.Textbox(label="Corrected Text"),
     title="Tamil Spell Corrector and Grammar Checker",
+    description="This app uses the 'abinayam/gpt-2-tamil' model along with custom rules to correct spelling and grammar in Tamil text.",
+    examples=[
+        ["நான் நேற்று கடைக்கு போனேன். அங்கே நிறைய பழங்கல் வாங்கினேன்."],
+        ["நான் பள்ளிகு செல்கிறேன்."],
+        ["அவன் வீட்டுகு வந்தான்."]
+    ]
 )
 # Launch the app