arsath-sm committed on
Commit
e2b6396
·
verified ·
1 Parent(s): c0e91ec

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -4
app.py CHANGED
@@ -1,15 +1,42 @@
1
  import gradio as gr
2
  from transformers import AutoTokenizer, AutoModelForCausalLM
3
  import torch
 
4
 
5
  # Load the model and tokenizer
6
  model_name = 'abinayam/gpt-2-tamil'
7
  tokenizer = AutoTokenizer.from_pretrained(model_name)
8
  model = AutoModelForCausalLM.from_pretrained(model_name)
9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  def correct_text(input_text):
11
- # Tokenize the input text
12
- input_ids = tokenizer.encode(input_text, return_tensors='pt')
 
 
 
13
 
14
  # Generate corrected text
15
  with torch.no_grad():
@@ -18,7 +45,10 @@ def correct_text(input_text):
18
  # Decode the generated text
19
  corrected_text = tokenizer.decode(output[0], skip_special_tokens=True)
20
 
21
- return corrected_text
 
 
 
22
 
23
  # Create the Gradio interface
24
  iface = gr.Interface(
@@ -26,7 +56,12 @@ iface = gr.Interface(
26
  inputs=gr.Textbox(lines=5, placeholder="Enter Tamil text here..."),
27
  outputs=gr.Textbox(label="Corrected Text"),
28
  title="Tamil Spell Corrector and Grammar Checker",
29
- description="This app uses the 'abinayam/gpt-2-tamil' model to correct spelling and grammar in Tamil text.",
 
 
 
 
 
30
  )
31
 
32
  # Launch the app
 
1
  import gradio as gr
2
  from transformers import AutoTokenizer, AutoModelForCausalLM
3
  import torch
4
+ import re
5
 
6
  # Load the model and tokenizer
7
  model_name = 'abinayam/gpt-2-tamil'
8
  tokenizer = AutoTokenizer.from_pretrained(model_name)
9
  model = AutoModelForCausalLM.from_pretrained(model_name)
10
 
11
+ # Common error corrections
12
+ common_errors = {
13
+ 'பழங்கல்': 'பழங்கள்',
14
+ # Add more common spelling errors here
15
+ }
16
+
17
+ def apply_sandhi_rules(text):
18
+ # Apply sandhi rules
19
+ text = re.sub(r'(கு|க்கு)\s+(ப|த|க|ச)', r'\1ப் \2', text)
20
+ # Add more sandhi rules as needed
21
+ return text
22
+
23
+ def preprocess_text(text):
24
+ # Apply common error corrections
25
+ for error, correction in common_errors.items():
26
+ text = text.replace(error, correction)
27
+ return text
28
+
29
+ def postprocess_text(text):
30
+ # Apply sandhi rules
31
+ text = apply_sandhi_rules(text)
32
+ return text
33
+
34
  def correct_text(input_text):
35
+ # Preprocess the input text
36
+ preprocessed_text = preprocess_text(input_text)
37
+
38
+ # Tokenize the preprocessed text
39
+ input_ids = tokenizer.encode(preprocessed_text, return_tensors='pt')
40
 
41
  # Generate corrected text
42
  with torch.no_grad():
 
45
  # Decode the generated text
46
  corrected_text = tokenizer.decode(output[0], skip_special_tokens=True)
47
 
48
+ # Postprocess the corrected text
49
+ final_text = postprocess_text(corrected_text)
50
+
51
+ return final_text
52
 
53
  # Create the Gradio interface
54
  iface = gr.Interface(
 
56
  inputs=gr.Textbox(lines=5, placeholder="Enter Tamil text here..."),
57
  outputs=gr.Textbox(label="Corrected Text"),
58
  title="Tamil Spell Corrector and Grammar Checker",
59
+ description="This app uses the 'abinayam/gpt-2-tamil' model along with custom rules to correct spelling and grammar in Tamil text.",
60
+ examples=[
61
+ ["நான் நேற்று கடைக்கு போனேன். அங்கே நிறைய பழங்கல் வாங்கினேன்."],
62
+ ["நான் பள்ளிகு செல்கிறேன்."],
63
+ ["அவன் வீட்டுகு வந்தான்."]
64
+ ]
65
  )
66
 
67
  # Launch the app