nebiyu29 committed on
Commit
6042393
1 Parent(s): a323c57

added regex expression

Browse files
Files changed (1) hide show
  1. app.py +3 -0
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import streamlit as st
2
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
3
  import torch
 
4
 
5
  tokenizer = AutoTokenizer.from_pretrained("nebiyu29/fintunned-v2-roberta_GA")
6
  model = AutoModelForSequenceClassification.from_pretrained("nebiyu29/fintunned-v2-roberta_GA")
@@ -10,6 +11,8 @@ model = model.to(device)
10
 
11
  # Define a function to split a text into segments of 512 tokens
12
  def split_text(text):
 
 
13
  # Tokenize the text
14
  tokens = tokenizer.tokenize(text)
15
  # Initialize an empty list for segments
 
1
  import streamlit as st
2
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
3
  import torch
4
+ import re
5
 
6
  tokenizer = AutoTokenizer.from_pretrained("nebiyu29/fintunned-v2-roberta_GA")
7
  model = AutoModelForSequenceClassification.from_pretrained("nebiyu29/fintunned-v2-roberta_GA")
 
11
 
12
  # Define a function to split a text into segments of 512 tokens
13
  def split_text(text):
14
+ # remove unnecessary characters
15
+ text=re.sub(r'[^a-zA-Z\s]','',text)
16
  # Tokenize the text
17
  tokens = tokenizer.tokenize(text)
18
  # Initialize an empty list for segments