Spaces:
Sleeping
Sleeping
added regex to strip non-alphabetic characters
Browse files
app.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
import streamlit as st
|
2 |
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
3 |
import torch
|
|
|
4 |
|
5 |
tokenizer = AutoTokenizer.from_pretrained("nebiyu29/fintunned-v2-roberta_GA")
|
6 |
model = AutoModelForSequenceClassification.from_pretrained("nebiyu29/fintunned-v2-roberta_GA")
|
@@ -10,6 +11,8 @@ model = model.to(device)
|
|
10 |
|
11 |
# Define a function to split a text into segments of 512 tokens
|
12 |
def split_text(text):
|
|
|
|
|
13 |
# Tokenize the text
|
14 |
tokens = tokenizer.tokenize(text)
|
15 |
# Initialize an empty list for segments
|
|
|
import streamlit as st
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import re

# Load the fine-tuned RoBERTa sequence-classification checkpoint and its
# matching tokenizer from the Hugging Face Hub.
# NOTE(review): this downloads weights at import time on first run; the
# model is moved to `device` later in the file (outside this hunk).
tokenizer = AutoTokenizer.from_pretrained("nebiyu29/fintunned-v2-roberta_GA")
model = AutoModelForSequenceClassification.from_pretrained("nebiyu29/fintunned-v2-roberta_GA")
|
|
|
11 |
|
12 |
# Define a function to split a text into segments of 512 tokens
|
13 |
def split_text(text):
|
14 |
+
# remove unnecessary characters (strips everything except letters and whitespace)
|
15 |
+
text=re.sub(r'[^a-zA-Z\s]','',text)
|
16 |
# Tokenize the text
|
17 |
tokens = tokenizer.tokenize(text)
|
18 |
# Initialize an empty list for segments
|