Spaces:
Runtime error
Runtime error
File size: 2,106 Bytes
6bc94ac 436ce71 6bc94ac db5ef00 aafa95b 6bc94ac 436ce71 aafa95b 436ce71 aafa95b db5ef00 5beab45 db5ef00 aafa95b db5ef00 436ce71 db5ef00 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
import re
import spacy
import json
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForCausalLM, AutoModel
import streamlit as st
from urllib.request import Request, urlopen, HTTPError
from bs4 import BeautifulSoup
def hide_footer():
    """Inject a CSS rule into the Streamlit page that hides the footer element."""
    # Assembled line by line; the resulting string is the same markup block
    # that is handed to st.markdown as raw HTML.
    footer_css = (
        "\n"
        "<style>\n"
        "footer {visibility: hidden;}\n"
        "</style>\n"
    )
    st.markdown(footer_css, unsafe_allow_html=True)
@st.cache_resource
def get_seq2seq_model(model_id):
    """Load a sequence-to-sequence model for ``model_id``.

    Wrapped in ``st.cache_resource`` so Streamlit can reuse the loaded model.

    Args:
        model_id: Identifier passed to ``AutoModelForSeq2SeqLM.from_pretrained``.

    Returns:
        The model object returned by ``from_pretrained``.
    """
    seq2seq_model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
    return seq2seq_model
@st.cache_resource
def get_causal_model(model_id):
    """Load a causal language model for ``model_id``.

    Wrapped in ``st.cache_resource`` so Streamlit can reuse the loaded model.

    Args:
        model_id: Identifier passed to ``AutoModelForCausalLM.from_pretrained``.

    Returns:
        The model object returned by ``from_pretrained``.
    """
    # NOTE(review): trust_remote_code=True allows transformers to run custom
    # modeling code shipped in the model repository -- only pass model ids
    # from sources you trust.
    causal_model = AutoModelForCausalLM.from_pretrained(
        model_id,
        trust_remote_code=True,
    )
    return causal_model
@st.cache_resource
def get_auto_model(model_id):
    """Load a model for ``model_id`` through the generic ``AutoModel`` class.

    Wrapped in ``st.cache_resource`` so Streamlit can reuse the loaded model.

    Args:
        model_id: Identifier passed to ``AutoModel.from_pretrained``.

    Returns:
        The model object returned by ``from_pretrained``.
    """
    auto_model = AutoModel.from_pretrained(model_id)
    return auto_model
@st.cache_resource
def get_tokenizer(model_id):
    """Load the tokenizer for ``model_id``.

    Wrapped in ``st.cache_resource`` so Streamlit can reuse the loaded tokenizer.

    Args:
        model_id: Identifier passed to ``AutoTokenizer.from_pretrained``.

    Returns:
        The tokenizer object returned by ``from_pretrained``.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    return tokenizer
@st.cache_data
def get_celeb_data(fpath):
    """Read and parse the JSON file at ``fpath``.

    Wrapped in ``st.cache_data`` so Streamlit can reuse the parsed result.

    Args:
        fpath: Path of the JSON file, opened as UTF-8 text.

    Returns:
        The object produced by ``json.load`` for the file's contents.
    """
    with open(fpath, encoding='UTF-8') as handle:
        celeb_data = json.load(handle)
    return celeb_data
def get_article(url):
    """Download a web page and return the text of its topic paragraphs.

    The page is requested with a browser-like ``User-Agent`` header and parsed
    with BeautifulSoup's ``html.parser``. Only ``<p class="topic-paragraph">``
    elements contribute to the result; their text is split on spaces and
    re-joined with single spaces so that empty fragments disappear.

    Args:
        url: Address of the page to fetch.

    Returns:
        The paragraph text joined into one string, or ``""`` when fetching or
        parsing fails (a notice is then written to the Streamlit page).
    """
    req = Request(
        url=url,
        headers={'User-Agent': 'Mozilla/5.0'},
    )
    try:
        # Read inside a context manager so the HTTP response is always closed,
        # even if reading raises.
        with urlopen(req) as response:
            html = response.read()
        soup = BeautifulSoup(html, features="html.parser")
        # Remove script and style elements so their contents never reach the text.
        for script in soup(["script", "style"]):
            script.extract()
        lines = [
            para.get_text().strip()
            for para in soup.find_all("p", class_='topic-paragraph')
        ]
        # Split on spaces and drop empty pieces, collapsing repeated spaces.
        chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
        return ' '.join(chunk for chunk in chunks if chunk)
    except Exception:
        # Deliberately best-effort: any fetch or parse failure becomes a notice
        # plus an empty article. Unlike a bare ``except``, this lets
        # KeyboardInterrupt and SystemExit propagate.
        st.markdown("The internet is not stable.")
        return ""
@st.cache_resource
def get_spacy_model(model_id):
    """Load the spaCy pipeline named ``model_id``.

    Wrapped in ``st.cache_resource`` so Streamlit can reuse the loaded pipeline.

    Args:
        model_id: Name or path passed to ``spacy.load``.

    Returns:
        The pipeline object returned by ``spacy.load``.
    """
    pipeline = spacy.load(model_id)
    return pipeline
def preprocess_text(name, text: str, model_id):
    """Split ``text`` into sentences with a spaCy pipeline.

    Args:
        name: Not used by this function.
        text: Raw text to segment.
        model_id: Identifier passed to ``get_spacy_model`` to obtain the pipeline.

    Returns:
        A ``(spacy_model, texts)`` tuple: the pipeline that was used and the
        list of sentence strings with surrounding whitespace stripped.
    """
    nlp = get_spacy_model(model_id)
    doc = nlp(text)
    sentences = []
    for sent in doc.sents:
        sentences.append(sent.text.strip())
    return nlp, sentences
|