import torch import pandas as pd import streamlit as st import torch.nn as nn from transformers import RobertaTokenizer, RobertaModel, PretrainedConfig @st.cache_resource def init_model(): model = RobertaModel(config=PretrainedConfig().from_pretrained("roberta-large-mnli")) model.pooler = nn.Sequential( nn.Linear(1024, 256), nn.LayerNorm(256), nn.ReLU(), nn.Linear(256, 8), nn.Sigmoid() ) model_path = "model.pt" model.load_state_dict(torch.load(model_path, map_location=torch.device("cpu"))) model.eval() return model cats = ["Computer Science", "Economics", "Electrical Engineering", "Mathematics", "Physics", "Biology", "Finance", "Statistics"] def predict(outputs): top = 0 temp = 100000 apr_probs = torch.nn.functional.softmax(torch.tensor([39253., 84., 220., 2263., 1214., 909., 66., 10661.]) / temp, dim=0) probs = nn.functional.softmax(outputs / apr_probs, dim=1).tolist()[0] top_cats = [] top_probs = [] first = True write_cs = False for prob, cat in sorted(zip(probs, cats), reverse=True): if first: if cat == "Computer Science": write_cs = True first = False if top < 95: percent = prob * 100 top += percent top_cats.append(cat) top_probs.append(str(round(percent, 1))) res = pd.DataFrame(top_probs, index=top_cats, columns=['Percent']) st.write(res) if write_cs: st.write("Today everything is connected with Computer Science") tokenizer = RobertaTokenizer.from_pretrained("roberta-large-mnli") model = init_model() st.title("Article classifier") st.markdown("### Title") title = st.text_input("*Enter title (required)") st.markdown("### Abstract") abstract = st.text_area(" Enter abstract", height=200) if not title: st.warning("Please fill in required fields") else: try: st.markdown("### Result") encoded_input = tokenizer(title + ". " + abstract, return_tensors="pt", padding=True, max_length=1024, truncation=True) with torch.no_grad(): outputs = model(**encoded_input).pooler_output[:, 0, :] predict(outputs) except Exception: st.error("Something went wrong. Try different text")