Spaces:
Running
Running
File size: 2,826 Bytes
b23dced 1b6a720 9615c66 1b6a720 cefbbca 9a8b099 f6250c7 9a8b099 b23dced 3a15f10 b23dced 362d273 0420aaa 362d273 aaa887f a8e1bc9 2dc19fa 518b2cf ab8e1f9 518b2cf ab8e1f9 518b2cf ab8e1f9 518b2cf aaa887f 719ee6f 518b2cf 9b8e2c2 d6cd299 b23dced 00a6751 0dc667c 1486ef3 babbb18 b8c0242 b23dced 0dc667c 00a1bcd 0dc667c 1047b7a de5a93f 0dc667c f599b54 2125690 2531b4e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
import streamlit as st
import torch
import torch.nn.functional as F
import transformers
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset
import numpy as np
import pandas as pd
from io import StringIO
# Page header: the app title followed by two introductory paragraphs
# (training-data note, then upload instructions).
st.title('Can I Patent This?')
for intro_paragraph in (
    "This model is tuned with all patent applications submitted in Jan 2016 in [the Harvard USPTO patent dataset.](https://github.com/suzgunmirac/hupd)",
    "You can upload a .csv file with a patent application to calculate the patentability score.",
):
    st.write(intro_paragraph)
# Start from the bundled sample CSV (one patent application) so the page is
# usable before anything has been uploaded.
dataframe = pd.read_csv('patent_application.csv')

# Let the user replace the sample with their own .csv file holding one application.
uploaded_file = st.file_uploader("Choose a file")
if uploaded_file is not None:
    # The upload is accepted wherever a file-like object is, so pandas can
    # parse it directly; no intermediate bytes/StringIO copies are needed.
    dataframe = pd.read_csv(uploaded_file)

# The rest of the script reads row 0 of this table. Stop here with a readable
# message rather than failing on a lookup when the file has a header but no rows.
if dataframe.empty:
    st.error("The CSV file does not contain any patent application.")
    st.stop()

# Drop the decision column if it exists.
if 'decision' in dataframe.columns:
    dataframe.drop(['decision'], axis=1, inplace=True)

# Show the table that the widgets below are populated from.
st.write(dataframe)
# Form with two editable text areas, pre-filled from row 0 of the table, and a
# submit button. Widgets created inside the `with` block belong to the form.
form = st.form(key='abstract-claims-form')
with form:
    user_input_abstract = st.text_area(
        label='abstract',
        value=dataframe['abstract'][0],
    )
    user_input_claims = st.text_area(
        label='claims',
        value=dataframe['claims'][0],
    )
    submit = st.form_submit_button('Submit')
# NOTE(review): the active checkpoint differs from the commented-out id kept on
# the next line; the latter looks like the fine-tuned variant. Confirm which
# one is intended.
model_name = 'AI-Growth-Lab/PatentSBERTa' #"ayethuzar/tuned-for-patentability"


def _load_model_and_tokenizer(name):
    """Load the sequence-classification model and its tokenizer for *name*.

    Args:
        name: Model identifier passed to both ``from_pretrained`` calls.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    loaded_model = AutoModelForSequenceClassification.from_pretrained(name)
    loaded_tokenizer = AutoTokenizer.from_pretrained(name)
    return loaded_model, loaded_tokenizer


# Streamlit re-executes this script on every widget interaction. Caching the
# loader keeps one model/tokenizer pair alive instead of re-loading both on
# each rerun. `st.cache_resource` is missing from older Streamlit releases, so
# fall back to the plain (uncached) loader there.
_cache_resource = getattr(st, "cache_resource", None)
if _cache_resource is not None:
    _load_model_and_tokenizer = _cache_resource(_load_model_and_tokenizer)

model, tokenizer = _load_model_and_tokenizer(model_name)
# Model input: a one-item batch holding the abstract immediately followed by
# the claims.
# NOTE(review): the two strings are joined without a separator; confirm this
# matches how the model's training text was built.
text = [user_input_abstract + user_input_claims]

if submit:
    # Tokenize the batch (padded, truncated to at most 512 tokens) as PyTorch tensors.
    batch = tokenizer(
        text,
        padding=True,
        truncation=True,
        max_length=512,
        return_tensors="pt",
    )

    # Forward pass without gradient tracking.
    with torch.no_grad():
        outputs = model(**batch)

    # Softmax across dim 1 of the logits; row 0, column 1 is the reported score.
    predictions = F.softmax(outputs.logits, dim=1)
    score = predictions.numpy()[0][1]
    result = "Patentability Score: " + str(score)

    # Render the score as a styled paragraph.
    html_str = f"""<style>p.a {{font: bold {28}px Courier;color:#1D5D9B;}}</style><p class="a">{result}</p>"""
    st.markdown(html_str, unsafe_allow_html=True)
# Sections of the application that can be viewed in addition to the abstract
# and claims.
tuple_of_choices = ('patent_number', 'title', 'background', 'summary', 'description')

# An uploaded CSV is not guaranteed to contain every section. Offering only the
# columns that exist avoids a KeyError when a missing one is selected.
available_choices = tuple(
    choice for choice in tuple_of_choices if choice in dataframe.columns
)

if available_choices:
    # Streamlit select box for the section, then a text area showing row 0 of it.
    option = st.selectbox('Which other sections would you like to view?', available_choices)
    st.write('You selected:', option)
    user_input_other = st.text_area(label='other', value=dataframe[option][0])
else:
    # Nothing to browse: keep the names defined and tell the user why.
    option = None
    user_input_other = ''
    st.write('No other sections are available in this file.')
|