Spaces:
Running
Running
File size: 2,679 Bytes
b23dced 1b6a720 9615c66 1b6a720 cefbbca 9a8b099 f6250c7 9a8b099 b23dced 3a15f10 b23dced 1905303 0420aaa aaa887f a8e1bc9 2dc19fa 518b2cf ab8e1f9 518b2cf ab8e1f9 518b2cf ab8e1f9 518b2cf aaa887f 719ee6f 518b2cf e904e4b 9b8e2c2 b23dced 2973318 0dc667c d9a4f25 b23dced 0dc667c 1047b7a de5a93f 0dc667c 876676b 2125690 2531b4e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 |
import streamlit as st
import torch
import torch.nn.functional as F
import transformers
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset
import numpy as np
import pandas as pd
from io import StringIO
# Page header: app title, then two introductory paragraphs (training-data
# note and usage hint) written in order.
st.title('Can I Patent This?')
for intro_paragraph in (
    "This model is tuned with all patent applications submitted in Jan 2016 in [the Harvard USPTO patent dataset](https://github.com/suzgunmirac/hupd)",
    "You can upload a .csv file with a patent application to calculate the patentability score",
):
    st.write(intro_paragraph)
# Prepopulate with a sample csv file that has one patent application, so the
# page has content before anything is uploaded.
dataframe = pd.read_csv('patent_application.csv')

# Optional upload of a .csv file with one application; it replaces the sample.
uploaded_file = st.file_uploader("Choose a file")
if uploaded_file is not None:
    # The uploaded object can be used wherever a "file-like" object is
    # accepted, so pandas parses it directly; no intermediate bytes/StringIO
    # copies of the upload are needed.
    dataframe = pd.read_csv(uploaded_file)

# Drop the decision column if it exists, before the table is displayed.
if 'decision' in dataframe.columns:
    dataframe = dataframe.drop(columns=['decision'])

# The rest of the script reads row 0 of this table; a header-only CSV would
# otherwise fail there with an opaque KeyError, so stop with a clear message.
if dataframe.empty:
    st.error("The CSV file does not contain any patent application rows.")
    st.stop()

st.write(dataframe)
# Abstract/claims input form.
# Both text areas are created on the form container (not on `st` directly) so
# that edits to either field are held back and sent together when Submit is
# pressed, instead of each edit triggering its own script rerun.
form = st.form(key='abstract-claims-form')

# Defaults come from the first row of the loaded table; `.iloc[0]` selects it
# by position, independent of the index labels.
user_input_abstract = form.text_area(label='abstract', value=dataframe['abstract'].iloc[0])
user_input_claims = form.text_area(label='claims', value=dataframe['claims'].iloc[0])

# True only on the rerun caused by pressing the button.
submit = form.form_submit_button('Submit')
# Load the fine-tuned sequence-classification model and its tokenizer.
# NOTE(review): this runs at module level, i.e. on every Streamlit rerun;
# consider caching the loaded objects if the installed Streamlit supports it.
model_name = "ayethuzar/tuned-for-patentability"
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Single-element batch holding the full abstract followed by the full claims.
# Both inputs are plain strings, so they must be concatenated whole; indexing
# them with [0] would keep only the first character of each.
# NOTE(review): no separator is inserted between the two texts — confirm this
# matches how the model's training inputs were built.
test = [user_input_abstract + user_input_claims]
# Run the classifier and render the score only when the form was submitted.
if submit:
    # Tokenize the batch as PyTorch tensors, padded and truncated to 512 tokens.
    encoded = tokenizer(
        test,
        padding=True,
        truncation=True,
        max_length=512,
        return_tensors="pt",
    )

    # Forward pass without gradient tracking (inference only).
    with torch.no_grad():
        logits = model(**encoded).logits

    # Softmax over dim 1 turns the logits of each row into probabilities.
    probabilities = F.softmax(logits, dim=1)

    # Row 0, column 1 of the probability matrix is reported as the score.
    # NOTE(review): presumably column 1 is the "accepted" class — confirm
    # against the model's label mapping.
    # str() is applied explicitly to the NumPy scalar so the displayed digits
    # are those of its own string form.
    score = probabilities.numpy()[0][1]
    result = "".join(("Patentability Score: ", str(score)))

    # Styled paragraph; raw HTML must be explicitly allowed in st.markdown.
    html_str = f"""<style>p.a {{font: bold {28}px Courier;color:#1D5D9B;}}</style><p class="a">{result}</p>"""
    st.markdown(html_str, unsafe_allow_html=True)
# Viewer for one additional section of the application.
# Columns of the loaded table that can be selected for display.
tuple_of_choices = (
    'patent_number',
    'title',
    'background',
    'summary',
    'description',
)

# Streamlit select box; the chosen column name is echoed back to the page.
option = st.selectbox('Which other sections would you like to view?', tuple_of_choices)
st.write('You selected:', option)

# Pre-fill a text area with the chosen column's value at index label 0.
selected_section = dataframe[option]
user_input_other = st.text_area(label='other', value=selected_section[0])
|