Spaces:
Runtime error
Runtime error
File size: 1,684 Bytes
81d5bd9 7b26957 85854a6 5636d63 81d5bd9 d341f84 85854a6 d341f84 85854a6 d341f84 81d5bd9 399c0dd 81d5bd9 d341f84 81d5bd9 a501843 81d5bd9 a8e0066 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 |
import gradio as gr
from sentence_transformers import SentenceTransformer, util
import string, re
from cleanco import basename
# Module-level cache for the SentenceTransformer instance; it starts empty and
# is assigned by semantic() the first time that function runs.
model = None
# Translation table that deletes every ASCII punctuation character plus the
# extra characters in the trailing literal.
# NOTE(review): 'ββ' repeats one character and looks like a mis-encoded pair of
# other symbols (possibly curly quotes) — confirm against the original file.
_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation + 'ββ')

# Matches a standalone Roman numeral written in upper case (e.g. "III", "XIV").
# The look-ahead requires the match to begin on a Roman-numeral letter.
# NOTE(review): the doubled "I" inside the character classes is redundant as
# written; it may stand for a look-alike character lost in transit — confirm.
_ROMAN_NUMERAL_RE = re.compile(
    r"\b(?=[MDCLXVII])M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})([II]X|[II]V|V?[II]{0,3})\b\.?"
)


def prepare(text):
    """Normalise a company name before it is embedded.

    Steps, in order: delete punctuation, delete standalone upper-case Roman
    numerals, pass the name through ``cleanco.basename`` and upper-case the
    result.

    Args:
        text: Raw company name. ``None`` is treated as an empty string so an
            unset input does not raise ``AttributeError``.

    Returns:
        The cleaned, upper-cased name.
    """
    if text is None:
        text = ''
    text = text.translate(_PUNCTUATION_TABLE)
    # The pattern is case-sensitive and runs before upper-casing, so only
    # numerals that were already upper case in the input are removed.
    text = _ROMAN_NUMERAL_RE.sub('', text)
    return basename(text).upper()
def semantic(company_1, company_2):
    """Return the cosine similarity of two company names as a string.

    Both names are normalised with ``prepare`` and embedded with the
    ``all-mpnet-base-v2`` SentenceTransformer model. The model is created on
    the first call and stored in the module-level ``model`` global so later
    calls reuse it.

    Args:
        company_1: First company name.
        company_2: Second company name.

    Returns:
        The cosine similarity between the two embeddings, formatted with
        ``"{:.4f}"``.
    """
    global model
    sentences = [prepare(company_1), prepare(company_2)]
    if model is None:
        model = SentenceTransformer('all-mpnet-base-v2')

    embeddings = model.encode(sentences, convert_to_tensor=True)
    # 2x2 similarity matrix; with exactly two sentences the only distinct pair
    # is (0, 1), so there is nothing to rank or sort.
    cosine_scores = util.cos_sim(embeddings, embeddings)
    score = float(cosine_scores[0][1])
    return "{:.4f}".format(score)
# Initial contents of the two input boxes in the demo UI.
company_1 = "Growth Capital Acquisition Corp"
company_2 = None  # "Growth Capital Acquisition Corp III"
title = 'sentences_semantic'

# Build the web UI around semantic() and start serving it. Components come
# from the top-level gradio namespace with `value=`; the older
# gr.inputs / gr.outputs namespaces (with `default=` / `type=`) are deprecated.
gr.Interface(
    semantic,
    inputs=[
        gr.Textbox(lines=1, value=company_1, label="Company_1"),
        gr.Textbox(lines=1, value=company_2, label="Company_2"),
    ],
    outputs=[gr.Textbox(label="Score")],
    title=title,
).launch()