Spaces:
Running
Running
File size: 1,910 Bytes
134f875 047d7b8 af79a7d 6f22793 047d7b8 a6e68b0 047d7b8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 |
import os
# Best-effort runtime installation of the packages the script needs.
# pip is invoked through the interpreter that is running this script
# (``sys.executable -m pip``) with an argument list, so no shell is involved
# and the packages land in the environment that will actually import them.
import subprocess
import sys

for _package in ('openpyxl', 'scikit-learn', 'sentence-transformers'):
    # check=False: a failed install is not fatal here; if the package is
    # genuinely missing, the import that follows will report it.
    subprocess.run(
        [sys.executable, '-m', 'pip', 'install', _package],
        check=False,
    )
from sklearn.neighbors import NearestNeighbors
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer
def _fit_neighbors(frame):
    """Fit a 3-neighbour ball-tree index on ``frame['text_vector_']``."""
    vectors = frame['text_vector_'].values.tolist()
    finder = NearestNeighbors(n_neighbors=3, algorithm='ball_tree')
    return finder.fit(vectors)


# Embedding model used to encode incoming queries.
# (The original comment also names all-MiniLM-L6-v2 next to this model.)
model = SentenceTransformer('all-mpnet-base-v2')

# Tables read from parquet files in the working directory.  Further down,
# ``df`` is passed to search1 as ``full_df`` and ``df2`` as ``cleaned_df``;
# ``df3`` is passed in both roles.
df = pd.read_parquet('df.parquet')
df2 = pd.read_parquet('df2.parquet')
df3 = pd.read_parquet('df3.parquet')

# One neighbour index per searchable table: nbrs1 over df2, nbrs2 over df3.
nbrs1 = _fit_neighbors(df2)
nbrs2 = _fit_neighbors(df3)
def search1(query, nbrs, full_df, cleaned_df, encoder=None):
    """Return the text around the best semantic match for ``query``.

    The query is embedded, ``nbrs`` is asked for its nearest stored vectors,
    and the closest one is taken as the hit.  The row position of the hit is
    looked up in ``cleaned_df`` to obtain an index label; the rows of
    ``full_df`` labelled ``hit - 1``, ``hit`` and ``hit + 1`` are then joined
    into one string.

    Args:
        query: Search text to embed.
        nbrs: Fitted neighbour index exposing ``kneighbors``.  The row
            positions it returns are used as positions into ``cleaned_df``.
        full_df: Frame with a ``text`` column, addressed by index label.
        cleaned_df: Frame whose positional rows correspond to the vectors
            ``nbrs`` was fitted on.  Assumes its (integer) index labels are
            also labels of ``full_df`` -- TODO confirm against the data.
        encoder: Object with an ``encode(text)`` method returning a vector
            that has ``tolist()``.  Defaults to the module-level ``model``.

    Returns:
        The ``text`` values of the hit and its available neighbours,
        separated by blank lines.  Neighbours whose label does not exist in
        ``full_df`` (hit on the first or last row) are skipped instead of
        raising ``KeyError``.
    """
    if encoder is None:
        encoder = model
    query_vector = encoder.encode(query).tolist()

    # Only the positions are needed; the distances are ignored.
    _, indices = nbrs.kneighbors([query_vector])
    best_position = indices[0][0]
    hit_label = cleaned_df.index[best_position]

    # One row of context on each side, clipped to labels that really exist so
    # a hit at either end of the table does not fail the lookup.
    window = range(hit_label - 1, hit_label + 2)
    present = [label for label in window if label in full_df.index]

    passages = full_df.loc[present, 'text'].tolist()
    return '\n\n'.join(passages)
def search_sentences(df, limit=50):
    """Split the ``text`` column of ``df`` into '.'-delimited pieces.

    Args:
        df: Frame with a string ``text`` column.
        limit: Maximum number of pieces to return (default 50).

    Returns:
        A Series named ``'B'`` with a fresh 0..n-1 index holding the pieces
        in row order, truncated to ``limit`` entries.
    """
    # expand=True gives one column per piece; stack() flattens them row by row.
    pieces = df['text'].str.split('.', expand=True).stack()
    flat = pieces.rename('B').reset_index(drop=True)
    return flat.iloc[:limit]
# Run one sample paragraph-level query at import time and keep the result in
# the module-level name ``output``.  Nothing is displayed: a bare ``output``
# expression only shows a value in a notebook, not in a script.
output = search1(
    'how to speed up data movement',
    nbrs=nbrs1,
    full_df=df,
    cleaned_df=df2,
)
import gradio as gr
import os
# Callback for the Gradio interface.  The interface lists a Radio and then a
# Textbox as inputs, so the radio choice arrives as ``type`` and the typed
# text as ``text1``.
def greet(type, text1):
    """Run the search that matches the selected granularity.

    Args:
        type: ``"sentence"`` or ``"paragraph"``.
        text1: Query text.

    Returns:
        The string produced by ``search1`` for the chosen mode, or ``None``
        for any other ``type`` value (presumably when no option is selected).
    """
    if type == "paragraph":
        # Paragraph mode: search df2's vectors, read the text from df.
        return search1(text1, nbrs1, df, df2)
    if type == "sentence":
        # Sentence mode: df3 serves as both the searched and the source table.
        return search1(text1, nbrs2, df3, df3)
    return None
# Web UI: a radio button choosing the search granularity and a text box for
# the query, both passed positionally to ``greet``; the returned string is
# shown as text.
iface = gr.Interface(
    fn=greet,
    inputs=[
        gr.Radio(["sentence", "paragraph"]),
        gr.Textbox(label="text"),
    ],
    outputs=["text"],
)

# Start the app when the script is loaded; share=False is passed explicitly.
iface.launch(share=False)