"""Gradio demo: nearest-neighbour text search over pre-embedded parquet data.

Three parquet files are loaded at start-up (``df``, ``df2``, ``df3``). ``df2``
and ``df3`` must carry a ``text_vector_`` column, which is fitted into
ball-tree nearest-neighbour indexes; ``df`` and ``df3`` must carry a ``text``
column, which is what gets returned to the user.
"""
import os

# Dependencies are installed at start-up. These calls must stay ahead of the
# third-party imports below, which rely on the packages being present.
os.system('pip install openpyxl')
os.system('pip install scikit-learn')
os.system('pip install sentence-transformers')

import gradio as gr
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.neighbors import NearestNeighbors

# Alternative (smaller) checkpoint: 'all-MiniLM-L6-v2'.
model = SentenceTransformer('all-mpnet-base-v2')

df = pd.read_parquet('df.parquet')
df2 = pd.read_parquet('df2.parquet')
df3 = pd.read_parquet('df3.parquet')

# One nearest-neighbour index per searchable frame, built from the stored
# embedding vectors.
nbrs1 = NearestNeighbors(n_neighbors=3, algorithm='ball_tree').fit(
    df2['text_vector_'].values.tolist()
)
nbrs2 = NearestNeighbors(n_neighbors=3, algorithm='ball_tree').fit(
    df3['text_vector_'].values.tolist()
)


def search1(query, nbrs, full_df, cleaned_df):
    """Return the text around the row that best matches ``query``.

    The query is embedded with the module-level ``model`` and looked up in
    ``nbrs``. The closest hit is mapped to its index label in ``cleaned_df``,
    and the rows of ``full_df`` labelled ``label - 1``, ``label`` and
    ``label + 1`` are returned as context.

    Args:
        query: Free-text search string.
        nbrs: Fitted ``NearestNeighbors`` index whose rows are positionally
            aligned with ``cleaned_df``.
        full_df: Frame with a ``text`` column, addressed by label.
            Assumes an integer index that shares labels with ``cleaned_df``.
        cleaned_df: Frame the index was fitted on.

    Returns:
        The ``text`` values of the context rows joined by blank lines. Labels
        that do not exist in ``full_df`` are skipped, so a hit on the first or
        last row yields a shorter window instead of raising ``KeyError``.
    """
    query_vector = model.encode(query).tolist()
    _, indices = nbrs.kneighbors([query_vector])

    # Only the single closest neighbour is used; translate its position in the
    # fitted data into the corresponding index label.
    center = cleaned_df.index[indices[0][0]]

    # .loc raises KeyError for any missing label, so keep only the neighbours
    # that are actually present in full_df.
    window = [
        label
        for label in range(center - 1, center + 2)
        if label in full_df.index
    ]
    full_text = full_df.loc[window, 'text'].tolist()
    return '\n\n'.join(full_text)


def search_sentences(df):
    """Split the ``text`` column of ``df`` on '.' and return the first 50 pieces.

    Args:
        df: Frame with a string ``text`` column.

    Returns:
        A Series named ``'B'`` with a fresh 0-based index holding at most the
        first 50 fragments produced by splitting every ``text`` value on '.'.
    """
    pieces = df['text'].str.split('.', expand=True).stack()
    pieces = pieces.reset_index(level=1, drop=True).rename('B')
    return pieces.reset_index(drop=True)[0:50]


# Sample query executed once at start-up; the result is kept in ``output``.
output = search1(
    'how to speed up data movement', nbrs=nbrs1, full_df=df, cleaned_df=df2
)


# Gradio passes the input components positionally: the radio selection arrives
# as ``type`` and the textbox content as ``text1``.
def greet(type, text1):
    """Run a search at the granularity chosen in the UI.

    Args:
        type: ``"sentence"`` or ``"paragraph"`` (the radio button value).
        text1: The query text.

    Returns:
        The search result text, or ``None`` when ``type`` is neither of the
        two known values (for example when nothing is selected).
    """
    if type == "sentence":
        return search1(text1, nbrs2, df3, df3)
    if type == "paragraph":
        return search1(text1, nbrs1, df, df2)
    return None


iface = gr.Interface(
    fn=greet,
    inputs=[
        gr.Radio(["sentence", "paragraph"]),
        gr.Textbox(label="text"),
    ],
    outputs=["text"],
)
iface.launch(share=False)