Michelangiolo committed on
Commit
047d7b8
1 Parent(s): 40836a5

first push

Browse files
Files changed (4) hide show
  1. app.py +44 -0
  2. df.parquet +3 -0
  3. df2.parquet +3 -0
  4. df3.parquet +3 -0
app.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# `os` must be imported before the install command below uses it; without this
# import the script stops with a NameError on its very first statement.
import os

# Install openpyxl into the running environment at startup.
# NOTE(review): nothing visible in this script reads Excel files - confirm the
# dependency is still needed.
os.system('pip install openpyxl')

import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.neighbors import NearestNeighbors

# Sentence-embedding model used to encode incoming queries.
# Other candidate noted by the author: 'all-MiniLM-L6-v2'.
model = SentenceTransformer('all-mpnet-base-v2')

# Corpora shipped next to the script. search1() below reads a 'text' column,
# and the indexes are built from a 'text_vector_' column.
df = pd.read_parquet('df.parquet')
df2 = pd.read_parquet('df2.parquet')
df3 = pd.read_parquet('df3.parquet')

# One nearest-neighbour index per searchable corpus, returning 3 hits per query.
nbrs1 = NearestNeighbors(n_neighbors=3, algorithm='ball_tree').fit(
    df2['text_vector_'].values.tolist()
)
nbrs2 = NearestNeighbors(n_neighbors=3, algorithm='ball_tree').fit(
    df3['text_vector_'].values.tolist()
)
17
+
18
def search1(query, nbrs, full_df, cleaned_df):
    """Return the text that best matches *query*, together with its neighbours.

    Args:
        query: Free-text search string; embedded with the module-level
            ``model``.
        nbrs: Fitted nearest-neighbour index exposing ``kneighbors``. Its row
            positions must line up with the rows of ``cleaned_df``.
        full_df: DataFrame with a ``'text'`` column, looked up by index label.
            Assumed to use integer labels shared with ``cleaned_df``.
        cleaned_df: DataFrame whose rows the index was fitted on.

    Returns:
        The ``'text'`` values of the rows labelled ``best - 1``, ``best`` and
        ``best + 1`` in ``full_df`` (``best`` being the label of the first
        neighbour returned), joined by blank lines. Labels that do not exist
        in ``full_df`` are skipped.
    """
    query_vector = model.encode(query).tolist()

    # kneighbors works on a 2-D batch: send a single query and read row 0.
    _, indices = nbrs.kneighbors([query_vector])

    # Index label of the first (nearest) hit in the searched frame.
    best_label = cleaned_df.iloc[indices[0]].index[0]

    # A hit on the first or last row has no previous/next label. Passing a
    # missing label to .loc raises KeyError, so keep only labels that exist.
    context_labels = [
        label
        for label in range(best_label - 1, best_label + 2)
        if label in full_df.index
    ]
    full_text = full_df.loc[context_labels, 'text'].tolist()
    return '\n\n'.join(full_text)
28
+
29
def search_sentences(df):
    """Split the ``'text'`` column of *df* on ``'.'`` and return the first 50 pieces.

    The previous version ignored ``df`` (it read the global ``df2``) and threw
    the computed Series away, so it always returned ``None``.

    Args:
        df: DataFrame with a string ``'text'`` column.

    Returns:
        A Series named ``'B'`` with a fresh 0..n-1 index holding at most the
        first 50 period-delimited fragments, in row order.
    """
    # One column per fragment, then stack the columns into a single long Series.
    fragments = df['text'].str.split('.', expand=True).stack()
    # Drop the per-row fragment counter, name the Series, and renumber from 0.
    fragments = fragments.reset_index(level=1, drop=True).rename('B')
    fragments = fragments.reset_index(drop=True)
    return fragments[0:50]
31
+
32
# Sample query run once at start-up against the df/df2 corpus.
output = search1(
    'how to speed up data movement',
    nbrs=nbrs1,
    full_df=df,
    cleaned_df=df2,
)
# Bare expression: shows the value in a notebook, has no effect in a script.
output
34
+
35
+ import gradio as gr
36
+ import os
37
+
38
+ #the first module becomes text1, the second module file1
39
def greet(text1):
    """Gradio callback: answer the query *text1* from the df3 corpus.

    df3 is passed both as the searched frame and as the frame the surrounding
    context rows are read from.
    """
    answer = search1(query=text1, nbrs=nbrs2, full_df=df3, cleaned_df=df3)
    return answer
41
+ # return search1(text1, df, df2)
42
+
43
# Single text box in, single text box out, served by greet().
iface = gr.Interface(
    fn=greet,
    inputs=['text'],
    outputs=['text'],
)
# share=False: do not request a public share link.
iface.launch(share=False)
df.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35168aa38c8f8decbcd3ce2737befe38cb758120a79ea51c5a7f82e6b204b6db
3
+ size 2416098
df2.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b053d0d39698adeeae12140da1e48ec3c3c21c5d7e4b8246a2076ec2b35235c0
3
+ size 2187807
df3.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9f5f3c6ebb46ca179837746f0ca02137879e26d9f722812eed0be77930e493c
3
+ size 5695689