Spaces:
Runtime error
Runtime error
File size: 3,781 Bytes
4e74356 de3cc49 0820f13 4e74356 0820f13 4e74356 0820f13 4e74356 f2092a2 9fa587a f2092a2 0820f13 4e74356 0820f13 4e74356 f2092a2 4e74356 9fa587a 0820f13 4e74356 9fa587a f2092a2 0820f13 4e74356 0820f13 f2092a2 4e74356 0820f13 f2092a2 9fa587a f2092a2 9fa587a 3d4e8d2 38b18d6 3d4e8d2 38b18d6 3d4e8d2 39a7cf9 3d4e8d2 f2092a2 f7306fb f2092a2 3a9ba56 3d4e8d2 9fa587a f2092a2 3d4e8d2 f2092a2 9fa587a f2092a2 9fa587a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 |
import os
# Install dependencies at startup, before sentence_transformers is imported
# below. The exit status of these shell commands is not checked, so a failed
# install only shows up later (e.g. as an ImportError).
# NOTE(review): declaring these packages in the deployment's requirements file
# would presumably be more reliable than installing on every start — confirm.
os.system('pip install openpyxl')
os.system('pip install sentence-transformers')
import pandas as pd
import gradio as gr
from sentence_transformers import SentenceTransformer
# Embedding model used by search() to encode the user's query.
# The original note also lists 'all-MiniLM-L6-v2' as a candidate model name.
model = SentenceTransformer('all-mpnet-base-v2') #all-MiniLM-L6-v2 #all-mpnet-base-v2
# Load the dataset. Later code reads the columns 'text_vector_', 'name',
# 'raised', 'target', 'size', 'stage', 'country', 'source', 'description'
# and 'tags' from it.
df = pd.read_parquet('df_encoded3.parquet')
# Force every 'tags' value to its string representation.
df['tags'] = df['tags'].apply(lambda x : str(x))
def parse_raised(x):
    """Convert a funding label such as '$500K' or '$2.5M' to millions.

    The first character of the label is dropped as a currency symbol and an
    optional trailing magnitude suffix is applied: 'K' (thousands),
    'M' (millions) or 'B' (billions), case-insensitively. A label without a
    suffix is read as a plain amount and scaled down to millions. Thousands
    separators (',') are ignored.

    Args:
        x: Funding label. 'Undisclosed', an empty string, or any non-string
            value (e.g. None/NaN from missing data) means "unknown".

    Returns:
        The amount in millions as a float, or 0 when the amount is unknown.

    Raises:
        ValueError: If the numeric part of the label cannot be parsed.
    """
    # Missing values arrive as None/NaN rather than str; treat as undisclosed.
    if not isinstance(x, str):
        return 0
    label = x.strip()
    if not label or label == 'Undisclosed':
        return 0

    suffix = label[-1].upper()
    if suffix in ('K', 'M', 'B'):
        # Drop the leading currency symbol and the trailing suffix.
        number = label[1:-1]
    else:
        # No magnitude suffix: only the currency symbol is dropped.
        suffix = ''
        number = label[1:]
    value = float(number.replace(',', ''))

    if suffix == 'K':
        return value / 1000
    if suffix == 'M':
        return value
    if suffix == 'B':
        return value * 1000
    return value / 1_000_000
# Replace the textual 'raised' labels with the numeric values produced by
# parse_raised.
df['raised'] = df['raised'].apply(lambda x : parse_raised(x))
# Lower-case the stage labels; greet() lower-cases the selected stage before
# comparing against this column.
df['stage'] = df['stage'].apply(lambda x : x.lower())
# Renumber rows from 0 so the positions returned by the neighbour index can be
# used directly with df.iloc in search().
df = df.reset_index(drop=True)
# NOTE: pandas and SentenceTransformer are already imported at the top of the
# file; only NearestNeighbors is new here.
from sklearn.neighbors import NearestNeighbors
import pandas as pd
from sentence_transformers import SentenceTransformer
# Fit a ball-tree nearest-neighbour index over the stored 'text_vector_'
# values; each query asks for 5000 neighbours.
# NOTE(review): presumably the dataset must hold at least 5000 rows for
# kneighbors() to succeed — confirm against the data file.
nbrs = NearestNeighbors(n_neighbors=5000, algorithm='ball_tree').fit(df['text_vector_'].values.tolist())
def search(df, query):
    """Return the rows of ``df`` closest to a free-text query.

    The query is encoded with the module-level ``model`` and looked up in the
    module-level ``nbrs`` index; the matching rows are selected by position.

    Args:
        df: DataFrame whose row positions match the vectors ``nbrs`` was
            fitted on.
        query: Text describing what to search for.

    Returns:
        A DataFrame with the display columns only, in the order in which
        ``nbrs.kneighbors`` returned the neighbours.
    """
    display_columns = [
        'name', 'raised', 'target', 'size', 'stage',
        'country', 'source', 'description', 'tags',
    ]
    query_vector = model.encode(query).tolist()
    # kneighbors takes a batch of vectors; we send one and read back its row.
    _, neighbour_positions = nbrs.kneighbors([query_vector])
    nearest_rows = df.iloc[neighbour_positions[0]]
    return nearest_rows[display_columns]
def filter_df(df, column_name, filter_type, filter_value, minimum_acceptable_size=0):
    """Filter ``df`` on one column, falling back to ``df`` if too little is left.

    Args:
        df: DataFrame to filter.
        column_name: Column the condition is applied to.
        filter_type: One of '==', '>=', '<=' or 'contains'. 'contains' does a
            substring/pattern match on the string values of the column;
            missing values never match.
        filter_value: Value (or pattern, for 'contains') to compare against.
        minimum_acceptable_size: Smallest acceptable ``DataFrame.size`` of the
            filtered result. Note that ``size`` counts cells (rows x columns),
            not rows. With the default of 0 the filtered frame is always
            returned.

    Returns:
        The filtered DataFrame when it is large enough, otherwise the
        unfiltered ``df``.

    Raises:
        ValueError: If ``filter_type`` is not one of the supported operators.
    """
    column = df[column_name]
    if filter_type == '==':
        mask = column == filter_value
    elif filter_type == '>=':
        mask = column >= filter_value
    elif filter_type == '<=':
        mask = column <= filter_value
    elif filter_type == 'contains':
        # Match on the requested column (not a fixed one); na=False keeps
        # missing values from producing an invalid boolean mask.
        mask = column.str.contains(filter_value, na=False)
    else:
        raise ValueError(f"unsupported filter_type: {filter_type!r}")

    df_filtered = df[mask]
    if df_filtered.size >= minimum_acceptable_size:
        return df_filtered
    # Too few matches: hand back the unfiltered input instead.
    return df
#the first module becomes text1, the second module file1
def greet(size, target, stage, query):
    """Search for startups matching ``query`` and narrow the hits.

    The nearest neighbours of the query are filtered by company size, then by
    funding stage (unless ``stage`` is 'ALL'), then by target market. Each
    filter is requested with a minimum size of 1, so a filter that leaves
    nothing is skipped and its input is passed on unchanged.

    Args:
        size: Company-size label to match exactly.
        target: Text the 'target' column must contain.
        stage: Funding stage to match (compared in lower case), or 'ALL'.
        query: Free-text description of the startup being looked for.

    Returns:
        At most the first 100 remaining rows, unsorted.
    """
    matches = search(df, query)
    # Display 'Undisclosed' in place of a zero amount.
    matches['raised'] = matches['raised'].apply(
        lambda amount: 'Undisclosed' if amount == 0 else amount
    )

    by_size = filter_df(matches, 'size', '==', size, 1)
    # 'ALL' bypasses the stage filter entirely.
    if stage == 'ALL':
        by_stage = by_size
    else:
        by_stage = filter_df(by_size, 'stage', '==', stage.lower(), 1)
    print(by_stage.size)

    by_target = filter_df(by_stage, 'target', 'contains', target, 1)
    # We leave sorting for last; results are currently returned unsorted.
    return by_target[:100]
# Build the search UI: three radio filters, a free-text query box, a button
# and a results table. Clicking the button calls greet() with the current
# values of the four inputs and shows the returned DataFrame.
with gr.Blocks(theme=gr.themes.Soft(primary_hue='amber', secondary_hue='gray', neutral_hue='amber')) as demo:
    # The heading text is kept flush-left inside the string so Markdown does
    # not render it as an indented code block.
    gr.Markdown("\n# Startup Search Engine\n")
    # gr.Radio is single-choice by design and has no `multiselect` parameter
    # (that option belongs to gr.Dropdown), so it is not passed here.
    size = gr.Radio(['1-10', '11-50', '51-200', '201-500', '500+', '11-500+'], value='11-500+', label='size')
    target = gr.Radio(['B2B', 'B2C', 'B2G', 'B2B2C'], value='B2B', label='target')
    stage = gr.Radio(['pre-seed', 'A', 'B', 'C', 'ALL'], value='ALL', label='stage')
    query = gr.Textbox(label='Describe the Startup you are searching for', value='age reversing')
    btn = gr.Button(value="Search for a Startup")
    output1 = gr.DataFrame(label='value')
    btn.click(greet, [size, target, stage, query], [output1])
demo.launch(share=False)