# NOTE: the next three lines are residue from the hosting page, not program text.
# Spaces:
# Build error
# Build error
import difflib | |
import json | |
import numpy as np | |
import streamlit as st | |
from pyserini.search.lucene import LuceneSearcher | |
def read_json(file_name):
    """Load and return the JSON document stored in a file.

    Args:
        file_name: Path of the JSON file to read.

    Returns:
        The decoded JSON value (whatever ``json.load`` produces for the file).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform's default
    # encoding, so non-ASCII content loads the same way on every system.
    with open(file_name, "r", encoding="utf-8") as f:
        return json.load(f)
class SearchApplication:
    """Streamlit page that searches a Lucene index of ChatGPT repositories.

    Constructing an instance renders the whole page: the header, the query
    box and search button, the popular-word shortcut buttons and, when there
    is something to search for, the results or a list of suggested words.
    """

    # Labels of the popular-word shortcut buttons. Clicking one replaces the
    # query with the lower-cased label.
    POPULAR_WORDS = ("Prompt", "Chatbot", "Langchain", "Extension", "LLMs", "API")

    def __init__(self):
        """Configure the page, open the index, and render every widget."""
        self.title = "Awesome ChatGPT repositories search"
        self.set_page_config()
        self.searcher = self.set_searcher()

        st.header(self.title)
        col1, col2 = st.columns(2)
        with col1:
            self.query = st.text_input("Search English words", value="")
        with col2:
            st.write("#")
            # NOTE(review): this label looks like a mis-decoded emoji --
            # confirm the intended icon before changing it.
            self.search_button = st.button("๐")

        st.caption(
            "You can search for open-source software from [500+ "
            " repositories](https://github.com/taishi-i/awesome-ChatGPT-repositories)."
        )
        st.write("#")

        candidate_words_file = "candidate_words.json"
        candidate_words_json = read_json(candidate_words_file)
        self.candidate_words = candidate_words_json["candidate_words"]

        self.show_popular_words()
        self.show_search_results()

    def set_page_config(self):
        """Set the page title, icon and layout from ``self.title``."""
        st.set_page_config(
            page_title=self.title,
            # NOTE(review): same apparently mis-decoded emoji as the search
            # button label -- confirm the intended icon.
            page_icon="๐",
            layout="centered",
        )

    def set_searcher(self):
        """Return a ``LuceneSearcher`` opened on the ``indexes/docs`` index."""
        return LuceneSearcher("indexes/docs")

    def show_popular_words(self):
        """Render one shortcut button per popular word, side by side.

        When a button is clicked, ``self.query`` is replaced with that word in
        lower case so the search below runs on it.
        """
        st.caption("Popular words")
        columns = st.columns(len(self.POPULAR_WORDS))
        for column, label in zip(columns, self.POPULAR_WORDS):
            with column:
                if st.button(label):
                    self.query = label.lower()

    def show_search_results(self):
        """Run the search for ``self.query`` and render the outcome.

        Nothing is rendered unless there is a query or the search button was
        pressed. Matching repositories are listed in descending ``freq``
        order; when nothing matches a non-empty query, the closest candidate
        words are shown as suggestions instead.
        """
        if not (self.query or self.search_button):
            return

        st.write("#")
        search_results = self.searcher.search(self.query, k=500)
        num_search_results = len(search_results)
        st.write(f"A total of {num_search_results} repositories found.")

        if num_search_results > 0:
            repositories = [json.loads(result.raw) for result in search_results]
            repositories.sort(key=lambda repo: repo["freq"], reverse=True)
            for repo in repositories:
                description = repo["description"]
                url = repo["url"]
                project_name = repo["project_name"]

                st.write("---")
                st.subheader(f"[{project_name}]({url})")
                st.write(description)
                st.caption(self._describe_repository(repo))
        elif len(self.query) > 0:
            suggestions = self._suggest_words(self.query, self.candidate_words)
            st.caption("Suggestions")
            for i, word in enumerate(suggestions, start=1):
                st.write(f"{i}: {word}")

    @staticmethod
    def _describe_repository(json_data):
        """Build the "language / license" caption for one repository record.

        Args:
            json_data: Mapping for one repository. ``language`` is read as a
                string and ``license`` as a mapping with a ``name`` entry;
                either may be missing or ``None``.

        Returns:
            The two parts joined by ``" / "``, with an "Unknown" placeholder
            for a part that is missing or empty.
        """
        language = json_data.get("language")
        license_info = json_data.get("license")
        info = [
            language if language else "Language: Unknown",
            license_info["name"] if license_info is not None else "License: Unknown",
        ]
        return " / ".join(info)

    @staticmethod
    def _suggest_words(query, candidate_words, limit=6):
        """Return the candidate words most similar to ``query``.

        Similarity is the ``difflib.SequenceMatcher`` ratio between the query
        and each candidate.

        Args:
            query: The text the user searched for.
            candidate_words: Sequence of words to choose suggestions from.
            limit: Maximum number of top-scoring candidates to consider.

        Returns:
            Up to ``limit`` words, best match first, with duplicates removed.
        """
        scores = [
            difflib.SequenceMatcher(None, query, candidate_word).ratio()
            for candidate_word in candidate_words
        ]
        indexes = np.argsort(scores)[::-1][:limit]
        # dict.fromkeys drops repeated words while keeping the ranking order.
        return list(dict.fromkeys(candidate_words[i] for i in indexes))
def main():
    """Entry point: construct the search application.

    The instance is not kept; constructing it is what renders the page.
    """
    SearchApplication()
# Start the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()