varun500 committed on
Commit
1f7ba69
1 Parent(s): 595b933

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +91 -0
app.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from connection import PineconeConnection
3
+ from sentence_transformers import SentenceTransformer
4
+
5
+
6
@st.cache_resource
def init_retriever():
    """Build the sentence-embedding model used to encode search queries.

    The function is wrapped in ``st.cache_resource`` so Streamlit can reuse
    the returned object instead of constructing it again on each rerun.

    Returns:
        A ``SentenceTransformer`` for the
        ``sentence-transformers/all-mpnet-base-v2`` checkpoint.
    """
    model_name = "sentence-transformers/all-mpnet-base-v2"
    model = SentenceTransformer(model_name)
    return model
9
+
10
+
11
def card(thumbnail, title, url, context):
    """Render one search result as a Bootstrap-styled HTML card.

    The markup is emitted through ``st.markdown(..., unsafe_allow_html=True)``,
    so every interpolated value is HTML-escaped and every attribute value is
    quoted. Without that, a URL or title containing spaces, quotes, ``<`` or
    ``&`` would break the card layout or inject markup into the page.

    Args:
        thumbnail: Image URL used as the ``src`` of the card image.
        title: Text of the result link.
        url: Link target for both the image and the title.
        context: Text excerpt for the result; only the first 200 characters
            are shown, capitalised and followed by "....".

    Returns:
        The value returned by ``st.markdown`` for the rendered element.
    """
    import html  # function-scope import keeps this block self-contained

    # str() keeps non-string metadata (e.g. numbers) renderable, as the
    # original f-string interpolation did.
    safe_url = html.escape(str(url), quote=True)
    safe_thumbnail = html.escape(str(thumbnail), quote=True)
    safe_title = html.escape(str(title))
    # Truncate first, then escape, so an entity such as "&amp;" is never
    # cut in half by the 200-character limit.
    snippet = html.escape(context[:200].capitalize() + "....")

    return st.markdown(
        f"""
<div class="container-fluid">
  <div class="row align-items-start">
    <div class="col-md-4 col-sm-4">
      <div class="position-relative">
        <a href="{safe_url}"><img src="{safe_thumbnail}" class="img-fluid" style="width: 192px; height: 106px"></a>
      </div>
    </div>
    <div class="col-md-8 col-sm-8">
      <a href="{safe_url}">{safe_title}</a>
      <br>
      <span style="color: #808080;">
        <small>{snippet}</small>
      </span>
    </div>
  </div>
</div>
""",
        unsafe_allow_html=True,
    )
33
+
34
+
35
# Landing text: what the app does, where the data comes from, and a few
# example topics to try. The note about missing thumbnails/URLs is meant as a
# reassurance, so it states that app behaviour is NOT affected.
st.markdown(
    """
# YouTube Q&A

The app matches the natural language question to the video transcripts and finds you similar videos. It will give you links to YouTube Videos that match your search question.

The app will query a vector database (Pinecone) and perform Semantic Search.

The dataset was taken from HuggingFace: [Youtube Video Transcriptions](https://huggingface.co/datasets/pinecone/yt-transcriptions).

This app is an official submission to the Streamlit Connections Hackathon.
- [Hackathon Link](https://discuss.streamlit.io/t/connections-hackathon/47574)
- [GitHub Repo](https://github.com/awinml/st-pinecone-connection)
- [Pinecone Vector Database](https://www.pinecone.io/)

Some URLs and Images may not show up when searching for certain keywords, due to missing values in the original dataset. It does not affect the performance of the vector database or the app.

Some topics that you can search for:
- Reinforcement Learning
- GANs
- Tensorflow
"""
)
58
+
59
# Load the Bootstrap 4.0.0 stylesheet so the grid/utility classes used by
# card() (container-fluid, row, col-md-*, img-fluid) are styled.
# The package spec in the URL must be "bootstrap@4.0.0": the integrity hash
# below is the one published for that exact release, and a mismatching or
# mangled URL makes the browser skip the stylesheet.
st.markdown(
    """
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap@4.0.0/dist/css/bootstrap.min.css" integrity="sha384-Gn5384xqQ1aoWXA+058RXPxPg6fy4IWvTNh0E263XmFcJlSAwiGgFAW/dAiS6JXm" crossorigin="anonymous">
""",
    unsafe_allow_html=True,
)
65
+
66
# Open the Pinecone connection through Streamlit's connection API.
# The API key is read from Streamlit secrets rather than hard-coded.
pinecone_api_key = st.secrets["api_key"]
conn = st.experimental_connection(
    "PineconeVectorDB",
    type=PineconeConnection,
    environment="us-west1-gcp-free",
    api_key=pinecone_api_key,
)
# NOTE(review): `cursor` is not referenced by the visible script; the call is
# kept because conn.cursor() may be what initialises the underlying client.
# Confirm against connection.py before removing.
cursor = conn.cursor()
74
+
75
# Embedding model used to turn the user's question into a query vector.
retriever = init_retriever()

query_str = st.text_input("Please enter Search Query:", "")

# Only search when the user typed something meaningful: an empty or
# whitespace-only box would otherwise trigger a pointless embedding pass and
# a Pinecone query.
if query_str.strip():
    # encode() receives a one-element list; the result is converted to plain
    # Python lists before being handed to the connection.
    xq = retriever.encode([query_str]).tolist()
    xc = conn.query(
        index_name="youtube-search", query_vector=xq, top_k=5, include_metadata=True
    )

    # Render one card per returned match from its metadata fields.
    for context in xc:
        metadata = context["metadata"]
        card(
            metadata["thumbnail"],
            metadata["title"],
            metadata["url"],
            metadata["text"],
        )