mrchtr committed on
Commit
01628bb
·
1 Parent(s): 10641ee

Update styles

Browse files
Files changed (3) hide show
  1. app.py +46 -14
  2. retriever.py +3 -3
  3. style.css +18 -0
app.py CHANGED
@@ -3,30 +3,62 @@
3
  Here's our first attempt at using data to create a table:
4
  """
5
  import streamlit as st
6
- import pandas as pd
7
- from load_css import local_css
8
  from retriever import do_search
9
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  local_css('style.css')
 
 
 
 
 
 
 
 
 
 
11
 
12
- st.header('Semantic search demo')
13
- search = st.text_input('')
14
 
 
15
  if search:
16
  result = do_search(search)
17
- col1, col2, col3 = st.columns(3)
18
 
19
- with col1:
20
- st.write('TF-IDF')
21
- st.write(result[0])
 
 
 
 
22
 
23
- with col2:
24
- st.write('Base dense retriever')
25
- st.write(result[1])
 
 
 
 
 
26
 
27
- with col3:
28
- st.write('Adapted dense retriever')
29
- st.write(result[2])
 
 
 
 
 
30
 
31
 
32
 
 
3
  Here's our first attempt at using data to create a table:
4
  """
5
  import streamlit as st
 
 
6
  from retriever import do_search
7
 
8
def local_css(file_name):
    """Inject the stylesheet stored at *file_name* into the Streamlit page.

    The file's text is wrapped in a ``<style>`` element and passed to
    ``st.markdown`` with ``unsafe_allow_html=True``.

    Args:
        file_name: Path of the CSS file to read.

    Raises:
        OSError: If the file cannot be opened.
    """
    # Pin the encoding: without it open() falls back to the locale default,
    # which differs between platforms.
    with open(file_name, encoding='utf-8') as css_file:
        css = css_file.read()
    # Emit after the with-block so the file handle is already closed.
    st.markdown(f'<style>{css}</style>', unsafe_allow_html=True)
11
+
12
+
13
def render_retrieved_content(content, score):
    """Build the HTML snippet for one retrieved passage.

    Args:
        content: Text placed inside the ``<blockquote>`` element.
        score: Similarity score, or ``None`` to omit the score label.
            A given score is rounded to three decimal places.

    Returns:
        The blockquote markup followed by a space and, when a score was
        given, a bold "Similarity Score" label.
    """
    quote = f'<blockquote>{content} </blockquote>'
    if score is None:
        # No score label: the trailing separator space is still emitted.
        return f'{quote} '
    return f'{quote} <b> Similarity Score: {round(score, 3)}</b>'
19
+
20
  local_css('style.css')
21
# --- Page header and introduction -------------------------------------------
st.header('🧐 Where my docs at?')
st.markdown('✨ Imagine you have a bunch of text documents and looking for one specific passage, '
            'but you can not remember on the exact words. Just about rough content. <br><br>'
            '💡 This demo compares different search approaches that can help you to find the right '
            'information.', unsafe_allow_html=True)

# NOTE(review): the selected value is not read anywhere in the visible script;
# the call is kept because it renders the dataset picker on the page.
option = st.selectbox(
    'Choose a dataset',
    ('CDU election program 2021', 'Partisan news 2019 (dutch)'))


def _show_first_hit(documents, with_score=True):
    """Render the first document of one retriever's result, or a notice if there is none."""
    try:
        first = documents[0]
    except IndexError:
        # An empty result would otherwise abort the whole page with a traceback.
        st.markdown('_No matching document found._')
        return
    shown_score = first.score if with_score else None
    st.markdown(render_retrieved_content(first.content, shown_score),
                unsafe_allow_html=True)


# --- Search box and per-retriever results ------------------------------------
search = st.text_input('Enter your search query')
if search:
    # One result sequence per retriever, indexed 0..2 in the order used below.
    result = do_search(search)

    st.markdown('### 🔎 Term Frequency–Inverse Document Frequency (TF-IDF)')
    # The third literal ends with a space so it does not run into
    # 'searched phrase.' when the adjacent literals are concatenated.
    st.markdown('Is a statistical approach that calculates how relevant a word is to a document '
                'in your collection. Only documents will be found that contain one of the words of '
                'the given search query. You still have to remember on exact terms that are in the '
                'searched phrase.')
    # No similarity score is displayed for the TF-IDF hit.
    _show_first_hit(result[0], with_score=False)

    st.markdown('### 🧠 Semantic search')
    st.markdown('An alternative approach is semantic search. Instead of using words of the '
                'documents to calculate the score, we use a neural network that calculate the '
                'similarity between the query and the documents of the collection. In other words, '
                'the chance is high to find topic related documents without knowing the exact '
                'terms.')
    _show_first_hit(result[1])

    st.markdown('### 🚀 Domain adapted semantic search')
    st.markdown('If our document collection contains a lot of domain specific documents, '
                'we can not use standard models. These models were trained on a large amount of '
                'public available data, that covers probably not your domain specific words. To '
                'improve the search results, we could fine-tune the network to calculate more '
                'accurate similarities between queries and document regarding to your domain.')
    _show_first_hit(result[2])
62
 
63
 
64
 
retriever.py CHANGED
@@ -56,9 +56,9 @@ def dense_retrieval(query, retriever='base'):
56
 
57
 
58
  def do_search(query):
59
- sparse_result = sparse_retrieval(query)['documents'][0].content
60
- dense_base_result = dense_retrieval(query, retriever='base')['documents'][0].content
61
- dense_adapted_result = dense_retrieval(query, retriever='adapted')['documents'][0].content
62
  return sparse_result, dense_base_result, dense_adapted_result
63
 
64
  if __name__ == '__main__':
 
56
 
57
 
58
  def do_search(query):
59
+ sparse_result = sparse_retrieval(query)['documents']
60
+ dense_base_result =dense_retrieval(query, retriever='base')['documents']
61
+ dense_adapted_result = dense_retrieval(query, retriever='adapted')['documents']
62
  return sparse_result, dense_base_result, dense_adapted_result
63
 
64
  if __name__ == '__main__':
style.css ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/* Base look of every blockquote: light grey background, thick grey left rule,
   and the quote glyphs (U+201C/U+201D, U+2018/U+2019) for open-quote/close-quote. */
+ blockquote {
2
+ background: #f9f9f9;
3
+ border-left: 10px solid #ccc;
4
+ margin: 1.5em 10px;
5
+ padding: 0.5em 10px;
6
+ quotes: "\201C""\201D""\2018""\2019";
7
+ }
8
/* Pseudo-element ahead of the blockquote text. `content` is an empty string,
   so no glyph is drawn; the other declarations still apply to the empty box. */
+ blockquote:before {
9
+ color: #ccc;
10
+ content: '';
11
+ font-size: 4em;
12
+ line-height: 0.1em;
13
+ margin-right: 0.25em;
14
+ vertical-align: -0.4em;
15
+ }
16
/* Lay out paragraphs inside a blockquote inline instead of as separate blocks. */
+ blockquote p {
17
+ display: inline;
18
+ }