"""
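Streamlit demo: compare search approaches on a document collection.

For the entered query, the page shows the first passage returned by TF-IDF,
semantic search, and domain adapted semantic search.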
"""
import streamlit as st
from retriever import do_search
def local_css(file_name):
    """Inject the contents of a local CSS file into the Streamlit page.

    Args:
        file_name: Path of the stylesheet to load.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Read the stylesheet first so the file handle is closed before rendering.
    with open(file_name, encoding='utf-8') as f:
        css = f.read()
    # The file was previously opened but never read, so an empty string was
    # rendered and no styles were applied. unsafe_allow_html=True is needed so
    # the <style> element is emitted as HTML instead of being escaped.
    st.markdown(f'<style>{css}</style>', unsafe_allow_html=True)
def render_retrieved_content(content, score):
    """Build the text shown for one retrieved passage.

    Args:
        content: Text of the retrieved passage. It is inserted verbatim (not
            escaped); the callers in this file render the result with
            ``unsafe_allow_html=True``, so markup inside it is interpreted.
        score: Similarity score of the passage, or ``None`` when no score
            should be displayed.

    Returns:
        A string made of a newline, ``content``, another newline and, when
        ``score`` is not ``None``, the text `` Similarity Score: <score>``
        with the score rounded to three decimal places.
    """
    print_score = ''
    if score is not None:
        print_score = f' Similarity Score: {round(score, 3)}'
    # The template used to span several physical lines inside a single-quoted
    # f-string, which is a syntax error; the line breaks are now explicit.
    # NOTE(review): the template presumably once wrapped content and score in
    # HTML elements styled by the page stylesheet -- confirm and restore the
    # intended markup if so.
    return f'\n{content}\n{print_score}'
# --- Page setup -------------------------------------------------------------
local_css('style.css')
st.header('🧐 Where my docs at?')
# The intro text used to contain a string literal broken across two physical
# lines (a syntax error); the line break is now an explicit '\n'.
st.markdown('✨ Imagine you have a bunch of text documents and looking for one specific passage, '
            'but you can not remember on the exact words. Just about rough content.\n'
            '💡 This demo compares different search approaches that can help you to find the right '
            'information.', unsafe_allow_html=True)

# NOTE(review): the selected dataset is stored in `option` but is not passed
# to do_search below -- confirm whether the search should depend on it.
option = st.selectbox(
    'Choose a dataset',
    ('CDU election program 2021', 'Partisan news 2019 (dutch)'))

search = st.text_input('Enter your search query')

# --- Results ----------------------------------------------------------------
# Nothing is searched or rendered until the user has entered a query.
if search:
    # `result` is indexed as result[i][0] below: entry 0 is rendered under
    # the TF-IDF heading, entry 1 under semantic search and entry 2 under
    # domain adapted semantic search. Only the first hit of each is shown.
    # NOTE(review): result[i][0] raises IndexError if an approach returns no
    # hits -- confirm do_search always returns at least one hit per approach.
    result = do_search(search)

    st.markdown('### 🔎 Term Frequency–Inverse Document Frequency (TF-IDF)')
    # A space was missing between the last two literals, which rendered as
    # "in thesearched phrase".
    st.markdown('Is a statistical approach that calculates how relevant a word is to a document '
                'in your collection. Only documents will be found that contain one of the words of '
                'the given search query. You still have to remember on exact terms that are in the '
                'searched phrase.')
    # No score is displayed for the TF-IDF hit.
    st.markdown(render_retrieved_content(result[0][0].content, None),
                unsafe_allow_html=True)

    st.markdown('### 🧠 Semantic search')
    st.markdown('An alternative approach is semantic search. Instead of using words of the '
                'documents to calculate the score, we use a neural network that calculate the '
                'similarity between the query and the documents of the collection. In other words, '
                'the chance is high to find topic related documents without knowing the exact '
                'terms.')
    st.markdown(render_retrieved_content(result[1][0].content, result[1][0].score),
                unsafe_allow_html=True)

    st.markdown('### 🚀 Domain adapted semantic search')
    st.markdown('If our document collection contains a lot of domain specific documents, '
                'we can not use standard models. These models were trained on a large amount of '
                'public available data, that covers probably not your domain specific words. To '
                'improve the search results, we could fine-tune the network to calculate more '
                'accurate similarities between queries and document regarding to your domain.')
    st.markdown(render_retrieved_content(result[2][0].content, result[2][0].score),
                unsafe_allow_html=True)