Spaces:
Runtime error
Runtime error
""" | |
# My first app
Here's our first attempt at using data to create a table:
""" | |
import streamlit as st | |
from retriever import do_search | |
def local_css(file_name):
    """Inject the contents of a CSS file into the Streamlit page.

    The file is read in full and written to the page via ``st.markdown``,
    wrapped in a ``<style>`` tag and with ``unsafe_allow_html=True`` so the
    tag is rendered as HTML.

    Args:
        file_name: Path of the stylesheet to load.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Explicit encoding: without it the platform locale decides how the
    # stylesheet is decoded, which can differ between hosts.
    with open(file_name, encoding='utf-8') as f:
        st.markdown(f'<style>{f.read()}</style>', unsafe_allow_html=True)
def render_retrieved_content(content, score):
    """Build the HTML snippet for one retrieved passage.

    Args:
        content: Text of the passage; placed inside a ``<blockquote>``.
        score: Similarity score, or ``None`` when no score should be shown.
            A given score is rounded to three decimals.

    Returns:
        The blockquote markup, followed by a bold score label when a score
        was supplied.
    """
    # Compare against None explicitly so that a score of 0 is still shown.
    if score is None:
        score_html = ''
    else:
        score_html = f'<b> Similarity Score: {round(score, 3)}</b>'
    return f'<blockquote>{content} </blockquote> {score_html}'
# Page setup: apply the custom stylesheet, then render the title and intro.
local_css('style.css')
st.header('🧐 Where my docs at?')
st.markdown('✨ Imagine you have a bunch of text documents and looking for one specific passage, '
            'but you can not remember on the exact words. Just about rough content. <br><br>'
            '💡 This demo compares different search approaches that can help you to find the right '
            'information.', unsafe_allow_html=True)

# NOTE(review): the chosen dataset is stored in `option` but is not passed to
# do_search() below, so the selection has no visible effect here — confirm
# whether the retriever is meant to receive it.
option = st.selectbox(
    'Choose a dataset',
    ('CDU election program 2021', 'Partisan news 2019 (dutch)'))
search = st.text_input('Enter your search query')

# Only run the retrievers once the user has typed a query.
if search:
    # `result` is indexed as result[approach][rank]; only the top hit
    # (rank 0) of each of the three approaches is displayed.
    # NOTE(review): this assumes every approach returned at least one hit;
    # an empty hit list would raise IndexError on the [0] lookups below.
    result = do_search(search)

    st.markdown('### 🔎 Term Frequency–Inverse Document Frequency (TF-IDF)')
    # The trailing space after "in the" keeps the implicitly concatenated
    # literals from running together as "thesearched".
    st.markdown('Is a statistical approach that calculates how relevant a word is to a document '
                'in your collection. Only documents will be found that contain one of the words of '
                'the given search query. You still have to remember on exact terms that are in the '
                'searched phrase.')
    # No similarity score is shown for the first approach.
    st.markdown(render_retrieved_content(result[0][0].content, None),
                unsafe_allow_html=True)

    st.markdown('### 🧠 Semantic search')
    st.markdown('An alternative approach is semantic search. Instead of using words of the '
                'documents to calculate the score, we use a neural network that calculate the '
                'similarity between the query and the documents of the collection. In other words, '
                'the chance is high to find topic related documents without knowing the exact '
                'terms.')
    st.markdown(render_retrieved_content(result[1][0].content, result[1][0].score),
                unsafe_allow_html=True)

    st.markdown('### 🚀 Domain adapted semantic search')
    st.markdown('If our document collection contains a lot of domain specific documents, '
                'we can not use standard models. These models were trained on a large amount of '
                'public available data, that covers probably not your domain specific words. To '
                'improve the search results, we could fine-tune the network to calculate more '
                'accurate similarities between queries and document regarding to your domain.')
    st.markdown(render_retrieved_content(result[2][0].content, result[2][0].score),
                unsafe_allow_html=True)