""" # My first app Here's our first attempt at using data to create a table: """ import streamlit as st from retriever import do_search def local_css(file_name): with open(file_name) as f: st.markdown(f'', unsafe_allow_html=True) def render_retrieved_content(content, score): print_score = '' if score is not None: score = round(score, 3) print_score = f' Similarity Score: {score}' return f'
{content}
{print_score}' local_css('style.css') st.header('🧐 Where my docs at?') st.markdown('✨ Imagine you have a bunch of text documents and looking for one specific passage, ' 'but you can not remember on the exact words. Just about rough content.

' '💡 This demo compares different search approaches that can help you to find the right ' 'information.', unsafe_allow_html=True) option = st.selectbox( 'Choose a dataset', ('CDU election program 2021', 'Partisan news 2019 (dutch)')) search = st.text_input('Enter your search query') if search: result = do_search(search) st.markdown('### 🔎 Term Frequency–Inverse Document Frequency (TF-IDF)') st.markdown('Is a statistical approach that calculates how relevant a word is to a document ' 'in your collection. Only documents will be found that contain one of the words of ' 'the given search query. You still have to remember on exact terms that are in the' 'searched phrase.') st.markdown(render_retrieved_content(result[0][0].content, None), unsafe_allow_html=True) st.markdown('### 🧠 Semantic search') st.markdown('An alternative approach is semantic search. Instead of using words of the ' 'documents to calculate the score, we use a neural network that calculate the ' 'similarity between the query and the documents of the collection. In other words, ' 'the chance is high to find topic related documents without knowing the exact ' 'terms.') st.markdown(render_retrieved_content(result[1][0].content, result[1][0].score), unsafe_allow_html=True) st.markdown('### 🚀 Domain adapted semantic search') st.markdown('If our document collection contains a lot of domain specific documents, ' 'we can not use standard models. These models were trained on a large amount of ' 'public available data, that covers probably not your domain specific words. To ' 'improve the search results, we could fine-tune the network to calculate more ' 'accurate similarities between queries and document regarding to your domain.') st.markdown(render_retrieved_content(result[2][0].content, result[2][0].score), unsafe_allow_html=True)