Spaces:
Runtime error
Runtime error
import streamlit as st | |
from pathlib import Path | |
import json | |
from support_functions import HealthseaSearch | |
def visualize_dataset():
    """Render the Healthsea dataset overview and health-aspect search page.

    Loads the four JSON files under ``data/``, builds a ``HealthseaSearch``
    from them and writes, top to bottom: the introductory notices, the
    dataset KPIs, the expandable condition/benefit tables, the search
    controls, the product results with the KPIs of the searched aspect, and
    the substance results. When the searched aspect has aliases, the vectors
    of the aspect and of its aliases are handed to ``search_engine.pyvis``.

    Takes no arguments and returns nothing; all output goes through Streamlit.
    """
    # Configuration
    health_aspect_path = Path("data/health_aspects.json")
    product_path = Path("data/products.json")
    condition_path = Path("data/condition_vectors.json")
    benefit_path = Path("data/benefit_vectors.json")

    # Load data
    def load_data(
        _health_aspect_path: Path,
        _product_path: Path,
        _condition_path: Path,
        _benefit_path: Path,
    ):
        """Parse the four JSON files and return their contents as a tuple.

        The tuple order matches the argument order:
        ``(health_aspects, products, conditions, benefits)``.
        """
        loaded = []
        for json_path in (
            _health_aspect_path,
            _product_path,
            _condition_path,
            _benefit_path,
        ):
            # JSON text is UTF-8; do not depend on the platform's locale
            # default when decoding it.
            with open(json_path, encoding="utf-8") as reader:
                loaded.append(json.load(reader))
        return tuple(loaded)

    # Functions
    def kpi(n, text):
        """Return the HTML of one KPI tile: value ``n`` above caption ``text``."""
        return (
            "\n"
            "<div class='kpi'>\n"
            f"<h1 class='kpi_header'>{n}</h1>\n"
            f"<span>{text}</span>\n"
            "</div>\n"
        )

    def central_text(text):
        """Return ``text`` wrapped in the ``central_text`` heading markup."""
        return f"<h2 class='central_text'>{text}</h2>"

    # Loading data
    health_aspects, products, conditions, benefits = load_data(
        health_aspect_path, product_path, condition_path, benefit_path
    )
    search_engine = HealthseaSearch(health_aspects, products, conditions, benefits)

    # NOTE(review): the leading "π…" characters in the three section titles
    # below ("Dataset", "Products", "Substances") look like emoji that were
    # mis-decoded somewhere upstream -- restore them from the original source.

    # KPI
    st.info(
        "This app presents the analyzed dataset of up to one million reviews. "
        "You can search for the products and substances with the highest score "
        "based on health aspect. The score is based on what reviewers wrote in "
        "their reviews."
    )
    st.warning(
        "Please note that the results produced by Healthsea should not be used "
        "as a foundation for solving health problems neither do we want to "
        "advocate that supplement products are able to solve everyone's health "
        "issues.\n"
        "Healthsea is a research project that presents a technical approach on "
        "analyzing user-generated reviews and acts as a proof-of-concept."
    )
    st.markdown("---")
    st.markdown(central_text("π Dataset"), unsafe_allow_html=True)

    # The review count is a fixed figure. It is formatted as text here because
    # the former float literal 933.240 was rendered as "933.24".
    review_count = 933_240

    kpi_products, kpi_reviews, kpi_condition, kpi_benefit = st.columns(4)
    kpi_products.markdown(kpi(len(products), "Products"), unsafe_allow_html=True)
    kpi_reviews.markdown(kpi(f"{review_count:,}", "Reviews"), unsafe_allow_html=True)
    kpi_condition.markdown(kpi(len(conditions), "Conditions"), unsafe_allow_html=True)
    kpi_benefit.markdown(kpi(len(benefits), "Benefits"), unsafe_allow_html=True)
    st.markdown("---")

    # Expander
    show_conditions, show_benefits = st.columns(2)
    with show_conditions.expander("Top mentioned Conditions"):
        st.write(search_engine.get_all_conditions_df())
    with show_benefits.expander("Top mentioned Benefits"):
        st.write(search_engine.get_all_benefits_df())
    st.markdown("---")

    # Search
    search = st.text_input(label="Search for an health aspect", value="joint pain")
    n = st.slider("Show top n results", min_value=10, max_value=1000, value=25)
    st.markdown("---")
    st.markdown(central_text("π§ Products"), unsafe_allow_html=True)
    st.info(
        "The products are scored based on what reviewers say. Additional "
        "variables in the scoring function are product rating, helpful count "
        "and whether the review is considered 'fake'. "
    )

    # DataFrame
    st.write(search_engine.get_products_df(search, n))

    # KPI & Alias
    # Look the searched aspect up once and reuse the result for every KPI
    # below instead of querying the search engine again for each tile.
    aspect = search_engine.get_aspect(search)
    aspect_meta = search_engine.get_aspect_meta(search)
    aspect_alias = aspect["alias"]
    has_aliases = len(aspect_alias) > 0

    # "Mentions" and "Products" are always shown; a third column for the
    # alias count is only added when the aspect has aliases.
    kpi_columns = st.columns(3 if has_aliases else 2)
    kpi_columns[0].markdown(
        kpi(aspect_meta["frequency"], "Mentions"),
        unsafe_allow_html=True,
    )
    kpi_columns[1].markdown(
        kpi(len(aspect["products"]), "Products"),
        unsafe_allow_html=True,
    )

    if has_aliases:
        kpi_columns[2].markdown(
            kpi(len(aspect_alias), "Similar health aspects"),
            unsafe_allow_html=True,
        )

        # (name, vector) pairs: the searched aspect first, then each alias.
        vectors = [(aspect_meta["name"], aspect_meta["vector"])]
        for alias in aspect_alias:
            alias_meta = search_engine.get_aspect_meta(alias)
            vectors.append((alias_meta["name"], alias_meta["vector"]))

        st.markdown("\n")
        st.info(
            "To improve the search, the table also shows results of other "
            "health aspects with a high similarity"
        )
        # Alternative that is currently disabled:
        # st.write(search_engine.tsne_plot(vectors))
        search_engine.pyvis(vectors)

    st.markdown("---")

    # Substances
    st.markdown(central_text("π― Substances"), unsafe_allow_html=True)
    st.info("The scores of the substances are based on the products")

    # DataFrame
    st.write(search_engine.get_substances_df(search, n))

    # The first column is intentionally left empty so the KPI sits on the right.
    _, kpi_substances = st.columns(2)
    kpi_substances.markdown(
        kpi(len(aspect["substance"]), "Substances"),
        unsafe_allow_html=True,
    )