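# Streamlit app: semantic search over Bhagavad Gita verses.
# A question is embedded with a sentence-transformers model and matched against
# pre-computed verse embeddings through an Annoy index; the best-matching
# shloka's English and Hindi translations are then streamed to the page.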
import streamlit as st
import textwrap
import pandas as pd
import time
from sentence_transformers import SentenceTransformer, util
from annoy import AnnoyIndex
footer = """ | |
<p style='text-align: center; color: gray;'>Made with inspiration by Abhijeet Singh</p> | |
""" | |
shlok_keys = ['Title', 'Chapter', 'Verse', 'Hindi Anuvad', 'Enlgish Translation']
max_line_length = 100  # Adjust as needed
@st.cache_data  # cache the spreadsheet so it is not re-read on every rerun
def load_data():
    hn_filepath = 'Gita.xlsx'
    return pd.read_excel(hn_filepath)
@st.cache_resource  # cache the model so it is loaded only once per session
def load_hn_model():
    return SentenceTransformer('all-mpnet-base-v2')
hn_model = load_hn_model()
def build_embeddings(hn_data):
    # Batch-encode every English translation; encode() returns a 2D numpy array here
    return hn_model.encode(list(hn_data['Enlgish Translation']), convert_to_numpy=True)
def build_annoy_index(shloka_embeddings):
    embedding_size = len(shloka_embeddings[0])
    annoy_index = AnnoyIndex(embedding_size, metric='angular')
    for i, embedding in enumerate(shloka_embeddings):
        annoy_index.add_item(i, embedding)
    annoy_index.build(18)  # 18 trees: more trees improve accuracy at the cost of build time
    return annoy_index
def wrap_text(text):
    # Wrap long verse text so it fits comfortably in the page width
    return textwrap.fill(text, width=max_line_length)
# st.write("shree ganeshay namah") | |
hn_data = load_data()
shloka_embeddings = build_embeddings(hn_data)
annoy_index = build_annoy_index(shloka_embeddings)
st.title("GitaShlok Bhagavad Gita Assistant") | |
st.markdown(footer, unsafe_allow_html=True) | |
# Page-level CSS: widen the main container and preserve line breaks in text output
st.markdown(
    """
    <style>
    .reportview-container {
        width: 90%;
    }
    .streamlit-text-container {
        white-space: pre-line;
    }
    </style>
    """,
    unsafe_allow_html=True
)
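# Question input: on "Ask", embed the query and look up the closest verse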
query = st.text_input("Ask any question related to the Bhagavad Gita: ")
if st.button('Ask'):
    query_embedding = hn_model.encode(query, convert_to_numpy=True)
    # Use the Annoy index to fetch the 18 approximate nearest neighbours
    similar_indices = annoy_index.get_nns_by_vector(query_embedding, 18)
    # Re-rank the candidates by exact cosine similarity and collect their details
    similarities = []
    for curr_index in similar_indices:
        similarity = float(util.cos_sim(query_embedding, shloka_embeddings[curr_index]))
        curr_shlok_details = {key: hn_data[key][curr_index] for key in shlok_keys}
        similarities.append({"shlok_details": curr_shlok_details, "similarity": similarity})
    # Get the most similar Shloka
    top_result = sorted(similarities, key=lambda x: x["similarity"], reverse=True)[0]
    top_shlok_details = top_result["shlok_details"]
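    # Assumes the Chapter column looks like "<label> <number>" and the Verse
    # column like "<label> <chapter>.<verse>"; the shloka number is the part after the dot.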
    adhyay_number = top_shlok_details['Chapter'].split(" ")[1]
    shlok_number = top_shlok_details['Verse'].split(" ")[1].split(".")[1]
st.write("------------------------------") | |
st.write(f"{top_shlok_details['Chapter']} , Shloka : {shlok_number}") | |
wrapped_text = textwrap.fill(top_shlok_details['Enlgish Translation'], width=max_line_length) | |
wrapped_hindi_text=textwrap.fill(top_shlok_details['Hindi Anuvad'], width=max_line_length) | |
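    # Typewriter effect: reveal the wrapped English translation one character at a time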
    placeholder = st.empty()
    prev_text = ''
    for char in wrapped_text:
        prev_text = prev_text + char
        placeholder.text(prev_text)
        time.sleep(0.01)  # Adjust the sleep duration as needed
    st.write("\n------------------------------")
    hindi_placeholder = st.empty()
    hindi_text = ''
    for char in wrapped_hindi_text:
        hindi_text = hindi_text + char
        hindi_placeholder.text(hindi_text)
        time.sleep(0.005)  # Adjust the sleep duration as needed
    st.write("\n------------------------------")