File size: 2,912 Bytes
a993e3f
 
1e670df
 
 
eae333f
1e670df
ec0467a
1e670df
 
 
 
 
 
 
 
 
 
 
b51836e
d6147f7
 
a993e3f
fdc7b5a
a993e3f
72bae8c
 
fdc7b5a
db39d56
fdc7b5a
 
712cbc5
49f9cb9
712cbc5
 
 
 
 
49f9cb9
712cbc5
1e670df
 
fdc7b5a
 
 
 
 
1e670df
72bae8c
1e670df
d6147f7
 
 
 
 
 
eae333f
 
72bae8c
eae333f
712cbc5
eae333f
 
9c13951
712cbc5
fdc7b5a
712cbc5
17aee7b
712cbc5
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69

import streamlit as st
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from flashrank import Ranker, RerankRequest
import numpy as np

@st.cache_resource
def get_embeddings():
    """Build the sentence-embedding model used to embed search queries.

    The function is wrapped in ``st.cache_resource`` so the model object is
    constructed once and then reused on later Streamlit reruns.

    Returns:
        A ``HuggingFaceEmbeddings`` instance for ``BAAI/bge-large-en-v1.5``
        configured for CPU inference with normalized output vectors.
    """
    return HuggingFaceEmbeddings(
        model_name="BAAI/bge-large-en-v1.5",
        model_kwargs={"device": "cpu", "trust_remote_code": True},
        # Normalized vectors are what make the scores cosine similarities.
        encode_kwargs={"normalize_embeddings": True},
    )

baai_embeddings = get_embeddings()


@st.cache_resource
def _load_vector_store():
    """Load the FAISS index stored in the app's working directory.

    Streamlit re-executes this script from the top on every widget
    interaction; caching the loaded store avoids re-reading the index from
    disk on each of those reruns.

    Returns:
        The ``FAISS`` vector store loaded from ``./`` using the shared
        embedding model.
    """
    # NOTE(review): newer langchain_community releases refuse to unpickle the
    # docstore unless ``allow_dangerous_deserialization=True`` is passed.
    # Confirm against the pinned version before upgrading; the index here is
    # shipped with the app rather than supplied by users.
    return FAISS.load_local("./", baai_embeddings)


kadhal_Server = _load_vector_store()

# The FlashRank reranker is currently disabled.
# ranker = Ranker(model_name="ms-marco-MiniLM-L-12-v2")


# Page banner: app title with a red divider, the tagline, then a one-line
# description of the project.
banner_title = 'kadhalTensor'
banner_tagline = '_Adhalal :blue[Kadhal] Seiveer_ :blue[] :cupid:'
banner_blurb = (
    "Kadhal Engine on Sangam Literature (WIP) - part of sangamTensor Project "
    ":love_letter: : by Prabakaran Chandran"
)

st.header(banner_title, divider='red')
st.header(banner_tagline)
st.write(banner_blurb)

# Canned questions offered when the "sample queries" toggle is on.
SAMPLE_QUERIES = (
    "How was love marriage celebrated in the Sangam era, as mentioned in Akananuru?",
    "How did the heroine express her longing to her friend, as captured in Ainkurunuru?",
    "What makes love stronger and everlasting, according to Thirukkural?",
)

# How many hits to fetch from the vector store and how many of them to show.
RETRIEVE_K = 5
DISPLAY_K = 2

toggle = st.toggle('sample queries')
with st.form("my_form"):
    st.write("What do you want to know about sangam era's love?")

    if toggle:
        question_input = st.selectbox('select a query:', SAMPLE_QUERIES)
    else:
        # Streamlit discourages empty labels; give the widget a real label
        # and hide it so the box still renders without a caption.
        question_input = st.text_input("Your question", label_visibility="collapsed")

    # Every form must have a submit button.
    submitted = st.form_submit_button("Submit")
    if submitted:
        if not (question_input or "").strip():
            # Nothing to search for; do not send an empty query to the index.
            st.warning("Please enter a question first.")
        else:
            docs = kadhal_Server.similarity_search(question_input, k=RETRIEVE_K)
            passages = [
                {"text": doc.page_content, "metadata": doc.metadata}
                for doc in docs
            ]
            # TODO: rerank `passages` with FlashRank
            # (RerankRequest(passages=..., query=question_input) -> ranker.rerank)
            # instead of sampling once the reranker is re-enabled.

            if not passages:
                st.info("No matching poems found.")
            else:
                # Sample indices without replacement so the same poem is never
                # shown twice, and never ask for more items than were found.
                shown = np.random.choice(
                    len(passages),
                    size=min(DISPLAY_K, len(passages)),
                    replace=False,
                )
                for idx in shown:
                    out = passages[idx]
                    st.write(out["text"])
                    # The metadata schema is not visible here; skip the Tamil
                    # block rather than raise if the key is absent.
                    tamil_text = out["metadata"].get("poemInTamilWDet")
                    if tamil_text:
                        # Replace non-breaking spaces with plain spaces.
                        st.code(tamil_text.replace("\xa0", " "))
                    st.divider()
       
       
# Footer: contact line followed by the credit captions, in display order.
st.write("Contact : https://www.linkedin.com/in/prabakaranchandrantheds/ for Collaborations")

for credit_line in (
    "the LLM based Generator Part is not included to make the app up and running efficiently for everyone to use today - ping if you want to know the entire RAG flow with zephyr",
    "Thanks and Credits should go to Ms.Vaidehi , who put magnanimous efforts to translate these many poems! : poems and translations taken from https://sangamtranslationsbyvaidehi.com/",
    "Thirukkural Credits to the Curator: https://www.kaggle.com/datasets/rahulvks/thirukkural",
):
    st.caption(credit_line)