"""Streamlit app: upload an HTML page, index it with llama_index, and ask a
HuggingFace-hosted Mistral model for the product price found in the page.

Flow: read API token from Streamlit secrets -> build LLM + embedding model ->
wrap them in a ServiceContext -> on file upload, index the HTML as a single
Document tagged with a unique id -> query the index, filtered to that id.
"""

import streamlit as st
import os
import uuid
from io import StringIO

from llama_index.llms import HuggingFaceInferenceAPI
from llama_index.embeddings import HuggingFaceInferenceAPIEmbedding
from llama_index import ServiceContext, VectorStoreIndex
from llama_index.schema import Document
from llama_index.vector_stores.types import MetadataFilters, ExactMatchFilter

# NOTE(review): "INFRERENCE" looks misspelled, but this key must match the
# entry in .streamlit/secrets.toml — confirm the secrets file before renaming.
inference_api_key = st.secrets["INFRERENCE_API_TOKEN"]

# Remote LLM used for answering queries.
llm = HuggingFaceInferenceAPI(
    model_name="mistralai/Mistral-7B-Instruct-v0.2", token=inference_api_key
)

# Remote embedding model; normalized embeddings for cosine-style similarity.
embed_model = HuggingFaceInferenceAPIEmbedding(
    model_name="Gooly/gte-small-en-fine-tuned-e-commerce",
    token=inference_api_key,
    model_kwargs={"device": ""},
    encode_kwargs={"normalize_embeddings": True},
)

service_context = ServiceContext.from_defaults(embed_model=embed_model, llm=llm)

html_file = st.file_uploader("Upload a html file", type=["html"])

if html_file is not None:
    # Decode the uploaded bytes once. The original StringIO round-trip
    # (st.write of the buffer object, then read + write again) only printed
    # a repr and duplicated output, so it is dropped.
    string_data = html_file.getvalue().decode("utf-8")
    st.write(string_data)

    # Store the id as a string: UUID objects may not serialize into
    # vector-store metadata, and the filter value must compare equal to
    # the stored value.
    document_id = str(uuid.uuid4())
    document = Document(text=string_data)
    document.metadata["id"] = document_id
    # Provenance belongs on the Document itself; `from_documents` has no
    # `metadata` parameter, so the original kwarg was never applied.
    document.metadata["source"] = "HTML"
    documents = [document]

    # Restrict retrieval to the document uploaded in this session.
    filters = MetadataFilters(
        filters=[ExactMatchFilter(key="id", value=document_id)]
    )

    index = VectorStoreIndex.from_documents(
        documents, show_progress=True, service_context=service_context
    )

    query_engine = index.as_query_engine(
        filters=filters, service_context=service_context
    )

    response = query_engine.query("What is the current price of this product?")
    st.write(response)