"""Utilities for evaluating an auto-merging RAG pipeline with TruLens.

Defines the RAG-triad feedback functions (answer relevance, context
relevance, groundedness), recorder factories for TruLlama, and a builder
for a hierarchical auto-merging vector index.
"""

import os

import nest_asyncio
import numpy as np
from trulens_eval import Feedback, OpenAI, TruLlama
from trulens_eval.feedback import Groundedness

from llama_index.core import (
    ServiceContext,
    StorageContext,
    VectorStoreIndex,
    load_index_from_storage,
)
from llama_index.core.node_parser import HierarchicalNodeParser, get_leaf_nodes
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.packs.auto_merging_retriever.base import AutoMergingRetrieverPack

# Allow nested event loops (required when TruLens runs inside Jupyter or
# another already-running asyncio host).
nest_asyncio.apply()

# Shared feedback provider; module-level so every feedback function and
# recorder below reuses one client.
openai = OpenAI()

# Answer Relevance: how relevant the final answer is to the user's question.
qa_relevance = (
    Feedback(openai.relevance_with_cot_reasons, name="Answer Relevance")
    .on_input_output()
)

# Context Relevance: relevance of each retrieved context chunk to the
# question, aggregated across chunks with the mean.
qs_relevance = (
    Feedback(openai.relevance_with_cot_reasons, name="Context Relevance")
    .on_input()
    .on(TruLlama.select_source_nodes().node.text)
    .aggregate(np.mean)
)

# Groundedness: whether the answer is supported by the retrieved context.
grounded = Groundedness(groundedness_provider=openai)
groundedness = (
    Feedback(grounded.groundedness_measure_with_cot_reasons, name="Groundedness")
    .on(TruLlama.select_source_nodes().node.text)
    .on_output()
    .aggregate(grounded.grounded_statements_aggregator)
)

# Default RAG-triad feedback suite used by get_prebuilt_trulens_recorder.
feedbacks = [qa_relevance, qs_relevance, groundedness]


def get_openai_api_key():
    """Return the OpenAI API key from the OPENAI_API_KEY env var (None if unset)."""
    return os.getenv("OPENAI_API_KEY")


def get_trulens_recorder(query_engine, feedbacks, app_id):
    """Wrap *query_engine* in a TruLlama recorder with the given feedbacks.

    Args:
        query_engine: LlamaIndex query engine to instrument.
        feedbacks: list of TruLens Feedback functions to evaluate per record.
        app_id: identifier under which runs are recorded in the TruLens db.

    Returns:
        A configured TruLlama recorder.
    """
    return TruLlama(query_engine, app_id=app_id, feedbacks=feedbacks)


def get_prebuilt_trulens_recorder(query_engine, app_id):
    """Recorder preloaded with the module-level RAG-triad feedback suite."""
    # Delegate rather than duplicating the TruLlama construction.
    return get_trulens_recorder(query_engine, feedbacks, app_id)


def build_automerging_index(
    documents,
    llm,
    embed_model="local:BAAI/bge-small-en-v1.5",
    save_dir="merging_index",
    chunk_sizes=None,
):
    """Build (or load from disk) a hierarchical auto-merging vector index.

    Documents are parsed into a hierarchy of chunks; only the leaf chunks
    are embedded into the vector index, while *all* nodes go into the
    docstore so an auto-merging retriever can merge leaves back into
    their parents at query time.

    Args:
        documents: documents to index.
        llm: LLM placed in the service context.
        embed_model: embedding model spec (default: local BGE-small).
        save_dir: directory used to persist / reload the index.
        chunk_sizes: hierarchy of chunk sizes, largest first
            (defaults to [2048, 512, 128]).

    Returns:
        The newly built or reloaded VectorStoreIndex.
    """
    chunk_sizes = chunk_sizes or [2048, 512, 128]
    node_parser = HierarchicalNodeParser.from_defaults(chunk_sizes=chunk_sizes)
    nodes = node_parser.get_nodes_from_documents(documents)
    leaf_nodes = get_leaf_nodes(nodes)
    merging_context = ServiceContext.from_defaults(
        llm=llm,
        embed_model=embed_model,
    )
    storage_context = StorageContext.from_defaults()
    # All nodes (parents and leaves) must be in the docstore for merging.
    storage_context.docstore.add_documents(nodes)

    if not os.path.exists(save_dir):
        # Fresh build: embed only the leaves, then persist everything.
        automerging_index = VectorStoreIndex(
            leaf_nodes,
            storage_context=storage_context,
            service_context=merging_context,
        )
        automerging_index.storage_context.persist(persist_dir=save_dir)
    else:
        # A persisted index already exists; reload it instead of rebuilding.
        automerging_index = load_index_from_storage(
            StorageContext.from_defaults(persist_dir=save_dir),
            service_context=merging_context,
        )
    return automerging_index