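# Helper utilities for evaluating LlamaIndex RAG pipelines with TruLens:
# feedback function definitions, TruLlama recorder factories, and an
# auto-merging index builder.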
import os
import numpy as np
from trulens_eval import (
    Feedback,
    TruLlama,
    OpenAI
)
from trulens_eval.feedback import Groundedness
from llama_index import (
    ServiceContext,
    VectorStoreIndex,
    StorageContext,
    load_index_from_storage,
)
from llama_index.node_parser import HierarchicalNodeParser, get_leaf_nodes
import nest_asyncio
nest_asyncio.apply()
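# OpenAI-backed TruLens feedback provider used by all feedback functions below.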
openai = OpenAI()
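# Answer relevance: does the final response address the user's question?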
qa_relevance = (
    Feedback(openai.relevance_with_cot_reasons, name="Answer Relevance")
    .on_input_output()
)
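# Context relevance: how relevant is each retrieved source node to the query?
# Per-node scores are averaged with np.mean.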
qs_relevance = (
    Feedback(openai.relevance_with_cot_reasons, name="Context Relevance")
    .on_input()
    .on(TruLlama.select_source_nodes().node.text)
    .aggregate(np.mean)
)
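# Groundedness: is the response supported by the retrieved context?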
#grounded = Groundedness(groundedness_provider=openai, summarize_provider=openai)
grounded = Groundedness(groundedness_provider=openai)
groundedness = (
    Feedback(grounded.groundedness_measure_with_cot_reasons, name="Groundedness")
    .on(TruLlama.select_source_nodes().node.text)
    .on_output()
    .aggregate(grounded.grounded_statements_aggregator)
)
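# The three feedbacks above form the default evaluation set used by the prebuilt recorder.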
feedbacks = [qa_relevance, qs_relevance, groundedness]
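# Read the OpenAI API key from the environment.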
def get_openai_api_key():
    return os.getenv("OPENAI_API_KEY")
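# Wrap a query engine in a TruLlama recorder with caller-supplied feedback functions.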
def get_trulens_recorder(query_engine, feedbacks, app_id):
    tru_recorder = TruLlama(
        query_engine,
        app_id=app_id,
        feedbacks=feedbacks
    )
    return tru_recorder
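# Same as get_trulens_recorder, but uses the module-level feedbacks defined above.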
def get_prebuilt_trulens_recorder(query_engine, app_id):
    tru_recorder = TruLlama(
        query_engine,
        app_id=app_id,
        feedbacks=feedbacks
    )
    return tru_recorder
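# Build (or load from save_dir) an auto-merging index: documents are split into a
# hierarchy of chunk sizes, all nodes are kept in the docstore, and only the leaf
# nodes are embedded into the vector index.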
def build_automerging_index(
    documents,
    llm,
    embed_model="local:BAAI/bge-small-en-v1.5",
    save_dir="merging_index",
    chunk_sizes=None,
):
    chunk_sizes = chunk_sizes or [2048, 512, 128]
    node_parser = HierarchicalNodeParser.from_defaults(chunk_sizes=chunk_sizes)
    nodes = node_parser.get_nodes_from_documents(documents)
    leaf_nodes = get_leaf_nodes(nodes)
    merging_context = ServiceContext.from_defaults(
        llm=llm,
        embed_model=embed_model,
    )
    storage_context = StorageContext.from_defaults()
    storage_context.docstore.add_documents(nodes)

    if not os.path.exists(save_dir):
        # First run: embed only the leaf nodes and persist the index to disk.
        automerging_index = VectorStoreIndex(
            leaf_nodes, storage_context=storage_context, service_context=merging_context
        )
        automerging_index.storage_context.persist(persist_dir=save_dir)
    else:
        # Subsequent runs: reload the persisted index from save_dir.
        automerging_index = load_index_from_storage(
            StorageContext.from_defaults(persist_dir=save_dir),
            service_context=merging_context,
        )
    return automerging_index
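# Minimal usage sketch (assumes `documents` and `llm` are provided by the caller;
# neither is defined in this module, and the app_id below is only an example):
#   index = build_automerging_index(documents, llm, save_dir="merging_index")
#   query_engine = index.as_query_engine()
#   recorder = get_prebuilt_trulens_recorder(query_engine, app_id="Automerging Index")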