import langchain
import gptcache
import time
from langchain.llms import OpenAI
from gptcache.processor.pre import get_prompt
from gptcache.manager.factory import get_data_manager
from langchain.cache import GPTCache
from gptcache.manager import get_data_manager, CacheBase, VectorBase
from gptcache import Cache
from gptcache.embedding import Onnx
from gptcache.similarity_evaluation.distance import SearchDistanceEvaluation

llm = OpenAI(model_name="text-davinci-003")

# Avoid multiple caches using the same file, causing different llm model caches to affect each other
i = 0
file_prefix = "data_map_similarity"
llm_cache = Cache()


def init_gptcache_map(cache_obj: gptcache.Cache):
    global i
    cache_path = f'{file_prefix}_{i}.txt'
    onnx = Onnx()
    cache_base = CacheBase('sqlite')
    vector_base = VectorBase('faiss', dimension=onnx.dimension)
    data_manager = get_data_manager(cache_base, vector_base, max_size=10, clean_size=2, data_path=cache_path)
    cache_obj.init(
        pre_embedding_func=get_prompt,
        embedding_func=onnx.to_embeddings,
        data_manager=data_manager,
        similarity_evaluation=SearchDistanceEvaluation(),
    )
    i += 1

langchain.llm_cache = GPTCache(init_gptcache_map)

%%time
for i in range(20):
    start = time.perf_counter()
    prompt = "男生有2人，女生有{:d}人，一共多少人？".format(i)
    print("男生有2人，女生有{:d}人, {:s}。 suspend: {:0.4f}".format(i, llm(prompt), time.perf_counter() - start))