import os

import gradio as gr
from dotenv import load_dotenv
from langchain.prompts import PromptTemplate
from langchain_openai import OpenAIEmbeddings
from langchain_qdrant import QdrantVectorStore
from openai import OpenAI
from qdrant_client import QdrantClient

# Load environment variables before reading any of them.
load_dotenv()

RUNPOD_KEY = os.getenv("RUNPOD_KEY")
RUNPOD_URL = os.getenv("RUNPOD_URL")
QDRANT_URL = os.getenv("QDRANT_URL")
QDRANT_KEY = os.getenv("QDRANT_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# OpenAI-compatible client pointed at the RunPod endpoint serving the fine-tuned model.
model = OpenAI(api_key=RUNPOD_KEY, base_url=RUNPOD_URL)

# Qdrant vector database client.
client = QdrantClient(QDRANT_URL, api_key=QDRANT_KEY)

collection_name = "week_4_assesment_embeddings"

embeddings = OpenAIEmbeddings(
    model="text-embedding-3-small",
    openai_api_key=OPENAI_API_KEY,
)

qdrant = QdrantVectorStore(
    client=client,
    collection_name=collection_name,
    embedding=embeddings,
)

# Alpaca-style instruction template: the user query goes in {instruction},
# the retrieved context goes in {input}.
PROMPT_TEMPLATE = """
Below is an instruction that describes a task, paired with an input that provides further context.
Write a response that appropriately completes the request.

### Instruction:
{instruction}

### Input:
{input}

### Response:
"""

prompt = PromptTemplate(
    input_variables=["instruction", "input"],
    template=PROMPT_TEMPLATE,
)


def prompt_template(query):
    """Alternative retrieval path: LangChain similarity search over the vector store."""
    results = qdrant.similarity_search(query=query, k=6)
    _ctx = ""
    for i, result in enumerate(results):
        _ctx += f"Content {i}: {result.page_content}\n-----\n"
    return prompt.format(instruction=query, input=_ctx)


def prompt_top6(text):
    """Embed the query, fetch the top-6 chunks from Qdrant, and build the final prompt."""
    query_embedding = embeddings.embed_query(text)
    search_results = client.search(
        collection_name=collection_name,
        query_vector=query_embedding,
        limit=6,
    )
    chunks = ""
    for result in search_results:
        chunks += f"Chunk: {result.payload['context']}\n"
        chunks += "-----\n"
    return prompt.format(instruction=text, input=chunks)


def generate_response(prompt_text):
    """Send the assembled prompt to the fine-tuned model and return the reply message."""
    response = model.chat.completions.create(
        model="cenrak/llama3.1_fineTuned_model",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt_text},
        ],
    )
    return response.choices[0].message


def main(query, history):
    """Gradio chat handler: retrieve context, generate a response, return the answer text."""
    rag_prompt = prompt_top6(query)
    result = generate_response(rag_prompt)
    return result.content


examples = [
    "What is the Berry Export Summary 2028 and what is its purpose?",
]

demo = gr.ChatInterface(fn=main, title="Assignment 4 GPT", examples=examples)

if __name__ == "__main__":
    demo.launch()
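# A minimal sketch of the .env file this script expects. The variable names come
# from the os.getenv() calls above; every value shown here is a placeholder, not
# a real credential or endpoint.
#
#   OPENAI_API_KEY=sk-...            # used for text-embedding-3-small embeddings
#   RUNPOD_KEY=...                   # API key for the RunPod-hosted model endpoint
#   RUNPOD_URL=https://.../v1        # OpenAI-compatible base URL of that endpoint
#   QDRANT_URL=https://...           # Qdrant instance URL
#   QDRANT_KEY=...                   # Qdrant API key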