"""News GPT: a Gradio chat app that answers queries with RAG.

Pipeline: embed the query -> retrieve top-3 similar chunks from a Qdrant
collection -> build an Alpaca-style prompt -> ask a fine-tuned Llama 3.1
model served through RunPod's OpenAI-compatible endpoint.
"""

import os

import gradio as gr
from dotenv import load_dotenv
from langchain.prompts import PromptTemplate
from langchain_openai import OpenAIEmbeddings
from langchain_qdrant import QdrantVectorStore
from openai import OpenAI
from qdrant_client import QdrantClient

load_dotenv()

# RunPod exposes the fine-tuned chat model behind an OpenAI-compatible API.
RUNPOD_KEY = os.getenv("RUNPOD_KEY")
RUNPOD_URL = os.getenv("RUNPOD_URL")
model = OpenAI(api_key=RUNPOD_KEY, base_url=RUNPOD_URL)

QDRANT_URL = os.getenv("QDRANT_URL")
QDRANT_KEY = os.getenv("QDRANT_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
# SECURITY: do not print these values — the original logged all three
# secrets (Qdrant key, OpenAI key, Qdrant URL) to stdout.

client = QdrantClient(QDRANT_URL, api_key=QDRANT_KEY)
collection_name = "search_engine"

# Embeddings must match the model used when the collection was indexed.
embeddings = OpenAIEmbeddings(
    model="text-embedding-3-small",
    openai_api_key=OPENAI_API_KEY,
)

qdrant = QdrantVectorStore(
    client=client,
    collection_name=collection_name,
    embedding=embeddings,
)

# Alpaca-style instruction/input/response template.
# (Renamed from the original typo "promtp_template", which also narrowly
# avoided clobbering the prompt_template() function below.)
PROMPT_TEMPLATE = """ Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request. 
### Instruction: {instruction} ### Input: {input} ### Response: """

prompt = PromptTemplate(
    input_variables=["instruction", "input"],
    template=PROMPT_TEMPLATE,
)


def prompt_template(query):
    """Build the final LLM prompt for *query*.

    Retrieves the 3 most similar chunks from the Qdrant collection and
    inlines them (separated by "-----") as the template's {input} context.
    """
    results = qdrant.similarity_search(query=query, k=3)
    _ctx = ""
    for i, result in enumerate(results):
        _ctx += f'Content {i}: {result.page_content}\n-----\n'
    return prompt.format(instruction=query, input=_ctx)


def generate_response(prompt):
    """Send *prompt* to the fine-tuned model; return the assistant message.

    NOTE: the parameter intentionally keeps its original name even though it
    shadows the module-level PromptTemplate — callers may pass it by keyword.
    """
    response = model.chat.completions.create(
        model="cenrak/llama3.1_fineTuned_model",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt},
        ],
    )
    return response.choices[0].message


def main(query, history):
    """Gradio ChatInterface callback.

    *history* is accepted per the ChatInterface contract but unused — each
    query is answered statelessly via fresh retrieval.
    """
    full_prompt = prompt_template(query)
    result = generate_response(full_prompt)  # fixed typo: "resault"
    return result.content


demo = gr.ChatInterface(fn=main, title="News GPT")

if __name__ == "__main__":
    demo.launch()