# -*- coding: utf-8 -*-
"""Assessment3_end-to-end-RAG_Group2.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1rvsixmBM5WWbSd-d9ps-E1Au_uTKHTV1
"""
import os
import pickle

import faiss
import gradio as gr
import openai
from sentence_transformers import SentenceTransformer
# Embedding model; must match the model used to build the index offline
embedder = SentenceTransformer('all-MiniLM-L6-v2')

# Load the pre-chunked corpus and the FAISS index built from its embeddings
with open("chunks.pkl", "rb") as f:
    chunks = pickle.load(f)
faiss_index = faiss.read_index("my_faiss_index.faiss")
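# This script assumes chunks.pkl and my_faiss_index.faiss were built offline.
# A minimal sketch of how they could have been created (`documents`, a list of
# text chunks, is a hypothetical name, not part of the original notebook):
#
#     docs_embeddings = embedder.encode(documents, convert_to_numpy=True)
#     index = faiss.IndexFlatL2(docs_embeddings.shape[1])
#     index.add(docs_embeddings.astype("float32"))
#     faiss.write_index(index, "my_faiss_index.faiss")
#     with open("chunks.pkl", "wb") as f:
#         pickle.dump(documents, f)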
# Read the key from the environment instead of hard-coding a secret in the script
openai.api_key = os.environ["OPENAI_API_KEY"]
def search(query, top_k=5):
    # Encode the query with the same embedding model used for the index
    query_embedding = embedder.encode([query]).astype("float32")  # FAISS expects float32
    # Search the index for the top_k nearest chunks
    distances, indices = faiss_index.search(query_embedding, top_k)
    # Pair each retrieved chunk with its distance score
    results = [(chunks[i], distances[0][j]) for j, i in enumerate(indices[0])]
    return results
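# Example usage (hypothetical query):
#     for chunk, dist in search("novels about whaling", top_k=3):
#         print(round(float(dist), 3), str(chunk)[:80])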
def rag_search(query, top_k=5):
# Retrieve similar chunks using FAISS
results = search(query, top_k)
# Combine the top chunks for context
context = "\n\n".join([str(chunk) for chunk, _ in results]) # Convert chunk to string if it's not already
# Improved prompt for book assistance
prompt = (
f"You are a book expert assistant, helping the user find detailed information about books. "
f"Based on the following context, provide a thoughtful and detailed answer to the user's query. "
f"Use only the information from the context, and if there is insufficient information, say so politely.\n\n"
f"Context:\n{context}\n\n"
f"Question: {query}\n\n"
f"Answer:\n"
f"Make the answer format like this / Author: , Title: ----- , Date:------ , Description: ------- , Full Text (content): -----"
)
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
    # Legacy ChatCompletion endpoint (requires openai<1.0; removed in openai>=1.0)
    response = openai.ChatCompletion.create(
        model="gpt-4o-mini",
        max_tokens=1500,
        n=1,
        stop=None,
        temperature=0.2,  # low values keep answers grounded; higher values are more creative but hallucinate more
        messages=messages
    )
# Extract the generated response from the API response
generated_text = response.choices[0].message['content'].strip()
return generated_text
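# Example usage (hypothetical query):
#     print(rag_search("Who wrote Moby-Dick and what is it about?"))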
def gradio_rag(query):
    # Thin wrapper so Gradio can pass the text input straight to the RAG pipeline
    return rag_search(query)
# Create Gradio interface
interface = gr.Interface(
fn=gradio_rag,
inputs="text",
outputs="text",
title="News paper Search Assistant",
description="Ask questions about books and get expert answers!"
)
# Launch Gradio interface
interface.launch()
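# When running in Colab, interface.launch(share=True) also exposes a temporary public URL.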