In [None]:
! pip install faiss-cpu, mistralai

In [40]:
from mistralai.client import MistralClient, ChatMessage
import requests
import numpy as np
import faiss
import os

# The API key is read from the environment so it is never stored in the notebook;
# a missing MISTRAL_API_KEY raises KeyError right here.
api_key = os.environ["MISTRAL_API_KEY"]
# One shared client is reused by the embedding and chat helpers defined later.
client = MistralClient(api_key=api_key)

# 1. RAG from scratch

## Get data

In [3]:
# Download the essay that serves as the knowledge base for the RAG example.
response = requests.get(
    'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/examples/data/paul_graham/paul_graham_essay.txt',
    timeout=30,  # do not hang the notebook forever on a stalled connection
)
# Fail loudly on an HTTP error instead of chunking and embedding an error page.
response.raise_for_status()
text = response.text

In [130]:
# Size of the downloaded essay, in characters.
len(text)

75014

## Split document into chunks

In [4]:
# Cut the essay into consecutive, non-overlapping windows of `chunk_size`
# characters; the last chunk is shorter when the length is not a multiple.
chunk_size = 512
chunks = [
    text[start:start + chunk_size]
    for start in range(0, len(text), chunk_size)
]

## Create embeddings for each text chunk

In [5]:
def get_text_embedding(input):
    """Embed `input` with the "mistral-embed" model and return one vector.

    Sends `input` to the embeddings endpoint through the module-level
    `client` and returns the `embedding` attribute of the first entry in the
    response's `data`.

    NOTE(review): if `input` were a list of several strings, only the first
    embedding would be returned - confirm callers always pass a single string.
    """
    embedding_response = client.embeddings(model="mistral-embed", input=input)
    first_item = embedding_response.data[0]
    return first_item.embedding

In [6]:
# Embed every chunk (one call to get_text_embedding per chunk) and stack the
# resulting vectors into a single array, one row per chunk.
text_embeddings = np.array([get_text_embedding(chunk) for chunk in chunks])

In [9]:
# (number of chunks, embedding dimension)
text_embeddings.shape

(147, 1024)

In [10]:
# Peek at the raw embedding values.
text_embeddings

array([[-0.04849243, 0.07305908, 0.01568604, ..., -0.0234375 ,
 -0.02072144, -0.01068115],
 [-0.04660034, 0.04846191, -0.00045729, ..., -0.00754929,
 -0.00577545, 0.01355743],
 [-0.02139282, 0.0625 , 0.00907898, ..., -0.02233887,
 -0.00765228, -0.00793457],
 ...,
 [-0.02787781, 0.04260254, 0.00785828, ..., -0.00067568,
 -0.01176453, -0.02828979],
 [-0.02966309, 0.06292725, 0.03979492, ..., -0.01296997,
 -0.00264549, -0.03845215],
 [-0.06185913, 0.05847168, 0.03988647, ..., -0.04724121,
 -0.01289368, -0.02728271]])

## Load into a vector database

In [11]:
# Embedding dimensionality: the second axis of the (chunks, dimension) matrix.
d = text_embeddings.shape[1]
# IndexFlatL2 - presumably an exact L2-distance index; see the Faiss docs.
index = faiss.IndexFlatL2(d)
# Store all chunk embeddings; positions returned by a search are later used to
# index `chunks`, so row i of the index corresponds to chunks[i].
index.add(text_embeddings)

## Create embeddings for a question

In [12]:
question = "What were the two main things the author worked on before college?"
# Wrap the single embedding in a list so the array has one row, i.e. the same
# (rows, dimension) layout as the chunk embeddings stored in the index.
question_embeddings = np.array([get_text_embedding(question)])
question_embeddings.shape

(1, 1024)

In [13]:
# Peek at the raw question embedding.
question_embeddings

array([[-0.05456543, 0.03518677, 0.03723145, ..., -0.02763367,
 -0.00327873, 0.00323677]])

## Retrieve similar chunks from the vector database

In [35]:
# Ask the index for the k=2 stored vectors closest to the question embedding.
# I holds the positions of the matches (used afterwards to index `chunks`);
# D is presumably the matching distances - see the Faiss docs.
D, I = index.search(question_embeddings, k=2)
print(I)

[[ 0 90]]


In [43]:
# I has one row per query; take the only row and map each position back to
# the text chunk it was computed from.
retrieved_chunk = [chunks[position] for position in I[0].tolist()]
print(retrieved_chunk)

['\n\nWhat I Worked On\n\nFebruary 2021\n\nBefore college the two main things I worked on, outside of school, were writing and programming. I didn\'t write essays. I wrote what beginning writers were supposed to write then, and probably still are: short stories. My stories were awful. They had hardly any plot, just characters with strong feelings, which I imagined made them deep.\n\nThe first programs I tried writing were on the IBM 1401 that our school district used for what was then called "data processing." This wa', "king on things that aren't prestigious doesn't guarantee you're on the right track, it at least guarantees you're not on the most common type of wrong one.\n\nOver the next several years I wrote lots of essays about all kinds of different topics. O'Reilly reprinted a collection of them as a book, called Hackers & Painters after one of the essays in it. I also worked on spam filters, and did some more painting. I used to have dinners for a group of friends every thursda

## Combine context and question in a prompt and generate response

In [37]:
# Join the retrieved chunks into plain text first: interpolating the list
# itself would put its Python repr (brackets, quotes, escaped newlines) into
# the prompt instead of the essay text.
context_text = "\n\n".join(retrieved_chunk)
prompt = f"""
Context information is below.
---------------------
{context_text}
---------------------
Given the context information and not prior knowledge, answer the query.
Query: {question}
Answer:
"""

In [41]:
def run_mistral(user_message, model="mistral-medium"):
    """Send `user_message` as a single user turn and return the reply text.

    The request goes through the module-level `client` with temperature 1;
    the content of the first choice in the response is returned.
    """
    chat_response = client.chat(
        model=model,
        messages=[ChatMessage(role="user", content=user_message)],
        temperature=1,
    )
    first_choice = chat_response.choices[0]
    return first_choice.message.content

In [42]:
# Ask the model the question together with the retrieved context held in `prompt`.
run_mistral(prompt)

'The two main things the author worked on before college were writing and programming. Specifically, the author wrote short stories and tried writing programs on an IBM 1401.'

# 2. LlamaIndex

In [None]:
!pip install llama-index

In [73]:
from llama_index import VectorStoreIndex, SimpleDirectoryReader
from llama_index.llms import MistralAI
from llama_index.embeddings import MistralAIEmbedding
from llama_index import ServiceContext
from llama_index.query_engine import RetrieverQueryEngine

# Load data
!wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/examples/data/paul_graham/paul_graham_essay.txt' -O 'data/paul_graham/paul_graham_essay.txt' -O pg_essay.txt
reader = SimpleDirectoryReader(input_files=["pg_essay.txt"])
documents = reader.load_data()
# Define LLM and embedding model 
llm = MistralAI(api_key=api_key,model="mistral-medium")
embed_model = MistralAIEmbedding(model_name='mistral-embed', api_key=api_key)
service_context = ServiceContext.from_defaults(llm=llm,embed_model=embed_model)
# Create vector store index 
index = VectorStoreIndex.from_documents(documents, service_context=service_context)
# Create query engine
query_engine = index.as_query_engine(similarity_top_k=2)
response = query_engine.query(
 "What were the two main things the author worked on before college?"
)
print(str(response))

--2024-01-16 18:40:06-- https://raw.githubusercontent.com/run-llama/llama_index/main/docs/examples/data/paul_graham/paul_graham_essay.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.109.133, 185.199.108.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 75042 (73K) [text/plain]
Saving to: ‘pg_essay.txt’


2024-01-16 18:40:07 (5,45 MB/s) - ‘pg_essay.txt’ saved [75042/75042]

The two main things the author worked on before college, outside of school, were writing and programming. In terms of writing, they wrote short stories, which they described as having hardly any plot and mainly featuring characters with strong feelings. As for programming, they tried writing programs on an IBM 1401 in 9th grade using an early version of Fortran. They typed programs on punch cards and stacked them in the card reader to load the program int