File size: 2,218 Bytes
046e707
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import warnings
warnings.filterwarnings("ignore")
import torchvision
torchvision.disable_beta_transforms_warning()


import openai
import pandas as pd
from transformers import BertTokenizer
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer


class egpt:
    """Retrieval-assisted chat wrapper around an OpenAI chat model.

    On construction the class loads a dataset of ``Context``/``Response``
    pairs, embeds every context with a SentenceTransformer model and stores
    the resulting ``(embedding, response)`` tuples in ``self.knowledgeBase``.
    ``respond`` picks the stored response whose context embedding is most
    similar to the user's text and sends both to the chat model in one prompt.
    """

    def __init__(self, apiKey, modelName='gpt-4-turbo', embeddingModel='all-MiniLM-L6-v2', datasetPath='hf://datasets/Amod/mental_health_counseling_conversations/combined_dataset.json'):
        """Configure the OpenAI client, load the models and build the knowledge base.

        Args:
            apiKey: Value assigned to the module-global ``openai.api_key``.
            modelName: Chat model name passed to the completion call.
            embeddingModel: Model name handed to ``SentenceTransformer``.
            datasetPath: Location of the JSON Lines dataset read by
                ``loadDataset``.
        """
        # NOTE: this sets the key globally on the ``openai`` module, so it is
        # shared by every user of that module in the process.
        openai.api_key = apiKey
        self.modelName = modelName
        # Not read by any method of this class; kept so existing code that
        # accesses ``.tokenizer`` keeps working.
        self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        self.embeddingModel = SentenceTransformer(embeddingModel)
        self.dataset = self.loadDataset(datasetPath)
        self.knowledgeBase = self.createKnowledgeBase()

    def loadDataset(self, path):
        """Read a JSON Lines file and return its ``[Context, Response]`` rows.

        Args:
            path: Path or URL accepted by ``pandas.read_json``.

        Returns:
            A list of two-element lists, one ``[context, response]`` per record.
        """
        frame = pd.read_json(path, lines=True)
        return frame[['Context', 'Response']].values.tolist()

    def createKnowledgeBase(self):
        """Embed every dataset context and pair it with its response.

        Returns:
            A list of ``(embedding, response)`` tuples in dataset order; empty
            when the dataset has no rows.
        """
        if not self.dataset:
            return []
        contexts = [context for context, _ in self.dataset]
        responses = [response for _, response in self.dataset]
        # One batched encode call instead of one call per row.
        embeddings = self.embeddingModel.encode(contexts)
        return list(zip(embeddings, responses))

    def getSimilarResponse(self, userContext):
        """Return the stored response whose context best matches ``userContext``.

        Args:
            userContext: Text to embed and compare against the knowledge base.

        Returns:
            The response paired with the highest cosine similarity; the first
            such entry wins on ties.

        Raises:
            ValueError: If the knowledge base is empty.
        """
        if not self.knowledgeBase:
            raise ValueError('knowledge base is empty; no response to match against')
        userEmbedding = self.embeddingModel.encode(userContext)
        kbEmbeddings = [kbEmbedding for kbEmbedding, _ in self.knowledgeBase]
        # Single call scoring the query against every stored embedding; the
        # result has shape (1, n), so row 0 holds all similarities.
        similarities = cosine_similarity([userEmbedding], kbEmbeddings)[0]
        bestMatchIdx = int(similarities.argmax())
        return self.knowledgeBase[bestMatchIdx][1]

    def queryGpt(self, context):
        """Send ``context`` as a single user message and return the reply text.

        Args:
            context: Prompt text placed in the user message.

        Returns:
            The ``content`` of the first choice's message.
        """
        # NOTE(review): ``openai.ChatCompletion`` looks like the pre-1.0 SDK
        # interface — confirm the pinned openai version before upgrading.
        response = openai.ChatCompletion.create(
            model=self.modelName,
            messages=[{'role': 'user', 'content': context}]
        )
        return response.choices[0].message['content']

    def respond(self, userContext):
        """Answer ``userContext`` using the closest stored response as a hint.

        Args:
            userContext: The user's message.

        Returns:
            The chat model's reply to a prompt containing the user's message
            and the most similar stored response.
        """
        similarResponse = self.getSimilarResponse(userContext)
        prompt = f'Given the following context and a similar response, please respond appropriately:\n\nContext: {userContext}\n\nSimilar Response: {similarResponse}'
        return self.queryGpt(prompt)