lyimo committed on
Commit
b91b8c6
1 Parent(s): 0b2e271

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -13
app.py CHANGED
@@ -1,39 +1,39 @@
1
  import os
2
- from transformers import pipeline
 
3
  from sentence_transformers import SentenceTransformer
4
  from sklearn.metrics.pairwise import cosine_similarity
5
  import numpy as np
6
  import pandas as pd
7
- import gradio as gr
8
 
9
  # Load pre-trained Sentence Transformer model
10
- model = SentenceTransformer('LaBSE')
11
 
12
  # Load questions and answers from the CSV file
13
  df = pd.read_csv('combined_questions_and_answers.csv')
14
 
15
  # Encode all questions in the dataset
16
- question_embeddings = model.encode(df['Question'].tolist())
17
 
18
  # Hugging Face API details for Meta-Llama 3B
19
- api_key = os.getenv("HUGGINGFACE_API_KEY")
20
- if not api_key:
21
  raise ValueError("Hugging Face API key not found in environment variables. Please set the HUGGINGFACE_API_KEY environment variable.")
22
 
23
- pipe = pipeline("text-generation", model="meta-llama/Meta-Llama-3-8B-Instruct", token=api_key)
 
 
24
 
25
  # Function to refine and translate text using Meta-Llama 3B
26
  def refine_text(prompt):
27
- messages = [
28
- {"role": "user", "content": prompt},
29
- ]
30
- response = pipe(messages)
31
- return response[0]['generated_text']
32
 
33
  # Function to find the most similar question and provide the answer
34
  def get_answer(user_question, threshold=0.30):
35
  # Encode the user question
36
- user_embedding = model.encode(user_question)
37
 
38
  # Calculate cosine similarities
39
  similarities = cosine_similarity([user_embedding], question_embeddings)
 
1
  import os
2
+ import gradio as gr
3
+ from transformers import AutoTokenizer, AutoModelForCausalLM
4
  from sentence_transformers import SentenceTransformer
5
  from sklearn.metrics.pairwise import cosine_similarity
6
  import numpy as np
7
  import pandas as pd
 
8
 
9
  # Load pre-trained Sentence Transformer model
10
+ model_sentence_transformer = SentenceTransformer('LaBSE')
11
 
12
  # Load questions and answers from the CSV file
13
  df = pd.read_csv('combined_questions_and_answers.csv')
14
 
15
  # Encode all questions in the dataset
16
+ question_embeddings = model_sentence_transformer.encode(df['Question'].tolist())
17
 
18
  # Hugging Face API details for Meta-Llama 3B
19
+ HF_TOKEN = os.environ.get("HUGGINGFACE_API_KEY", None)
20
+ if not HF_TOKEN:
21
  raise ValueError("Hugging Face API key not found in environment variables. Please set the HUGGINGFACE_API_KEY environment variable.")
22
 
23
+ # Load the tokenizer and model
24
+ tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
25
+ model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct", device_map="auto")
26
 
27
  # Function to refine and translate text using Meta-Llama 3B
28
  def refine_text(prompt):
29
+ inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
30
+ outputs = model.generate(**inputs, max_new_tokens=50)
31
+ return tokenizer.decode(outputs[0], skip_special_tokens=True)
 
 
32
 
33
  # Function to find the most similar question and provide the answer
34
  def get_answer(user_question, threshold=0.30):
35
  # Encode the user question
36
+ user_embedding = model_sentence_transformer.encode(user_question)
37
 
38
  # Calculate cosine similarities
39
  similarities = cosine_similarity([user_embedding], question_embeddings)