datascientist22 committed
Commit • 4ee92ad
Parent(s): eaf4e19
Update app.py

app.py CHANGED
@@ -2,16 +2,16 @@ import streamlit as st
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
 
-# Load the tokenizer and model for CPU
+# Load the tokenizer and model for CPU without bitsandbytes
 tokenizer = AutoTokenizer.from_pretrained("MohamedMotaz/Examination-llama-8b-4bit")
+
+# Load the model in full precision, explicitly avoiding 8-bit quantization
 model = AutoModelForCausalLM.from_pretrained(
-    "MohamedMotaz/Examination-llama-8b-4bit",
-    torch_dtype=torch.float32  #
+    "MohamedMotaz/Examination-llama-8b-4bit",
+    torch_dtype=torch.float32,  # Ensure it uses full precision (float32)
+    device_map="cpu",  # Force the model to run on the CPU
 )
 
-# Ensure the model runs on CPU
-model = model.to("cpu")
-
 # App Title
 st.title("Exam Corrector: Automated Grading with LLama 8b Model (CPU)")
 
@@ -32,8 +32,8 @@ if st.button("Grade Answer"):
     inputs = f"Model Answer: {model_answer}\n\nStudent Answer: {student_answer}\n\nResponse:"
 
     # Tokenize the inputs using PyTorch tensors
-    input_ids = tokenizer(inputs, return_tensors="pt").input_ids
-
+    input_ids = tokenizer(inputs, return_tensors="pt").input_ids
+
     # Generate the response using the model (PyTorch, CPU-based)
     with torch.no_grad():
         outputs = model.generate(input_ids, max_length=200)
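A note on the new loading path, not part of this commit: passing device_map to from_pretrained requires the accelerate package to be installed, and holding an 8B-parameter model in float32 takes roughly 32 GB of RAM (4 bytes per parameter). A minimal sketch to sanity-check that the weights really land on the CPU in full precision; the assertions are illustrative and do not appear in app.py:

import torch
from transformers import AutoModelForCausalLM

# Illustrative sanity check, reusing the commit's loading arguments.
model = AutoModelForCausalLM.from_pretrained(
    "MohamedMotaz/Examination-llama-8b-4bit",
    torch_dtype=torch.float32,
    device_map="cpu",
)
param = next(model.parameters())
assert param.device.type == "cpu"    # weights placed on the CPU
assert param.dtype == torch.float32  # full precision, not 4-/8-bit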
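The second hunk stops at the generate call. For completeness, a minimal sketch of how the generated IDs would typically be decoded for display; the prompt-stripping and the st.write call are assumptions, not code from this diff:

# Assumed continuation, not part of this commit.
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
# For causal LMs, generate() returns prompt + continuation, so drop the prompt text.
response = response[len(inputs):].strip()
st.write(response)  # hypothetical display call

One design note: max_length=200 caps prompt and completion tokens combined, so a long model answer plus student answer leaves little room for the graded response; max_new_tokens is the usual way to bound only the newly generated tokens.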