thesven committed on
Commit
964cd90
·
1 Parent(s): 8c6800e

basic setup

Browse files
Files changed (2) hide show
  1. app.py +36 -0
  2. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import spaces
3
+ import torch
4
+ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
5
+
6
# Checkpoint identifier for this Space.
# NOTE(review): currently unused — start() hard-codes the same path in a local
# variable; consider having start() read this constant instead. Verify no
# external code imports this name before removing it.
model_to_use = "thesven/Llama3-8B-SFT-code_bagel-bnb-4bit"
7
+
8
@spaces.GPU
def start(n):
    """Load the 4-bit quantized Llama3 checkpoint and generate a test response.

    Parameters
    ----------
    n : int | float | None
        Value from the Gradio ``gr.Number`` input. When it is a positive
        number it is used as the ``max_new_tokens`` generation budget;
        otherwise the previous hard-coded budget of 50 is kept, so existing
        callers see unchanged behavior. (The original ignored ``n`` entirely.)

    Returns
    -------
    str
        The decoded generation (prompt included, special tokens stripped)
        for a fixed smoke-test prompt.
    """
    model_name_or_path = "thesven/Llama3-8B-SFT-code_bagel-bnb-4bit"

    # Load the checkpoint in 4-bit NF4 precision so it fits on a single GPU.
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype="bfloat16",
    )

    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
    model = AutoModelForCausalLM.from_pretrained(
        model_name_or_path,
        device_map="auto",
        trust_remote_code=True,
        quantization_config=bnb_config,
    )

    # BUGFIX: the original wrote `model.pad_token = model.config.eos_token_id`,
    # assigning an attribute that does not exist on the model object. Padding
    # ids belong on the tokenizer and on `model.config.pad_token_id`. Llama3
    # ships without a pad token, so fall back to EOS.
    if tokenizer.pad_token_id is None:
        tokenizer.pad_token_id = tokenizer.eos_token_id
    model.config.pad_token_id = tokenizer.pad_token_id

    # Fixed smoke-test prompt (runtime string preserved from the original).
    input_text = "Hello, how are you?"
    # BUGFIX: `.cuda()` assumed CUDA unconditionally; `model.device` honors
    # whatever placement `device_map="auto"` chose.
    input_ids = tokenizer(input_text, return_tensors='pt').input_ids.to(model.device)

    # Use the numeric UI input as the generation budget when it is a positive
    # number; otherwise keep the original default of 50 new tokens.
    try:
        max_new = int(n) if n and int(n) > 0 else 50
    except (TypeError, ValueError):
        max_new = 50

    output = model.generate(inputs=input_ids, max_new_tokens=max_new)
    generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
    return generated_text
34
+
35
# Wire the generation function to a minimal UI: one number in, text out.
demo = gr.Interface(fn=start, inputs=gr.Number(), outputs=gr.Text())

# Launch only when executed as a script (Hugging Face Spaces runs app.py
# directly); guarding prevents the server starting on a mere import.
if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ accelerate
2
+ bitsandbytes
3
+ transformers