Tonic captainkyd committed on
Commit
764655f
·
verified ·
1 Parent(s): 555e2ea
Files changed (1) hide show
  1. app.py +10 -5
app.py CHANGED
@@ -2,7 +2,8 @@ import spaces
2
  import gradio as gr
3
  import torch
4
  import transformers
5
- from transformers import AutoModelForCausalLM, AutoTokenizer
 
6
  import os
7
 
8
  title = """# Welcome to 🌟Tonic's🐇🥷🏻Trinity
@@ -23,6 +24,11 @@ Answer the Question by exploring multiple reasoning paths as follows:
23
  - Please note that while the focus is on the final answer in the response, it should also include intermediate thoughts inline to illustrate the deliberative reasoning process.
24
  In summary, leverage a Tree of Thoughts approach to actively explore multiple reasoning paths, evaluate thoughts heuristically, and explain the process - with the goal of producing insightful answers.
25
  """
 
 
 
 
 
26
 
27
  model_path = "WhiteRabbitNeo/Trinity-13B"
28
 
@@ -32,10 +38,9 @@ if not hf_token:
32
 
33
  model = AutoModelForCausalLM.from_pretrained(
34
  model_path,
35
- torch_dtype=torch.float16,
36
- device_map="auto",
37
- load_in_8bit=True,
38
- trust_remote_code=True,
39
  )
40
 
41
  tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
 
2
  import gradio as gr
3
  import torch
4
  import transformers
5
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
6
+ import accelerate
7
  import os
8
 
9
  title = """# Welcome to 🌟Tonic's🐇🥷🏻Trinity
 
24
  - Please note that while the focus is on the final answer in the response, it should also include intermediate thoughts inline to illustrate the deliberative reasoning process.
25
  In summary, leverage a Tree of Thoughts approach to actively explore multiple reasoning paths, evaluate thoughts heuristically, and explain the process - with the goal of producing insightful answers.
26
  """
27
+ quantization_config = BitsAndBytesConfig(
28
+ load_in_4bit=True,
29
+ bnb_4bit_use_double_quant=True,
30
+ bnb_4bit_compute_dtype=torch.bfloat16
31
+ )
32
 
33
  model_path = "WhiteRabbitNeo/Trinity-13B"
34
 
 
38
 
39
  model = AutoModelForCausalLM.from_pretrained(
40
  model_path,
41
+ device_map="auto",
42
+ trust_remote_code=True,
43
+ quantization_config=quantization_config
 
44
  )
45
 
46
  tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)