Spaces: wuhp / (Running on Zero)

wuhp committed · Commit 436e3c6 · verified · 1 parent: eabbd4b

Update app.py

Files changed (1):
  1. app.py +76 -40
app.py CHANGED
@@ -1,54 +1,77 @@
+import os
+import sys
+import ast
+
 import gradio as gr
 import torch
+import streamlit as st
+# No "spaces" or "transformers_gradio" imports here, since you said you want to use *your model* (myr1),
+# not external Spaces demos.
+
 from transformers import AutoTokenizer, AutoModelForCausalLM
 
-# ----------------------------------------------------------------
-# 1) Points to your Hugging Face repo and subfolder
-#    (where config.json, tokenizer.json, model safetensors, etc. reside).
-# ----------------------------------------------------------------
-MODEL_REPO = "wuhp/myr1"
-SUBFOLDER = "myr1"
+# ------------------------------------------------------------------------------
+# 1) OPTIONAL: Environment Variable Code (MY_SCRIPT_CONTENT)
+#    If you don't need this dynamic script execution, remove the entire block.
+# ------------------------------------------------------------------------------
+script_repr = os.getenv("MY_SCRIPT_CONTENT")
+
+if script_repr:
+    # Attempt to parse & exec the script from the environment variable
+    try:
+        script_content = ast.literal_eval(script_repr)
+        exec(script_content)
+    except (ValueError, SyntaxError) as e:
+        # Using Streamlit to display an error message in case this is run within a Streamlit environment
+        st.error(f"Error evaluating script from environment variable: {e}")
+else:
+    print("No extra script content found in 'MY_SCRIPT_CONTENT'.")
 
-# ----------------------------------------------------------------
-# 2) Load the tokenizer
-#    trust_remote_code=True allows custom code (e.g., DeepSeek config/classes).
-# ----------------------------------------------------------------
+# ------------------------------------------------------------------------------
+# 2) Model References for "myr1" from Hugging Face
+#    Make sure your HF repo is "wuhp/myr1" and your actual model files are in subfolder "myr1"
+# ------------------------------------------------------------------------------
+MODEL_REPO = "wuhp/myr1"  # The HF repository name
+SUBFOLDER = "myr1"        # The folder inside the repo containing config.json etc.
+
+# ------------------------------------------------------------------------------
+# 3) Load Tokenizer & Model
+#    trust_remote_code=True to allow custom config/modeling if you have them in the repo.
+# ------------------------------------------------------------------------------
+print("Loading tokenizer...")
 tokenizer = AutoTokenizer.from_pretrained(
     MODEL_REPO,
     subfolder=SUBFOLDER,
     trust_remote_code=True
 )
 
-# ----------------------------------------------------------------
-# 3) Load the model
-# - device_map="auto" tries to place layers on GPU and offload remainder to CPU if needed
-# - torch_dtype can be float16, float32, bfloat16, etc., depending on GPU support
-# ----------------------------------------------------------------
+print("Loading model...")
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_REPO,
     subfolder=SUBFOLDER,
     trust_remote_code=True,
-    device_map="auto",
-    torch_dtype=torch.float16,
+    device_map="auto",          # auto-shard across GPU(s) if needed, else CPU fallback
+    torch_dtype=torch.float16,  # or torch.float32, torch.bfloat16, etc.
     low_cpu_mem_usage=True
 )
-
-# Put model in evaluation mode
 model.eval()
 
-# ----------------------------------------------------------------
-# 4) Define the generation function
-# ----------------------------------------------------------------
-def generate_text(prompt, max_length=64, temperature=0.7, top_p=0.9):
+print("Model loaded successfully.")
+
+# ------------------------------------------------------------------------------
+# 4) Define Generation Function for Gradio
+# ------------------------------------------------------------------------------
+def generate_text(prompt, max_new_tokens=64, temperature=0.7, top_p=0.9):
+    """
+    Generate text using the myr1 model from Hugging Face.
+    """
     print("=== Starting generation ===")
-    # Move input tokens to the same device as model
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
-
+
     try:
-        # Generate tokens
         output_ids = model.generate(
             **inputs,
-            max_new_tokens=max_length,  # This controls how many tokens beyond the prompt are generated
+            max_new_tokens=max_new_tokens,  # limit how many tokens beyond the prompt
             temperature=temperature,
             top_p=top_p,
             do_sample=True,
@@ -58,32 +81,45 @@ def generate_text(prompt, max_length=64, temperature=0.7, top_p=0.9):
     except Exception as e:
         print(f"Error during generation: {e}")
         return str(e)
-
-    # Decode back to text (skipping special tokens)
+
     return tokenizer.decode(output_ids[0], skip_special_tokens=True)
 
-# ----------------------------------------------------------------
+
+# ------------------------------------------------------------------------------
 # 5) Build a Gradio UI
-# ----------------------------------------------------------------
+# ------------------------------------------------------------------------------
 demo = gr.Interface(
     fn=generate_text,
     inputs=[
         gr.Textbox(
             lines=4,
             label="Prompt",
-            placeholder="Try a short prompt, e.g., Hello!"
+            placeholder="Ask a question or start a story..."
+        ),
+        gr.Slider(
+            minimum=8, maximum=512, step=1, value=64,
+            label="Max New Tokens"
+        ),
+        gr.Slider(
+            minimum=0.0, maximum=1.5, step=0.1, value=0.7,
+            label="Temperature"
+        ),
+        gr.Slider(
+            minimum=0.0, maximum=1.0, step=0.05, value=0.9,
+            label="Top-p (nucleus sampling)"
         ),
-        gr.Slider(8, 512, value=64, step=1, label="Max New Tokens"),
-        gr.Slider(0.0, 1.5, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(0.0, 1.0, value=0.9, step=0.05, label="Top-p"),
     ],
     outputs="text",
-    title="DeepSeek R1 Demo",
-    description="Generates text using the large DeepSeek model."
+    title="DeepSeek myr1 Demo",
+    description=(
+        "Generates text with the 'myr1' model from the Hugging Face Hub. "
+        "Enter a prompt and adjust generation settings."
+    )
 )
 
-# ----------------------------------------------------------------
-# 6) Run the Gradio app
-# ----------------------------------------------------------------
+# ------------------------------------------------------------------------------
+# 6) Launch the App
+# ------------------------------------------------------------------------------
 if __name__ == "__main__":
+    print("Launching Gradio demo...")
     demo.launch()
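
A note on the MY_SCRIPT_CONTENT block added at the top of the file: because the value goes through ast.literal_eval before exec, the environment variable must hold a Python string literal, not raw source text. A minimal sketch of the expected round-trip, with a hypothetical payload:

import ast
import os

# Hypothetical payload: repr() renders the script text as a Python string
# literal, which ast.literal_eval can safely turn back into a plain str.
os.environ["MY_SCRIPT_CONTENT"] = repr("print('hello from MY_SCRIPT_CONTENT')")

script_repr = os.getenv("MY_SCRIPT_CONTENT")
if script_repr:
    script_content = ast.literal_eval(script_repr)  # parses the literal; executes nothing
    exec(script_content)                            # prints: hello from MY_SCRIPT_CONTENT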
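The inline comment on torch_dtype notes that float32 or bfloat16 may be more appropriate depending on GPU support. One way to make that choice explicit; this selection logic is an illustration, not part of the commit:

import torch

# Prefer bf16 where the GPU supports it, fall back to fp16 on older CUDA
# devices, and use fp32 on CPU, where half-precision inference is slow/fragile.
if torch.cuda.is_available():
    dtype = torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16
else:
    dtype = torch.float32

# ...then pass torch_dtype=dtype to AutoModelForCausalLM.from_pretrained(...).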
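Once the Space is running, the gr.Interface defined above can also be called programmatically via gradio_client. The Space id below is a placeholder (only the owner name, wuhp, is visible on this page); the positional arguments mirror the input components in order:

from gradio_client import Client

client = Client("wuhp/your-space-name")  # placeholder: substitute the real Space id

result = client.predict(
    "Once upon a time,",   # Prompt
    64,                    # Max New Tokens
    0.7,                   # Temperature
    0.9,                   # Top-p
    api_name="/predict",   # default endpoint name for a single gr.Interface
)
print(result)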