myr1-2

Running on Zero

App Files Files Community

wuhp committed 14 days ago

Commit

436e3c6

verified ·

1 Parent(s): eabbd4b

Update app.py

Browse files

Files changed (1) hide show

app.py +76 -40

app.py CHANGED Viewed

@@ -1,54 +1,77 @@
 import gradio as gr
 import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM
-# ----------------------------------------------------------------
-# 1) Points to your Hugging Face repo and subfolder
-#    (where config.json, tokenizer.json, model safetensors, etc. reside).
-# ----------------------------------------------------------------
-MODEL_REPO = "wuhp/myr1"
-SUBFOLDER = "myr1"
-# ----------------------------------------------------------------
-# 2) Load the tokenizer
-#    trust_remote_code=True allows custom code (e.g., DeepSeek config/classes).
-# ----------------------------------------------------------------
 tokenizer = AutoTokenizer.from_pretrained(
     MODEL_REPO,
     subfolder=SUBFOLDER,
     trust_remote_code=True
 )
-# ----------------------------------------------------------------
-# 3) Load the model
-#    - device_map="auto" tries to place layers on GPU and offload remainder to CPU if needed
-#    - torch_dtype can be float16, float32, bfloat16, etc., depending on GPU support
-# ----------------------------------------------------------------
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_REPO,
     subfolder=SUBFOLDER,
     trust_remote_code=True,
-    device_map="auto",
-    torch_dtype=torch.float16,
     low_cpu_mem_usage=True
 )
-# Put model in evaluation mode
 model.eval()
-# ----------------------------------------------------------------
-# 4) Define the generation function
-# ----------------------------------------------------------------
-def generate_text(prompt, max_length=64, temperature=0.7, top_p=0.9):
     print("=== Starting generation ===")
-    # Move input tokens to the same device as model
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
     try:
-        # Generate tokens
         output_ids = model.generate(
             **inputs,
-            max_new_tokens=max_length,  # This controls how many tokens beyond the prompt are generated
             temperature=temperature,
             top_p=top_p,
             do_sample=True,
@@ -58,32 +81,45 @@ def generate_text(prompt, max_length=64, temperature=0.7, top_p=0.9):
     except Exception as e:
         print(f"Error during generation: {e}")
         return str(e)
-    # Decode back to text (skipping special tokens)
     return tokenizer.decode(output_ids[0], skip_special_tokens=True)
-# ----------------------------------------------------------------
 # 5) Build a Gradio UI
-# ----------------------------------------------------------------
 demo = gr.Interface(
     fn=generate_text,
     inputs=[
         gr.Textbox(
             lines=4,
             label="Prompt",
-            placeholder="Try a short prompt, e.g., Hello!"
         ),
-        gr.Slider(8, 512, value=64, step=1, label="Max New Tokens"),
-        gr.Slider(0.0, 1.5, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(0.0, 1.0, value=0.9, step=0.05, label="Top-p"),
     ],
     outputs="text",
-    title="DeepSeek R1 Demo",
-    description="Generates text using the large DeepSeek model."
 )
-# ----------------------------------------------------------------
-# 6) Run the Gradio app
-# ----------------------------------------------------------------
 if __name__ == "__main__":
     demo.launch()

+import os
+import sys
+import ast
 import gradio as gr
 import torch
+import streamlit as st
+# No "spaces" or "transformers_gradio" imports are needed here: this app loads the myr1 model
+# directly rather than wrapping external Spaces demos.
 from transformers import AutoTokenizer, AutoModelForCausalLM
+# ------------------------------------------------------------------------------
+# 1) OPTIONAL: Environment Variable Code (MY_SCRIPT_CONTENT)
+#    If you don't need this dynamic script execution, remove the entire block.
+# ------------------------------------------------------------------------------
+script_repr = os.getenv("MY_SCRIPT_CONTENT")
+if script_repr:
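+    # Parse the environment variable as a Python literal, then exec the result.
+    # WARNING: exec runs arbitrary code; only set MY_SCRIPT_CONTENT from a trusted source.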
+    try:
+        script_content = ast.literal_eval(script_repr)
+        exec(script_content)
+    except (ValueError, SyntaxError) as e:
+        # Using Streamlit to display an error message in case this is run within a Streamlit environment
+        st.error(f"Error evaluating script from environment variable: {e}")
+else:
+    print("No extra script content found in 'MY_SCRIPT_CONTENT'.")
+# ------------------------------------------------------------------------------
+# 2) Model References for "myr1" from Hugging Face
+#    Make sure your HF repo is "wuhp/myr1" and your actual model files are in subfolder "myr1"
+# ------------------------------------------------------------------------------
+MODEL_REPO = "wuhp/myr1"    # The HF repository name
+SUBFOLDER = "myr1"          # The folder inside the repo containing config.json etc.
+# ------------------------------------------------------------------------------
+# 3) Load Tokenizer & Model
+#    trust_remote_code=True to allow custom config/modeling if you have them in the repo.
+# ------------------------------------------------------------------------------
+print("Loading tokenizer...")
 tokenizer = AutoTokenizer.from_pretrained(
     MODEL_REPO,
     subfolder=SUBFOLDER,
     trust_remote_code=True
 )
+print("Loading model...")
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_REPO,
     subfolder=SUBFOLDER,
     trust_remote_code=True,
+    device_map="auto",         # auto-shard across GPU(s) if needed, else CPU fallback
+    torch_dtype=torch.float16,  # or torch.float32, torch.bfloat16, etc.
     low_cpu_mem_usage=True
 )
 model.eval()
+print("Model loaded successfully.")
+# ------------------------------------------------------------------------------
+# 4) Define Generation Function for Gradio
+# ------------------------------------------------------------------------------
+def generate_text(prompt, max_new_tokens=64, temperature=0.7, top_p=0.9):
+    """
+    Generate text using the myr1 model from Hugging Face.
+    """
     print("=== Starting generation ===")
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
     try:
         output_ids = model.generate(
             **inputs,
+            max_new_tokens=max_new_tokens,  # limit how many tokens beyond the prompt
             temperature=temperature,
             top_p=top_p,
             do_sample=True,
     except Exception as e:
         print(f"Error during generation: {e}")
         return str(e)
     return tokenizer.decode(output_ids[0], skip_special_tokens=True)
+# ------------------------------------------------------------------------------
 # 5) Build a Gradio UI
+# ------------------------------------------------------------------------------
 demo = gr.Interface(
     fn=generate_text,
     inputs=[
         gr.Textbox(
             lines=4,
             label="Prompt",
+            placeholder="Ask a question or start a story..."
+        ),
+        gr.Slider(
+            minimum=8, maximum=512, step=1, value=64,
+            label="Max New Tokens"
+        ),
+        gr.Slider(
+            minimum=0.0, maximum=1.5, step=0.1, value=0.7,
+            label="Temperature"
+        ),
+        gr.Slider(
+            minimum=0.0, maximum=1.0, step=0.05, value=0.9,
+            label="Top-p (nucleus sampling)"
         ),
     ],
     outputs="text",
+    title="DeepSeek myr1 Demo",
+    description=(
+        "Generates text with the 'myr1' model from the Hugging Face Hub. "
+        "Enter a prompt and adjust generation settings."
+    )
 )
+# ------------------------------------------------------------------------------
+# 6) Launch the App
+# ------------------------------------------------------------------------------
 if __name__ == "__main__":
+    print("Launching Gradio demo...")
     demo.launch()