wop committed on
Commit 48b3788 · verified · 1 Parent(s): 2d0f9fd

Update app.py

Files changed (1): app.py +25 -21
app.py CHANGED
@@ -13,34 +13,38 @@ model = GPT2LMHeadModel.from_pretrained(model_path).to(device)
 # Set model to evaluation mode
 model.eval()

-# Function to generate text based on input prompt
+# Function to generate text in a stream-based manner
 def generate_text(prompt):
     # Tokenize and encode the input prompt
     input_ids = tokenizer.encode(prompt, return_tensors="pt").to(device)
+    max_length = 50  # Maximum length of generated text

-    # Generate continuation
+    # Generate continuation with streaming tokens
     with torch.no_grad():
-        generated_ids = model.generate(
+        for generated_ids in model.generate(
             input_ids,
-            max_length=50,                        # Maximum length of generated text
-            num_return_sequences=1,               # Generate 1 sequence
-            pad_token_id=tokenizer.eos_token_id,  # Use EOS token for padding
-            do_sample=True,                       # Enable sampling
-            top_k=50,                             # Top-k sampling
-            top_p=0.95                            # Nucleus sampling
-        )
-
-    # Decode the generated text
-    generated_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
-    return generated_text
-
-# Create a Gradio interface
+            max_length=max_length,
+            num_return_sequences=1,
+            pad_token_id=tokenizer.eos_token_id,
+            do_sample=True,
+            top_k=50,
+            top_p=0.95,
+            output_scores=True,  # Include scores for sampling
+            return_dict_in_generate=True,
+            use_cache=True
+        ).sequences:
+
+            # Decode each step incrementally
+            decoded_text = tokenizer.decode(generated_ids, skip_special_tokens=True)
+            yield decoded_text  # Stream the partial text back to the UI
+
+# Create a Gradio interface with streaming enabled
 interface = gr.Interface(
-    fn=generate_text,               # Function to call when interacting with the UI
-    inputs="text",                  # Input type: Single-line text
-    outputs="text",                 # Output type: Text (the generated output)
-    title="Quble Text Generation",  # Title of the UI
-    description="Enter a prompt to generate text using Quble."  # Simple description
+    fn=generate_text,               # Function to call when interacting with the UI
+    inputs="text",                  # Input type: Single-line text
+    outputs=gr.Markdown(),          # Stream output using Markdown
+    title="Quble Text Generation",  # Title of the UI
+    description="Enter a prompt to generate text using Quble with live streaming."  # Simple description
 )

 # Launch the Gradio app
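
One caveat on the new loop: `model.generate()` runs the whole decoding pass before it returns, and with `return_dict_in_generate=True` the `.sequences` field holds one finished sequence per `num_return_sequences`. Iterating over it therefore yields the complete text once, not token by token. For genuinely incremental output, `transformers.TextIteratorStreamer` is the usual route. Below is a minimal sketch under that assumption, reusing the same GPT-2 setup as the app; the `"gpt2"` checkpoint is an assumed stand-in for the app's `model_path`:

```python
# Hypothetical sketch: token-by-token streaming via TextIteratorStreamer.
# Mirrors app.py's setup; "gpt2" is an assumed stand-in for model_path.
from threading import Thread

import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer, TextIteratorStreamer

device = "cuda" if torch.cuda.is_available() else "cpu"
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2").to(device)
model.eval()

def generate_text(prompt):
    input_ids = tokenizer.encode(prompt, return_tensors="pt").to(device)
    # skip_prompt=True keeps the echoed prompt out of the streamed text
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(
        input_ids=input_ids,
        max_length=50,
        do_sample=True,
        top_k=50,
        top_p=0.95,
        pad_token_id=tokenizer.eos_token_id,
        streamer=streamer,
    )
    # generate() blocks until done, so run it on a worker thread
    # and consume decoded chunks from the streamer as they arrive
    Thread(target=model.generate, kwargs=generation_kwargs).start()
    text = ""
    for chunk in streamer:
        text += chunk
        yield text  # each yield updates the Gradio output in place
```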
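
On the Gradio side, a generator `fn` is all that streaming requires: each `yield` re-renders the output component, whether it is `gr.Markdown()` as in this commit or a plain `gr.Textbox()`. One version-dependent detail worth checking: on Gradio 3.x, generator functions stream only when the queue is enabled (e.g. `interface.queue().launch()`), while Gradio 4.x enables the queue by default.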