igor committed
Commit 5728ac8 · 1 Parent(s): 86682d1
Files changed (2)
  1. app.py +23 -17
  2. requirements.txt +6 -3
app.py CHANGED
@@ -1,27 +1,33 @@
+from threading import Thread
+import os
+from typing import Iterator
+
 import gradio as gr
+import torch
 from gradio.components import textbox
 from huggingface_hub import hf_hub_download
+from llama_cpp import Llama
+from transformers import AutoModelForCausalLM, TextIteratorStreamer, LlamaTokenizer
 
-from vllm import LLM, SamplingParams
+from transformers import AutoTokenizer, AutoModelForCausalLM
 
-def run_gguf_inference(prompt):
-    PROMPT_TEMPLATE = "<|user|>\n{prompt}</s>\n<|assistant|>\n"  # noqa: E501
-    prompt = PROMPT_TEMPLATE.format(prompt=prompt)
-    # Create a sampling params object.
-    sampling_params = SamplingParams(temperature=0, max_tokens=128)
+llm = Llama.from_pretrained(
+    repo_id="igor-im/flux_prompt_expander",
+    filename="unsloth.Q8_0.gguf",
+    # n_gpu_layers=-1,  # Uncomment to use GPU acceleration
+    # seed=1337,        # Uncomment to set a specific seed
+    # n_ctx=2048,       # Uncomment to increase the context window
+)
 
-    # Create an LLM.
-    llm = LLM(model="igor-im/flux_prompt_expander",
-              tokenizer="igor-im/flux_prompt_expander",
-              gpu_memory_utilization=0.95)
+def response(prompt):
+    llm_response = llm(
+        prompt,          # prompt text passed in from the Gradio textbox
+        max_tokens=200,  # generate up to 200 tokens; set to None to run to the end of the context window
+        echo=True        # echo the prompt back in the output
+    )
+    return llm_response.get('choices')[0].get('text')
 
-    outputs = llm.generate(prompt, sampling_params)
-    # Print the outputs.
-    for output in outputs:
-        prompt = output.prompt
-        generated_text = output.outputs[0].text
-        print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
 
-interface = gr.Interface(fn=run_gguf_inference, inputs='textbox', outputs='textbox')
+interface = gr.Interface(fn=response, inputs='textbox', outputs='textbox')
 
 interface.launch()
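The new app.py keeps the hf_hub_download import without using it; an equivalent way to load the same GGUF file is to download it explicitly and hand the local path to llama-cpp-python. The sketch below assumes the repo id, filename, and sample prompt shown in the diff above; it is only an illustration of the new inference path, not part of the commit.

# Sketch: download the GGUF with hf_hub_download, then point llama-cpp-python at the
# local path. Equivalent to the Llama.from_pretrained(...) call used in the commit.
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

model_path = hf_hub_download(
    repo_id="igor-im/flux_prompt_expander",  # repo id from the commit
    filename="unsloth.Q8_0.gguf",            # GGUF filename from the commit
)

llm = Llama(model_path=model_path)  # n_gpu_layers=-1 would enable GPU offload, as noted in the commit

# llama-cpp-python returns an OpenAI-style completion dict; with echo=True the
# prompt is included in the generated text under choices[0]["text"].
completion = llm("A portrait of a man.", max_tokens=200, echo=True)
print(completion["choices"][0]["text"])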
requirements.txt CHANGED
@@ -1,4 +1,7 @@
 huggingface_hub==0.25.2
-gradio
-transformers
-vllm
+gradio~=5.7.1
+transformers~=4.46.3
+vllm~=0.6.4.post1
+torch~=2.5.1
+huggingface-hub~=0.25.2
+llama-cpp-python
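Since most dependencies are now pinned with ~= specifiers, a quick way to confirm the installed environment matches the file is to print the resolved versions. A small sketch, assuming it runs inside the Space's Python environment; the distribution names are taken from requirements.txt above.

# Sketch: report installed versions of the distributions pinned in requirements.txt.
from importlib.metadata import version, PackageNotFoundError

for dist in ("huggingface_hub", "gradio", "transformers", "vllm", "torch", "llama-cpp-python"):
    try:
        print(f"{dist}: {version(dist)}")
    except PackageNotFoundError:
        print(f"{dist}: not installed")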