igor committed
Commit 5728ac8 · 1 Parent(s): 86682d1
Files changed (2)
  1. app.py +23 -17
  2. requirements.txt +6 -3
app.py CHANGED
@@ -1,27 +1,33 @@
+from threading import Thread
+import os
+from typing import Iterator
+
 import gradio as gr
+import torch
 from gradio.components import textbox
 from huggingface_hub import hf_hub_download
+from llama_cpp import Llama
+from transformers import AutoModelForCausalLM, TextIteratorStreamer, LlamaTokenizer
 
-from vllm import LLM, SamplingParams
+from transformers import AutoTokenizer, AutoModelForCausalLM
 
-def run_gguf_inference(prompt):
-    PROMPT_TEMPLATE = "<|user|>\n{prompt}</s>\n<|assistant|>\n"  # noqa: E501
-    prompt = PROMPT_TEMPLATE.format(prompt=prompt)
-    # Create a sampling params object.
-    sampling_params = SamplingParams(temperature=0, max_tokens=128)
+llm = Llama.from_pretrained(
+    repo_id="igor-im/flux_prompt_expander",
+    filename="unsloth.Q8_0.gguf",
+    # n_gpu_layers=-1,  # Uncomment to use GPU acceleration
+    # seed=1337,        # Uncomment to set a specific seed
+    # n_ctx=2048,       # Uncomment to increase the context window
+)
 
-    # Create an LLM.
-    llm = LLM(model="igor-im/flux_prompt_expander",
-              tokenizer="igor-im/flux_prompt_expander",
-              gpu_memory_utilization=0.95)
+def response(prompt):
+    llm_response = llm(
+        prompt,          # prompt text passed in from the Gradio textbox
+        max_tokens=200,  # generate up to 200 tokens; set to None to run to the end of the context window
+        echo=True        # echo the prompt back in the output
+    )
+    return llm_response.get('choices')[0].get('text')
 
-    outputs = llm.generate(prompt, sampling_params)
-    # Print the outputs.
-    for output in outputs:
-        prompt = output.prompt
-        generated_text = output.outputs[0].text
-        print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
 
-interface = gr.Interface(fn=run_gguf_inference, inputs='textbox', outputs='textbox')
+interface = gr.Interface(fn=response, inputs='textbox', outputs='textbox')
 
 interface.launch()
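The new app.py keeps the hf_hub_download import without using it; an equivalent way to load the same GGUF file is to download it explicitly and hand the local path to llama-cpp-python. The sketch below assumes the repo id, filename, and sample prompt shown in the diff above; it is only an illustration of the new inference path, not part of the commit.

# Sketch: download the GGUF with hf_hub_download, then point llama-cpp-python at the
# local path. Equivalent to the Llama.from_pretrained(...) call used in the commit.
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

model_path = hf_hub_download(
    repo_id="igor-im/flux_prompt_expander",  # repo id from the commit
    filename="unsloth.Q8_0.gguf",            # GGUF filename from the commit
)

llm = Llama(model_path=model_path)  # n_gpu_layers=-1 would enable GPU offload, as noted in the commit

# llama-cpp-python returns an OpenAI-style completion dict; with echo=True the
# prompt is included in the generated text under choices[0]["text"].
completion = llm("A portrait of a man.", max_tokens=200, echo=True)
print(completion["choices"][0]["text"])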
requirements.txt CHANGED
@@ -1,4 +1,7 @@
 huggingface_hub==0.25.2
-gradio
-transformers
-vllm
+gradio~=5.7.1
+transformers~=4.46.3
+vllm~=0.6.4.post1
+torch~=2.5.1
+huggingface-hub~=0.25.2
+llama-cpp-python
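Since most dependencies are now pinned with ~= specifiers, a quick way to confirm the installed environment matches the file is to print the resolved versions. A small sketch, assuming it runs inside the Space's Python environment; the distribution names are taken from requirements.txt above.

# Sketch: report installed versions of the distributions pinned in requirements.txt.
from importlib.metadata import version, PackageNotFoundError

for dist in ("huggingface_hub", "gradio", "transformers", "vllm", "torch", "llama-cpp-python"):
    try:
        print(f"{dist}: {version(dist)}")
    except PackageNotFoundError:
        print(f"{dist}: not installed")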