fffiloni committed
Commit dd17729 (1 parent: 522e040)

Update app.py

Files changed (1): app.py (+11 -36)
app.py CHANGED
@@ -1,46 +1,27 @@
 import gradio as gr
-import torch
-
-from PIL import Image
-from transformers import InstructBlipProcessor, InstructBlipForConditionalGeneration
-
-model = InstructBlipForConditionalGeneration.from_pretrained("Salesforce/instructblip-vicuna-7b")
-processor = InstructBlipProcessor.from_pretrained("Salesforce/instructblip-vicuna-7b")
-
-device = "cuda" if torch.cuda.is_available() else "cpu"
-model.to(device)
 
 import os
 hf_token = os.environ.get('HF_TOKEN')
 from gradio_client import Client
 client = Client("https://fffiloni-test-llama-api.hf.space/", hf_token=hf_token)
 
-def infer(image_input):
-    #img_url = 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/demo.jpg'
-    raw_image = Image.open(image_input).convert('RGB')
+clipi_client = Client("https://fffiloni-clip-interrogator-2.hf.space/")
+
 
-    prompt = "Can you please describe what's happening in the image, and give information about the characters and the place ?"
-    inputs = processor(images=raw_image, text=prompt, return_tensors="pt").to(device)
+def infer(image_input):
 
-    outputs = model.generate(
-        **inputs,
-        do_sample=False,
-        num_beams=5,
-        max_length=256,
-        min_length=1,
-        top_p=0.9,
-        repetition_penalty=1.5,
-        length_penalty=1.0,
-        temperature=1,
+    clipi_result = clipi_client.predict(
+        image_input,  # str (filepath or URL to image) in 'parameter_3' Image component
+        "best",       # str in 'Select mode' Radio component
+        6,            # int | float (numeric value between 2 and 24) in 'best mode max flavors' Slider component
+        api_name="/clipi2"
     )
-    generated_text = processor.batch_decode(outputs, skip_special_tokens=True)[0].strip()
-    print(generated_text)
-
+    print(clipi_result)
 
 
     llama_q = f"""
     I'll give you a simple image caption, from i want you to provide a story that would fit well with the image:
-    '{generated_text}'
+    '{clipi_result}'
 
     """
 
@@ -49,12 +30,9 @@ def infer(image_input):
         api_name="/predict"
     )
 
-
-
-
     print(f"Llama2 result: {result}")
 
-    return generated_text, result
+    return clipi_result, result
 
 css="""
 #col-container {max-width: 910px; margin-left: auto; margin-right: auto;}
@@ -67,9 +45,6 @@ with gr.Blocks(css=css) as demo:
         """
         # Image to Story
        Upload an image, get a story !
-        <br/>
-        <br/>
-        [![Duplicate this Space](https://huggingface.co/datasets/huggingface/badges/raw/main/duplicate-this-space-sm.svg)](https://huggingface.co/spaces/fffiloni/SplitTrack2MusicGen?duplicate=true) for longer audio, more control and no queue.</p>
         """
     )
     image_in = gr.Image(label="Image input", type="filepath")
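
For readers who want to try the new flow outside the Space, below is a minimal standalone sketch of app.py's inference path after this commit. The Space URLs, the /clipi2 endpoint, its parameter order, and the prompt text are taken from the diff above; the arguments passed to the Llama Space's /predict endpoint are not visible in these hunks, so the single-argument call, the result handling, and the __main__ harness (example.jpg) are illustrative assumptions.

import os

from gradio_client import Client

# Clients for the two Spaces used by the updated app.py (URLs from the diff above).
hf_token = os.environ.get("HF_TOKEN")
llama_client = Client("https://fffiloni-test-llama-api.hf.space/", hf_token=hf_token)
clipi_client = Client("https://fffiloni-clip-interrogator-2.hf.space/")


def infer(image_input):
    # Step 1: caption the image with CLIP Interrogator ("best" mode, max 6 flavors),
    # mirroring the new infer() in the diff.
    clipi_result = clipi_client.predict(
        image_input,      # filepath or URL to the image
        "best",           # interrogation mode
        6,                # max flavors for "best" mode (2-24)
        api_name="/clipi2",
    )

    # Step 2: ask the Llama 2 Space to write a story around that caption.
    # The diff keeps this prompt but does not show the /predict argument list,
    # so passing llama_q as the only positional argument is an assumption.
    llama_q = f"""
    I'll give you a simple image caption, from i want you to provide a story that would fit well with the image:
    '{clipi_result}'
    """
    result = llama_client.predict(llama_q, api_name="/predict")

    return clipi_result, result


if __name__ == "__main__":
    caption, story = infer("example.jpg")  # hypothetical local test image
    print(f"Caption: {caption}")
    print(f"Llama2 result: {story}")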