File size: 1,115 Bytes
084003f
ebf0322
 
 
85a9901
 
 
 
 
ebf0322
 
 
 
084003f
2e7be67
 
 
 
85a9901
 
 
 
 
 
 
ebf0322
 
 
85a9901
 
ebf0322
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import gradio as gr
import openai
import os
import json
import numpy as np
import torch

from transformers import AutoProcessor, AutoModelForCausalLM

# Credentials and app auth are supplied via environment variables.
openai.organization = os.getenv("API_ORG")
openai.api_key = os.getenv("API_KEY")
app_password = os.getenv("APP_PASSWORD")
app_username = os.getenv("APP_USERNAME")

# FIX: the original checkpoint was "openai/clip-vit-base-patch32", but CLIP is a
# dual-encoder with no causal-LM head, so AutoModelForCausalLM.from_pretrained()
# raises at load time. GIT is an image-captioning causal LM whose processor/
# generate/batch_decode usage matches generate() below exactly.
checkpoint = "microsoft/git-base"
processor = AutoProcessor.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(checkpoint)

def generate(input_image):
    """Generate a text caption for a PIL image using the module-level captioning model.

    Args:
        input_image: PIL image supplied by the Gradio Image component.

    Returns:
        The decoded caption string (first sequence from batch_decode).
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    # FIX: the original moved only the inputs to `device`; on a CUDA machine the
    # model stayed on CPU and generate() raised a device-mismatch error. Move the
    # model too (no-op after the first call once it already lives on `device`).
    model.to(device)
    inputs = processor(images=input_image, return_tensors="pt").to(device)
    generated_ids = model.generate(pixel_values=inputs.pixel_values, max_length=50)
    # batch_decode returns one string per sequence; we generate a single caption.
    generated_caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
    return generated_caption

# Assemble the web UI: a single image input mapped to a caption text output.
image_input = gr.Image(label="Input", elem_id="input_image", type="pil")
caption_output = gr.Text(label="Generated Caption")

demo = gr.Interface(
    fn=generate,
    inputs=image_input,
    outputs=caption_output,
    flagging_options=[],
)

# Serve locally (no public share link) behind simple username/password auth
# taken from the environment at startup.
demo.launch(share=False, auth=(app_username, app_password))