|
import gradio as gr |
|
import openai |
|
import os |
|
import json |
|
import numpy as np |
|
import torch |
|
|
|
from transformers import AutoProcessor, AutoModelForCausalLM |
|
|
|
# Credentials and login details are read from the environment so that no
# secret lives in the source; each value is None when its variable is unset.
openai.organization = os.getenv("API_ORG")
openai.api_key = os.getenv("API_KEY")
app_password = os.getenv("APP_PASSWORD")
app_username = os.getenv("APP_USERNAME")

# The checkpoint has to be a generative image-to-text model. The previous
# value ("openai/clip-vit-base-patch32") is a contrastive CLIP model without
# a causal-LM head, so AutoModelForCausalLM could not load it.
checkpoint = "microsoft/git-base"
processor = AutoProcessor.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(checkpoint)
# Run on the GPU when one is available; the inputs passed to the model must
# live on the same device as its weights.
model.to("cuda" if torch.cuda.is_available() else "cpu")
|
|
|
def generate(input_image):
    """Return a caption generated for ``input_image``.

    Args:
        input_image: Image to caption, in a form accepted by the module-level
            ``processor`` (the UI passes a PIL image), or ``None`` when the
            request was submitted without an image.

    Returns:
        The decoded caption for the image, or an empty string when
        ``input_image`` is ``None``.
    """
    # Nothing was uploaded: return an empty caption instead of failing
    # inside the processor.
    if input_image is None:
        return ""

    # Send the inputs to whichever device the model's weights are on. Picking
    # "cuda" independently of the model caused a device mismatch whenever the
    # model had been left on the CPU.
    inputs = processor(images=input_image, return_tensors="pt").to(model.device)
    generated_ids = model.generate(pixel_values=inputs.pixel_values, max_length=50)
    # A single image goes in, so the batch holds exactly one caption.
    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
|
|
|
# Widgets for the UI: one image in (handed to the callback as a PIL image),
# one text box out.
image_input = gr.Image(label="Input", elem_id="input_image", type="pil")
caption_output = gr.Text(label="Generated Caption")

demo = gr.Interface(
    fn=generate,
    inputs=image_input,
    outputs=caption_output,
    flagging_options=[],
)

# Launch without a public share link, behind the username/password pair read
# from the environment.
credentials = (app_username, app_password)
demo.launch(share=False, auth=credentials)
|
|