from sentence_transformers import SentenceTransformer, util from PIL import Image import gradio as gr import requests def get_image_embedding(image): image_model = SentenceTransformer('clip-ViT-B-32') img_emb = image_model.encode(image) return {"embedding": img_emb.tolist()} def get_text_embedding(text): multilingual_text_model = SentenceTransformer('clip-ViT-B-32-multilingual-v1') text_emb = multilingual_text_model.encode(text) print(text_emb) print(type(text_emb)) print(text_emb.ndim) return {"embedding": text_emb.tolist()} image_embedding = gr.Interface(fn=get_image_embedding, inputs=gr.Image(type="pil"), outputs=gr.JSON(api_name="image-embedding"), title="Image Embedding") text_embedding = gr.Interface(fn=get_text_embedding, inputs=gr.Textbox(), outputs=gr.JSON(api_name="text-embedding"), title="Text Embedding") space = gr.TabbedInterface([image_embedding, text_embedding], ["Image Embedding", "Text Embedding"]) space.launch()