# Ovis2-16B / app.py
import logging
from threading import Thread
import torch
import gradio as gr
from transformers import AutoModelForCausalLM, TextIteratorStreamer
logging.getLogger("httpx").setLevel(logging.WARNING)
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
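
# Load Ovis2-16B in bfloat16 on the GPU; trust_remote_code is required
# because the Ovis repo ships its own modeling code on the Hub.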
model_name = 'AIDC-AI/Ovis2-16B'
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True
).to(device='cuda')
text_tokenizer = model.get_text_tokenizer()
visual_tokenizer = model.get_visual_tokenizer()
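
# A single TextIteratorStreamer is reused across turns; the demo serves
# requests sequentially, so sharing one instance is safe here.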
streamer = TextIteratorStreamer(
    text_tokenizer,
    skip_prompt=True,
    skip_special_tokens=True
)

IMAGE_PLACEHOLDER = "<image>"
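
# Deterministic decoding: do_sample=False, with the sampling knobs
# (top_p / top_k / temperature) explicitly disabled.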
def initialize_gen_kwargs():
    return {
        "max_new_tokens": 1536,
        "do_sample": False,
        "top_p": None,
        "top_k": None,
        "temperature": None,
        "repetition_penalty": 1.05,
        "eos_token_id": model.generation_config.eos_token_id,
        "pad_token_id": text_tokenizer.pad_token_id,
        "use_cache": True
    }
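
# submit_chat records the pending user turn; ovis_chat later streams the
# model reply into the same chat entry.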
def submit_chat(chatbot, text_input, image_input):
    if text_input.strip() or image_input is not None:
        # Append a list, not a tuple: ovis_chat mutates the reply in place.
        chatbot.append([text_input, ""])
    # Clear only the text box here: the image must remain set until the
    # chained ovis_chat call has read it, so it is cleared afterwards.
    return chatbot, ""
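
# ovis_chat is a Gradio generator: each yield pushes the partially streamed
# reply to the Chatbot component, so the answer appears incrementally.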
def ovis_chat(chatbot, text_input, image_input):
    if not chatbot:
        # Nothing was submitted (empty text and no image); keep the UI as-is.
        yield chatbot
        return
    # Rebuild the prior conversation as alternating human/gpt turns.
    conversations = []
    for q, r in chatbot[:-1]:
        conversations.append({"from": "human", "value": q})
        conversations.append({"from": "gpt", "value": r})
    # Last user query; prepend the image placeholder when an image is attached.
    last_query = chatbot[-1][0]
    if image_input is not None:
        last_query = f"{IMAGE_PLACEHOLDER}\n{last_query}"
    conversations.append({"from": "human", "value": last_query})
    # preprocess_inputs() takes the images as its second positional argument;
    # pass [image_input] as a list rather than image=image_input as a keyword.
    prompt, input_ids, pixel_values = model.preprocess_inputs(
        conversations,
        [image_input] if image_input is not None else None,
        max_partition=16
    )
    attention_mask = torch.ne(input_ids, text_tokenizer.pad_token_id)
    model_inputs = {
        "inputs": input_ids.unsqueeze(0).to(device='cuda'),
        "attention_mask": attention_mask.unsqueeze(0).to(device='cuda'),
        # Ovis expects pixel_values as a per-sample list; [None] means text-only.
        "pixel_values": (
            [pixel_values.to(dtype=visual_tokenizer.dtype, device='cuda')]
            if pixel_values is not None else
            [None]
        ),
    }
    gen_kwargs = initialize_gen_kwargs()

    def _generate():
        # inference_mode is thread-local, so apply it inside the worker thread.
        with torch.inference_mode():
            model.generate(**model_inputs, **gen_kwargs, streamer=streamer)

    # Run generation in a background thread so the streamer can yield partial
    # text; an inline generate() call would block until the response finished.
    thread = Thread(target=_generate)
    thread.start()
    response = ""
    for new_text in streamer:
        response += new_text
        chatbot[-1][1] = response
        yield chatbot
    thread.join()
logger.info("[OVIS_CONV_START]")
for i, (req, ans) in enumerate(chatbot, 1):
logger.info(f"Q{i}: {req}\nA{i}: {ans}")
logger.info("[OVIS_CONV_END]")

def clear_chat():
    return [], "", None
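
# Minimal UI: chat pane, prompt box, optional image, and Send/Clear buttons.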
with gr.Blocks(title="Ovis Demo", theme=gr.themes.Ocean()) as demo:
    chatbot = gr.Chatbot(label="Ovis", height=500, show_copy_button=True)
    text_input = gr.Textbox(label="Prompt", placeholder="Enter your question...", lines=1)
    image_input = gr.Image(label="Image (optional)", type="pil")
    with gr.Row():
        send_btn = gr.Button("Send", variant="primary")
        clear_btn = gr.Button("Clear", variant="secondary")

    # submit_chat records the user turn and clears the text box; the chained
    # ovis_chat call streams the reply, then the image input is cleared.
    send_btn.click(
        fn=submit_chat,
        inputs=[chatbot, text_input, image_input],
        outputs=[chatbot, text_input]
    ).then(
        fn=ovis_chat,
        inputs=[chatbot, text_input, image_input],
        outputs=chatbot
    ).then(
        fn=lambda: None,
        outputs=image_input
    )
    text_input.submit(
        fn=submit_chat,
        inputs=[chatbot, text_input, image_input],
        outputs=[chatbot, text_input]
    ).then(
        fn=ovis_chat,
        inputs=[chatbot, text_input, image_input],
        outputs=chatbot
    ).then(
        fn=lambda: None,
        outputs=image_input
    )
    clear_btn.click(fn=clear_chat, outputs=[chatbot, text_input, image_input])

demo.launch()