Deployment on Hugging Face Endpoints
#2
by Aliayub1995
I want to deploy the model on Hugging Face Inference Endpoints and run inference on it. I have created a handler.py file to deploy the model and a test.py file to run inference via an API call to the endpoint. The model deploys successfully. What are the available ways to send a video to the endpoint?
handler.py:
from typing import Dict, List, Any
import sys
sys.path.append('./')
import logging

from videollama2 import model_init, mm_infer
from videollama2.utils import disable_torch_init


class EndpointHandler:
    def __init__(self, path: str = ""):
        disable_torch_init()
        self.model_path = 'DAMO-NLP-SG/VideoLLaMA2-7B'
        # model_init returns the model, the per-modality processors, and the tokenizer.
        self.model, self.processor, self.tokenizer = model_init(self.model_path)

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        logging.info(f"Received data: {data}")
        # Accept both {"inputs": {...}} and a bare top-level payload.
        inputs = data.get("inputs", data)
        modal = inputs.get("modal", "video")
        modal_path = inputs.get("modal_path", "")
        instruct = inputs.get("instruct", "")
        logging.info(f"Modal: {modal}, Modal Path: {modal_path}, Instruct: {instruct}")
        if not modal_path or not instruct:
            raise ValueError("Both 'modal_path' and 'instruct' must be provided in the input data.")
        # Preprocess the media at modal_path, then run generation.
        output = mm_infer(
            self.processor[modal](modal_path),
            instruct,
            model=self.model,
            tokenizer=self.tokenizer,
            do_sample=False,
            modal=modal
        )
        return [{"output": output}]
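Since Inference Endpoints receive a JSON body over HTTPS, the two usual ways to get a video to the handler are: pass a publicly reachable URL in modal_path and download it inside the handler, or base64-encode the raw video bytes on the client and decode them server-side. Below is a minimal sketch of a helper the handler could call before mm_infer; the video_b64 key and the .mp4 suffix are my own assumptions, not part of the original payload schema.

import base64
import tempfile
import requests

def resolve_video_input(inputs: dict) -> str:
    """Return a local file path for the video, however it was sent.

    Assumes the hypothetical payload keys 'video_b64' (base64-encoded
    bytes) and 'modal_path' (URL or local path); adjust to your schema.
    """
    b64 = inputs.get("video_b64")
    if b64:
        # Decode base64 bytes into a temp file the processor can read.
        tmp = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
        tmp.write(base64.b64decode(b64))
        tmp.close()
        return tmp.name
    path = inputs.get("modal_path", "")
    if path.startswith(("http://", "https://")):
        # Download a publicly reachable URL into a temp file.
        resp = requests.get(path, timeout=60)
        resp.raise_for_status()
        tmp = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
        tmp.write(resp.content)
        tmp.close()
        return tmp.name
    # Otherwise assume the path already exists on the endpoint's filesystem.
    return path

The returned local path would then replace modal_path in the call to self.processor[modal](...).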
test.py:
import requests

API_URL = ""  # your endpoint URL
headers = {
    "Accept": "application/json",
    "Authorization": "",  # "Bearer <your HF token>"
    "Content-Type": "application/json"
}

def query(payload):
    response = requests.post(API_URL, headers=headers, json=payload)
    return response.json()

payload = {
    "inputs": {
        "modal": "video",
        "modal_path": "",  # see the input options below
        "instruct": "Describe what is happening in the video along with timestamps"
    }
}

output = query(payload)
print(output)
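On the client side, the same two options look roughly like this, reusing the query helper above. The URL route keeps the request small; the base64 route works for local files but is bounded by the endpoint's request-size limit, so it only suits short clips. The file name, URL, and video_b64 key below are hypothetical.

import base64

# Option 1: point the handler at a publicly reachable URL.
url_payload = {
    "inputs": {
        "modal": "video",
        "modal_path": "https://example.com/video.mp4",  # hypothetical URL
        "instruct": "Describe what is happening in the video"
    }
}

# Option 2: base64-encode a local file into the JSON body
# (the handler must decode it, e.g. as sketched above).
with open("sample.mp4", "rb") as f:  # hypothetical local file
    video_b64 = base64.b64encode(f.read()).decode("utf-8")

b64_payload = {
    "inputs": {
        "modal": "video",
        "video_b64": video_b64,  # hypothetical key
        "instruct": "Describe what is happening in the video"
    }
}

print(query(url_payload))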