---
library_name: transformers
pipeline_tag: text-generation
inference: true
widget:
- text: Hello!
  example_title: Hello world
  group: Python
---

This model is for debugging. It is randomly initialized, using the config of [Qwen/Qwen2-VL-7B-Instruct](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct) but with a much smaller size.

Code to create this model:

```python
import os

import requests
import torch
from PIL import Image
from transformers import AutoConfig, AutoProcessor, GenerationConfig, set_seed
from transformers.models.qwen2_vl import Qwen2VLForConditionalGeneration

model_id = "Qwen/Qwen2-VL-7B-Instruct"
repo_id = "yujiepan/qwen2-vl-tiny-random"
save_path = f"/tmp/{repo_id}"

# Shrink the original config so both the language model and the vision tower stay tiny.
config = AutoConfig.from_pretrained(model_id, trust_remote_code=True)
config.hidden_size = 16
config.intermediate_size = 32
config.num_attention_heads = 2
config.num_hidden_layers = 2
config.num_key_value_heads = 1
config.vision_config.embed_dim = 16
config.vision_config.num_heads = 2
config.vision_config.hidden_size = 16
config.vision_config.depth = 2
config.rope_scaling['mrope_section'] = [1, 1, 2]  # sum needs to be 4 here

model = Qwen2VLForConditionalGeneration(config=config)
model = model.to(torch.bfloat16).cuda().eval()
model.generation_config = GenerationConfig.from_pretrained(
    model_id, trust_remote_code=True,
)

# Re-initialize all weights with a fixed seed so the checkpoint is reproducible.
set_seed(42)
with torch.no_grad():
    for _, p in sorted(model.named_parameters()):
        torch.nn.init.uniform_(p, -0.3, 0.3)

processor = AutoProcessor.from_pretrained(model_id)
model.save_pretrained(save_path)
processor.save_pretrained(save_path)
os.system(f"ls -alh {save_path}")


def try_inference():
    # Quick smoke test: reload the saved checkpoint and run image+text generation.
    url = "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg"
    image = Image.open(requests.get(url, stream=True).raw)
    conversation = [
        {
            "role": "user",
            "content": [
                {"type": "image"},
                {"type": "text", "text": "Describe this image."},
            ],
        }
    ]
    processor = AutoProcessor.from_pretrained(save_path)
    model = Qwen2VLForConditionalGeneration.from_pretrained(
        save_path, torch_dtype=torch.bfloat16, device_map='cuda')
    text_prompt = processor.apply_chat_template(
        conversation, add_generation_prompt=True)
    inputs = processor(
        text=[text_prompt], images=[image], padding=True, return_tensors="pt"
    )
    inputs = inputs.to("cuda")
    output_ids = model.generate(**inputs, max_new_tokens=16)
    # Strip the prompt tokens so only the newly generated tokens are decoded.
    generated_ids = [
        output_ids[len(input_ids):]
        for input_ids, output_ids in zip(inputs.input_ids, output_ids)
    ]
    output_text = processor.batch_decode(
        generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=True
    )
    print(output_text)


try_inference()
```
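
Below is a minimal usage sketch, assuming the checkpoint and processor created above have been pushed to this repo (`yujiepan/qwen2-vl-tiny-random`) and a CUDA device is available; since the weights are random, the generated text is meaningless and only useful for pipeline debugging.

```python
import torch
from transformers import AutoProcessor
from transformers.models.qwen2_vl import Qwen2VLForConditionalGeneration

repo_id = "yujiepan/qwen2-vl-tiny-random"
processor = AutoProcessor.from_pretrained(repo_id)
model = Qwen2VLForConditionalGeneration.from_pretrained(
    repo_id, torch_dtype=torch.bfloat16, device_map="cuda")

# Text-only prompt; outputs are random tokens because the model is untrained.
conversation = [
    {"role": "user", "content": [{"type": "text", "text": "Hello!"}]}
]
prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)
inputs = processor(text=[prompt], return_tensors="pt").to("cuda")
output_ids = model.generate(**inputs, max_new_tokens=8)
print(processor.batch_decode(output_ids, skip_special_tokens=True))
```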