from transformers import LlamaForCausalLM, LlamaTokenizer, BlipImageProcessor
from modeling_ziya_blip2 import ZiyaBLIP2ForConditionalGeneration
from PIL import Image

# Set this to your local path of the IDEA-CCNL/Ziya-LLaMA-13B-v1 weights
LM_MODEL_PATH = "local path of model IDEA-CCNL/Ziya-LLaMA-13B-v1"

# Load the LLaMA language model and its tokenizer
lm_model = LlamaForCausalLM.from_pretrained(LM_MODEL_PATH)
tokenizer = LlamaTokenizer.from_pretrained(LM_MODEL_PATH)
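
# Optional: if GPU memory is tight, the 13B language model can be loaded in
# half precision instead. A minimal sketch, assuming a CUDA-capable GPU;
# torch_dtype is a standard from_pretrained argument in transformers.
# import torch
# lm_model = LlamaForCausalLM.from_pretrained(LM_MODEL_PATH, torch_dtype=torch.float16)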

# Normalization statistics used by OpenAI's CLIP image preprocessing
OPENAI_CLIP_MEAN = [0.48145466, 0.4578275, 0.40821073]
OPENAI_CLIP_STD = [0.26862954, 0.26130258, 0.27577711]

# Load the BLIP-2 vision/adapter weights from the current directory and
# attach the LLaMA model loaded above
model = ZiyaBLIP2ForConditionalGeneration.from_pretrained(".", language_model=lm_model)

# Build an image processor matching the vision encoder's expected input size
image_size = model.config.vision_config.image_size
image_processor = BlipImageProcessor(
    size={"height": image_size, "width": image_size},
    image_mean=OPENAI_CLIP_MEAN,
    image_std=OPENAI_CLIP_STD,
)

model.cuda()  # move the full model to GPU

generate_config = {
    "max_new_tokens": 128,
    "top_p": 0.1,
    "temperature": 0.7,
}

# Preprocess the image and ask a first question about it
pixel_values = image_processor(Image.open("wzry.jpg"), return_tensors="pt").pixel_values.to(model.device)
output = model.chat(
    tokenizer=tokenizer,
    pixel_values=pixel_values,
    query="这是什么游戏",  # "What game is this?"
    previous_querys=[],    # no conversation history on the first turn
    previous_outputs=[],
    **generate_config,
)
print(output)
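
# Follow-up turn: a minimal sketch of multi-turn chat, assuming model.chat keeps
# the signature used above; prior turns are passed back through previous_querys
# and previous_outputs. The follow-up question here is a hypothetical example.
followup = model.chat(
    tokenizer=tokenizer,
    pixel_values=pixel_values,
    query="这个游戏好玩吗",  # hypothetical follow-up: "Is this game fun?"
    previous_querys=["这是什么游戏"],
    previous_outputs=[output],
    **generate_config,
)
print(followup)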