Update README.md
Browse files
README.md
CHANGED
@@ -65,105 +65,77 @@ We, the Bllossom team, are pleased to announce the release of Bllossom-Vision, a
|
|
65 |
### Colab Tutorial
|
66 |
- Inference-Code-Link (inference code coming soon)
|
67 |
|
68 |
-
###
|
69 |
-
```bash
|
70 |
-
pip install torch transformers==4.44.0
|
71 |
-
```
|
72 |
-
|
73 |
-
### Python code without Image
|
74 |
```python
|
75 |
-
from transformers import
|
76 |
import torch
|
|
|
|
|
77 |
|
78 |
-
model =
|
79 |
-
'Bllossom/llama-3.
|
80 |
torch_dtype=torch.bfloat16,
|
81 |
device_map='auto'
|
82 |
)
|
83 |
-
processor =
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
bos_token = processor.tokenizer.bos_token_id
|
107 |
-
chat_messages = torch.cat([torch.tensor([[bos_token]]),chat_messages],dim=-1).to(model.device)
|
108 |
-
|
109 |
-
|
110 |
-
output = model.generate(
|
111 |
-
input_ids = chat_messages,
|
112 |
-
use_cache=False,
|
113 |
-
max_new_tokens=2048,
|
114 |
-
top_p=0.9,
|
115 |
-
temperature=0.6,
|
116 |
-
do_sample=True,
|
117 |
-
)
|
118 |
-
|
119 |
-
print(processor.tokenizer.decode(output[0]))
|
120 |
```
|
121 |
|
122 |
-
### Python code
|
123 |
```python
|
124 |
-
from
|
125 |
-
from transformers import LlavaNextForConditionalGeneration,LlavaNextProcessor
|
126 |
import torch
|
|
|
|
|
127 |
|
128 |
-
model =
|
129 |
-
'Bllossom/llama-3.
|
130 |
torch_dtype=torch.bfloat16,
|
131 |
device_map='auto'
|
132 |
)
|
133 |
-
processor =
|
134 |
-
|
135 |
-
image = Image.open('[IMAGE_PATH]').convert('RGB')
|
136 |
-
|
137 |
-
PROMPT=\
|
138 |
-
"""You are a versatile AI assistant named Bllava, capable of both understanding and generating text as well as interpreting and analyzing images. Your role is to kindly and effectively answer the user’s questions, whether they are about text or images, and provide appropriate and helpful responses to all types of queries.
|
139 |
-
|
140 |
-
당신은 텍스트를 이해하고 생성하는 것뿐만 아니라 이미지를 해석하고 분석할 수 있는 다재다능한 AI 어시스턴트 블라바입니다. 사용자의 질문이 텍스트에 관한 것이든 이미지에 관한 것이든 친절하고 효과적으로 답변하며, 모든 유형의 질의에 대해 적절하고 유용한 응답을 제공하는 것이 당신의 역할입니다."""
|
141 |
|
|
|
|
|
142 |
|
143 |
-
instruction = '이미지에 대해서 설명해주세요.'
|
144 |
messages = [
|
145 |
-
|
146 |
-
{'
|
147 |
-
]
|
148 |
-
|
149 |
-
chat_messages = processor.tokenizer.apply_chat_template(
|
150 |
-
messages,
|
151 |
-
tokenize=False,
|
152 |
-
add_generation_prompt=True
|
153 |
-
).to(model.device)
|
154 |
|
155 |
-
|
156 |
-
chat_messages,
|
157 |
-
image,
|
158 |
-
return_tensors='pt',
|
159 |
-
)
|
160 |
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
|
|
|
|
165 |
|
166 |
-
|
|
|
167 |
```
|
168 |
|
169 |
|
|
|
65 |
### Colab Tutorial
|
66 |
- Inference-Code-Link (inference code coming soon)
|
67 |
|
68 |
+
### Python code (Use Vision-language Model)
|
|
|
|
|
|
|
|
|
|
|
69 |
```python
|
70 |
+
from transformers import MllamaForConditionalGeneration,MllamaProcessor
|
71 |
import torch
|
72 |
+
from PIL import Image
|
73 |
+
import requests
|
74 |
|
75 |
+
model = MllamaForConditionalGeneration.from_pretrained(
|
76 |
+
'Bllossom/llama-3.2-Korean-Bllossom-AICA-5.2B',
|
77 |
torch_dtype=torch.bfloat16,
|
78 |
device_map='auto'
|
79 |
)
|
80 |
+
processor = MllamaProcessor.from_pretrained('Bllossom/llama-3.2-Korean-Bllossom-AICA-5.2B')
|
81 |
+
|
82 |
+
url = "https://t1.daumcdn.net/cfile/tistory/21527E4A543DCABE1D"
|
83 |
+
image = Image.open(requests.get(url, stream=True).raw)
|
84 |
+
|
85 |
+
messages = [
|
86 |
+
{'role': 'user','content': [
|
87 |
+
{'type':'image'},
|
88 |
+
{'type': 'text','text': '이 문서를 마크다운으로 바꿔줘'}
|
89 |
+
]},
|
90 |
+
]
|
91 |
+
|
92 |
+
input_text = processor.apply_chat_template(messages,tokenize=False,add_generation_prompt=True)
|
93 |
+
|
94 |
+
inputs = processor(
|
95 |
+
image,
|
96 |
+
input_text,
|
97 |
+
add_special_tokens=False,
|
98 |
+
return_tensors="pt",
|
99 |
+
).to(model.device)
|
100 |
+
|
101 |
+
output = model.generate(**inputs, max_new_tokens=256,temperature=0.1,eos_token_id=processor.tokenizer.convert_tokens_to_ids('<|eot_id|>'),use_cache=False)
|
102 |
+
print(processor.decode(output[0]))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
103 |
```
|
104 |
|
105 |
+
### Python code (Use Language Model)
|
106 |
```python
|
107 |
+
from transformers import MllamaForConditionalGeneration,MllamaProcessor
|
|
|
108 |
import torch
|
109 |
+
from PIL import Image
|
110 |
+
import requests
|
111 |
|
112 |
+
model = MllamaForConditionalGeneration.from_pretrained(
|
113 |
+
'Bllossom/llama-3.2-Korean-Bllossom-AICA-5.2B',
|
114 |
torch_dtype=torch.bfloat16,
|
115 |
device_map='auto'
|
116 |
)
|
117 |
+
processor = MllamaProcessor.from_pretrained('Bllossom/llama-3.2-Korean-Bllossom-AICA-5.2B')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
118 |
|
119 |
+
url = "https://cdn.discordapp.com/attachments/1156141391798345742/1313407928287494164/E18489E185B3E1848FE185B3E18485E185B5E186ABE18489E185A3E186BA202021-11-1620E1848BE185A9E18492E185AE2011.png?ex=675005f4&is=674eb474&hm=fc9c4231203f53c27f6edd2420961c182dd4a1ed14d4b73e04127f11393729af&"
|
120 |
+
image = Image.open(requests.get(url, stream=True).raw)
|
121 |
|
|
|
122 |
messages = [
|
123 |
+
{'role': 'user','content': [
|
124 |
+
{'type': 'text','text': '자연어처리 15주치 커리큘럼을 짜줘'}
|
125 |
+
]},
|
126 |
+
]
|
|
|
|
|
|
|
|
|
|
|
127 |
|
128 |
+
input_text = processor.apply_chat_template(messages,tokenize=False,add_generation_prompt=True)
|
|
|
|
|
|
|
|
|
129 |
|
130 |
+
inputs = processor(
|
131 |
+
images=None,
|
132 |
+
text=input_text,
|
133 |
+
add_special_tokens=False,
|
134 |
+
return_tensors="pt",
|
135 |
+
).to(model.device)
|
136 |
|
137 |
+
output = model.generate(**inputs,max_new_tokens=256,temperature=0.1,eos_token_id=processor.tokenizer.convert_tokens_to_ids('<|eot_id|>'),use_cache=False)
|
138 |
+
print(processor.decode(output[0]))
|
139 |
```
|
140 |
|
141 |
|