---
license: apache-2.0
base_model:
- internlm/internlm2_5-7b-chat
---
# Converted LLaMA from InternLM2.5-7B-Chat
## Description
This is a conversion of [InternLM2.5-7B-Chat](https://huggingface.co/internlm/internlm2_5-7b-chat) to the __LLaMA__ format. The conversion lets you use InternLM2.5-7B-Chat as if it were a LLaMA model, which is convenient for some *inference use cases*. The __precision__ is __exactly the same__ as the original model.
## Usage
You can load the model using the `LlamaForCausalLM` class as shown below:
```python
from typing import List, Optional, Tuple, Union

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, LlamaForCausalLM
from transformers.generation.streamers import BaseStreamer

device = "cpu"  # on CPU the outputs match the original model exactly
attn_impl = "eager"  # the attention implementation to use
meta_instruction = (
    "You are an AI assistant whose name is InternLM (书生·浦语).\n"
    "- InternLM (书生·浦语) is a conversational language model that is developed by Shanghai AI Laboratory "
    "(上海人工智能实验室). It is designed to be helpful, honest, and harmless.\n"
    "- InternLM (书生·浦语) can understand and communicate fluently in the language chosen by the user such "
    "as English and 中文."
)
prompt1 = "介绍下你自己"
prompt2 = "介绍下上海人工智能实验室"
def build_inputs(tokenizer, query: str, history: List[Tuple[str, str]] = None, meta_instruction=meta_instruction):
    if history is None:
        history = []
    if tokenizer.add_bos_token:
        prompt = ""
    else:
        prompt = tokenizer.bos_token
    if meta_instruction:
        prompt += f"""<|im_start|>system\n{meta_instruction}<|im_end|>\n"""
    for record in history:
        prompt += f"""<|im_start|>user\n{record[0]}<|im_end|>\n<|im_start|>assistant\n{record[1]}<|im_end|>\n"""
    prompt += f"""<|im_start|>user\n{query}<|im_end|>\n<|im_start|>assistant\n"""
    return tokenizer([prompt], return_tensors="pt")
@torch.inference_mode()
def chat(
    model: Union[AutoModelForCausalLM, LlamaForCausalLM],
    tokenizer,
    query: str,
    history: Optional[List[Tuple[str, str]]] = None,
    streamer: Optional[BaseStreamer] = None,
    max_new_tokens: int = 1024,
    do_sample: bool = True,
    temperature: float = 0.8,
    top_p: float = 0.8,
    meta_instruction: str = meta_instruction,
    **kwargs,
):
    if history is None:
        history = []
    inputs = build_inputs(tokenizer, query, history, meta_instruction)
    inputs = {k: v.to(model.device) for k, v in inputs.items() if torch.is_tensor(v)}
    # also add the end-of-assistant token to the eos token ids to avoid unnecessary generation
    eos_token_id = [tokenizer.eos_token_id, tokenizer.convert_tokens_to_ids(["<|im_end|>"])[0]]
    outputs = model.generate(
        **inputs,
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        do_sample=do_sample,
        temperature=temperature,
        top_p=top_p,
        eos_token_id=eos_token_id,
        **kwargs,
    )
    outputs = outputs[0].cpu().tolist()[len(inputs["input_ids"][0]):]
    response = tokenizer.decode(outputs, skip_special_tokens=True)
    response = response.split("<|im_end|>")[0]
    history = history + [(query, response)]
    return response, history
# use the official tokenizer
tokenizer = AutoTokenizer.from_pretrained("internlm/internlm2_5-7b-chat", trust_remote_code=True)
# use the converted LLaMA model
llama_model = LlamaForCausalLM.from_pretrained(
    "silence09/InternLM2.5-7B-Chat-Converted-LlaMA",
    torch_dtype="auto",
    attn_implementation=attn_impl,
).to(device)
llama_model.eval()
response_llama_and_splitfunc_1, history = chat(llama_model, tokenizer, prompt1, history=[], do_sample=False)
print(f"User Input: {prompt1}\nConverted LlaMA Response: {response_llama_and_splitfunc_1}")
response_llama_and_splitfunc_2, history = chat(llama_model, tokenizer, prompt2, history=history, do_sample=False)
print(f"User Input: {prompt2}\nConverted LlaMA Response: {response_llama_and_splitfunc_2}")
```
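If you want streaming output, the `chat` helper above already accepts a `streamer` argument. The following optional snippet (an illustration continuing the code above, not part of the original card) uses `transformers.TextStreamer`, which subclasses `BaseStreamer`:
```python
from transformers import TextStreamer

# Stream decoded tokens to stdout as they are generated (prompt tokens are skipped).
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
response_streamed, history = chat(
    llama_model, tokenizer, prompt1, history=[], streamer=streamer, do_sample=False
)
```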
## Precision Guarantee
To compare the results with the original model, you can use this [code](https://github.com/silencelamb/naked_llama/blob/main/hf_example/hf_internlm_7b_llama_compare.py).
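For reference, a check of this kind can be as simple as comparing the logits of both models on the same input. The snippet below is a minimal sketch (an illustration, not the linked script), assuming both models are run in `float32` with eager attention on CPU:
```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, LlamaForCausalLM

tokenizer = AutoTokenizer.from_pretrained("internlm/internlm2_5-7b-chat", trust_remote_code=True)
inputs = tokenizer("介绍下你自己", return_tensors="pt")

original = AutoModelForCausalLM.from_pretrained(
    "internlm/internlm2_5-7b-chat",
    torch_dtype=torch.float32,
    trust_remote_code=True,
    attn_implementation="eager",
).eval()
converted = LlamaForCausalLM.from_pretrained(
    "silence09/InternLM2.5-7B-Chat-Converted-LlaMA",
    torch_dtype=torch.float32,
    attn_implementation="eager",
).eval()

with torch.inference_mode():
    logits_original = original(**inputs).logits
    logits_converted = converted(**inputs).logits

# The maximum absolute difference is expected to be zero (or numerically negligible).
print((logits_original - logits_converted).abs().max())
```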
## More Info
It was converted using the Python script available in [this repository](https://github.com/silencelamb/naked_llama/blob/main/hf_example/convert_internlm_to_llama_hf.py).
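At a high level, such a conversion is a state-dict remapping: InternLM2's parameter names are renamed to their LLaMA equivalents, and the fused `wqkv` attention projection is split into separate `q_proj`/`k_proj`/`v_proj` weights. The sketch below only illustrates the idea; the mapping table and the `split_wqkv` helper are simplified stand-ins, not a copy of the linked script:
```python
import torch

# Illustrative rename rules from InternLM2 parameter names to LLaMA names.
RENAME_RULES = {
    "model.tok_embeddings.": "model.embed_tokens.",
    ".attention.wo.": ".self_attn.o_proj.",
    ".feed_forward.w1.": ".mlp.gate_proj.",
    ".feed_forward.w3.": ".mlp.up_proj.",
    ".feed_forward.w2.": ".mlp.down_proj.",
    ".attention_norm.": ".input_layernorm.",
    ".ffn_norm.": ".post_attention_layernorm.",
    "output.weight": "lm_head.weight",
}

def split_wqkv(wqkv: torch.Tensor, num_heads: int, num_kv_heads: int, head_dim: int):
    """Split the fused, group-interleaved wqkv weight into q, k, v (sketch)."""
    group = num_heads // num_kv_heads
    w = wqkv.view(num_kv_heads, group + 2, head_dim, -1)
    q = w[:, :group].reshape(num_heads * head_dim, -1)
    k = w[:, -2].reshape(num_kv_heads * head_dim, -1)
    v = w[:, -1].reshape(num_kv_heads * head_dim, -1)
    return q, k, v
```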