---
license: mit
tags:
- auto-gptq
- opt
- gptq
- 4bit
---
This model is quantized with [AutoGPTQ](https://github.com/PanQiWei/AutoGPTQ), so you need the `auto-gptq` package (`pip install auto-gptq`) to load it:
```py
from transformers import AutoTokenizer, pipeline
from auto_gptq import AutoGPTQForCausalLM

model_id = 'seonglae/opt-125m-4bit-gptq'
tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)

# Load the quantized weights; when model_basename is omitted,
# auto-gptq locates the quantized checkpoint file in the repo automatically
model = AutoGPTQForCausalLM.from_quantized(
    model_id,
    trust_remote_code=True,
    device='cuda:0',
    use_triton=False,
    use_safetensors=True,
)

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    do_sample=True,  # sampling parameters below only take effect when sampling is enabled
    temperature=0.5,
    top_p=0.95,
    max_new_tokens=100,
    repetition_penalty=1.15,
)

prompt = "USER: Are you AI?\nASSISTANT:"
result = pipe(prompt)
print(result[0]['generated_text'])
```
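
If you prefer not to go through `pipeline`, the quantized model can also be driven with `generate` directly. A minimal sketch, assuming the same `model` and `tokenizer` from above and a CUDA device:

```py
prompt = "USER: Are you AI?\nASSISTANT:"
# Tokenize the prompt and move the input tensors to the model's device
inputs = tokenizer(prompt, return_tensors='pt').to('cuda:0')

# auto-gptq forwards generate() to the underlying causal LM
# (and already wraps the call in torch.inference_mode())
output_ids = model.generate(
    **inputs,
    do_sample=True,
    temperature=0.5,
    top_p=0.95,
    max_new_tokens=100,
    repetition_penalty=1.15,
)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))
```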