mobicham committed on
Commit
a3f0fb1
·
verified ·
1 Parent(s): 93bb12c

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +13 -0
README.md CHANGED
@@ -77,3 +77,16 @@ outputs = model.generate(**inputs.to(model.device), max_new_tokens=1000, cache_i
77
  if(backend == 'gemlite'):
78
  gemlite.core.GemLiteLinear.cache_config('/tmp/gemlite_config.json')
79
  ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
  if(backend == 'gemlite'):
78
  gemlite.core.GemLiteLinear.cache_config('/tmp/gemlite_config.json')
79
  ```
80
+
81
+ Usage with <a href="https://github.com/vllm-project/vllm/">vLLM</a>:
82
+ ```Python
83
+ from vllm import LLM
84
+ from vllm.sampling_params import SamplingParams
85
+
86
+ model_id = "mobiuslabsgmbh/Qwen2.5-14B-Instruct-1M_4bitgs64_hqq_hf"
87
+
88
+ llm = LLM(model=model_id, max_model_len=4096, enable_chunked_prefill=False)
89
+ sampling_params = SamplingParams(temperature=0.8, top_p=0.95, max_tokens=1024)
90
+ outputs = llm.generate(["What is the capital of Germany?"], sampling_params)
91
+ print(outputs[0].outputs[0].text)
92
+ ```