nxphi47 committed on
Commit
b88fb33
1 Parent(s): 6964ae7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -2
app.py CHANGED
@@ -1760,9 +1760,9 @@ def launch():
1760
 
1761
  if QUANTIZATION == 'awq':
1762
  print(F'Load model in int4 quantization')
1763
- llm = LLM(model=model_path, dtype="float16", tensor_parallel_size=tensor_parallel, gpu_memory_utilization=gpu_memory_utilization, quantization="awq")
1764
  else:
1765
- llm = LLM(model=model_path, dtype=dtype, tensor_parallel_size=tensor_parallel, gpu_memory_utilization=gpu_memory_utilization)
1766
 
1767
  try:
1768
  print(llm.llm_engine.workers[0].model)
 
1760
 
1761
  if QUANTIZATION == 'awq':
1762
  print(F'Load model in int4 quantization')
1763
+ llm = LLM(model=model_path, dtype="float16", tensor_parallel_size=tensor_parallel, gpu_memory_utilization=gpu_memory_utilization, quantization="awq", max_model_len=8192)
1764
  else:
1765
+ llm = LLM(model=model_path, dtype=dtype, tensor_parallel_size=tensor_parallel, gpu_memory_utilization=gpu_memory_utilization, max_model_len=8192)
1766
 
1767
  try:
1768
  print(llm.llm_engine.workers[0].model)