Update modules/pmbl.py
modules/pmbl.py (+2 -4)
@@ -102,7 +102,7 @@ class PMBL:
             yield chunk
 
     def generate_response_task(self, system_prompt, prompt, n_ctx):
-        llm = Llama(model_path=self.model_path, n_ctx=n_ctx, n_threads=8, n_gpu_layers=-1, mlock=True)
+        llm = Llama(model_path=self.model_path, n_ctx=n_ctx, n_threads=8, n_gpu_layers=-1, use_flash_attn=True, mlock=True)
 
         response = llm(
             system_prompt,
@@ -110,9 +110,7 @@ class PMBL:
             temperature=0.7,
             stop=["</s>", "\nUser:", "\nuser:", "\nSystem:", "\nsystem:"],
             echo=False,
-            stream=True
-            use_flash_attn=True,
-            mlock=True,
+            stream=True
 
 
         )
 
         response_text = ""
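For readability, here is a sketch of the method as it stands after this commit. Everything outside the changed lines is an assumption: the from llama_cpp import Llama import, the __init__ that sets self.model_path, the elided line 109 of the call, and the chunk-accumulation loop are implied by the diff but not shown in it. The commit's effect is twofold: the old call was not valid Python (no comma after stream=True before use_flash_attn=True,), and use_flash_attn/mlock are load-time options that belong on the constructor, not the per-call kwargs. Note that current llama-cpp-python spells the constructor options flash_attn and use_mlock; the use_flash_attn=True and mlock=True kwargs below are reproduced verbatim from the diff and may simply be absorbed by the constructor's **kwargs rather than taking effect.

    from llama_cpp import Llama

    class PMBL:
        def __init__(self, model_path):
            # Assumed: the diff only shows self.model_path being read.
            self.model_path = model_path

        def generate_response_task(self, system_prompt, prompt, n_ctx):
            # Load-time options now live on the constructor rather than
            # on the completion call (new line 105).
            llm = Llama(model_path=self.model_path, n_ctx=n_ctx, n_threads=8,
                        n_gpu_layers=-1, use_flash_attn=True, mlock=True)

            response = llm(
                system_prompt,
                # (line 109, unchanged by this commit, is elided in the diff)
                temperature=0.7,
                stop=["</s>", "\nUser:", "\nuser:", "\nSystem:", "\nsystem:"],
                echo=False,
                stream=True,  # llm(...) returns an iterator of partial completions
            )

            # Assumed accumulation loop: the diff only shows the initializer
            # on line 116. Streamed chunks carry text under choices[0]["text"].
            response_text = ""
            for chunk in response:
                response_text += chunk["choices"][0]["text"]
            return response_text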