Revert

modules/pmbl.py (+4 -18)
@@ -102,18 +102,9 @@ class PMBL:
             yield chunk
 
     def generate_response_task(self, system_prompt, prompt, n_ctx):
-
-        llm = Llama(
-            model_path=self.model_path,
-            n_ctx=n_ctx,
-            n_threads=8,
-            n_gpu_layers=-1,
-            use_mlock=True,
-            use_mmap=True,
-            use_flash_attn=True
-        )
-
-        response = llm.generate(
+        llm = Llama(model_path=self.model_path, n_ctx=n_ctx, n_threads=8, n_gpu_layers=-1, use_flash_attn=True, mlock=True)
+
+        response = llm(
             system_prompt,
             max_tokens=1500,
             temperature=0.7,
@@ -156,12 +147,7 @@ class PMBL:
         conn.close()
 
     def generate_topic(self, prompt, response):
-        llm = Llama(
-            model_path=self.model_path,
-            n_ctx=n_ctx,
-            n_threads=2,
-            n_gpu_layers=2,
-        )
+        llm = Llama(model_path=self.model_path, n_ctx=1690, n_threads=2, n_gpu_layers=-1, mlock=True)
 
         system_prompt = f"Based on the following interaction between a user and an AI assistant, generate a concise topic for the conversation in 2-4 words:\n\nUser: {prompt}\nAssistant: {response}\n\nTopic:"
 
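For context: this revert collapses the multi-line Llama constructor back into a one-liner and switches the completion call from llm.generate(...) back to llm(...). Below is a minimal sketch of the restored calling pattern, assuming the module uses llama-cpp-python; the model path and prompt are placeholders, not taken from the repository. In that library, Llama.__call__ wraps create_completion(), which accepts a prompt string plus sampling options such as max_tokens and temperature, whereas Llama.generate() is a lower-level interface that expects token ids rather than a string, which is presumably why the revert restores llm(...).

    from llama_cpp import Llama

    # Placeholder path; the module itself passes self.model_path.
    llm = Llama(
        model_path="./model.gguf",
        n_ctx=2048,        # context window size
        n_threads=8,       # CPU threads to use
        n_gpu_layers=-1,   # offload all layers to the GPU
    )

    # __call__ wraps create_completion(): returns a dict of choices,
    # or an iterator of chunks when stream=True.
    output = llm(
        "Q: What is the capital of France? A:",
        max_tokens=32,
        temperature=0.7,
    )
    print(output["choices"][0]["text"])

One caveat worth double-checking against the installed version: the restored one-liner passes use_flash_attn=True and mlock=True, but in the llama-cpp-python releases I am aware of, those constructor flags are spelled flash_attn and use_mlock.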