Tuchuanhuhuhu committed
Commit 67d913f · 1 Parent(s): 9aeba67
llama: support streaming output (llama支持流式传输)

Files changed: modules/models.py +17 -30
modules/models.py CHANGED

@@ -342,6 +342,7 @@ class LLaMA_Client(BaseLLMModel):
     def _get_llama_style_input(self):
         history = [x["content"] for x in self.history]
         context = "\n".join(history)
+        context += "\nOutput:"
         return context
 
     def get_answer_at_once(self):
@@ -359,40 +360,26 @@ class LLaMA_Client(BaseLLMModel):
         )
 
         response = output_dataset.to_dict()["instances"][0]["text"]
-
-        try:
-            index = response.index(self.end_string)
-        except ValueError:
-            response += self.end_string
-            index = response.index(self.end_string)
-
-        response = response[: index + 1]
         return response, len(response)
 
     def get_answer_stream_iter(self):
         context = self._get_llama_style_input()
-        [16 lines of the previous implementation, not captured in this view]
-        except ValueError:
-            response += self.end_string
-            index = response.index(self.end_string)
-
-        response = response[: index + 1]
-        yield response
+        partial_text = ""
+        for i in range(self.max_generation_token):
+            input_dataset = self.dataset.from_dict(
+                {"type": "text_only", "instances": [{"text": context+partial_text}]}
+            )
+            output_dataset = self.inferencer.inference(
+                model=self.model,
+                dataset=input_dataset,
+                max_new_tokens=1,
+                temperature=self.temperature,
+            )
+            response = output_dataset.to_dict()["instances"][0]["text"]
+            if response == "":
+                break
+            partial_text += response
+            yield partial_text
 
 
 class ModelManager:
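For reference, a small sketch of the prompt string _get_llama_style_input builds after this change; only the join plus the "\nOutput:" suffix comes from the commit, the sample history below is made up.

# Hypothetical conversation history; the message contents are invented,
# only the join + "\nOutput:" shape mirrors the commit.
history = [
    {"role": "user", "content": "What is the capital of France?"},
    {"role": "assistant", "content": "Paris."},
    {"role": "user", "content": "And of Germany?"},
]
context = "\n".join(x["content"] for x in history)
context += "\nOutput:"
print(context)
# What is the capital of France?
# Paris.
# And of Germany?
# Output: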
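A minimal, self-contained sketch of the cumulative-yield pattern the new get_answer_stream_iter follows, using a stand-in generator instead of the LMFlow inferencer; the caller-side delta printing is an assumption about how a UI might consume the iterator, not part of the commit.

def fake_stream_iter(tokens):
    # Stand-in for get_answer_stream_iter: one "token" per step, yielding the
    # accumulated text each time; empty output ends generation, as in the commit.
    partial_text = ""
    for tok in tokens:
        if tok == "":
            break
        partial_text += tok
        yield partial_text

last = ""
for partial_text in fake_stream_iter(["The", " capital", " is", " Paris", ".", ""]):
    # Each yield is the full text so far, so print only the newly added suffix.
    print(partial_text[len(last):], end="", flush=True)
    last = partial_text
print()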