Spaces:
Sleeping
Hjgugugjhuhjggg committed on
Update app.py
app.py CHANGED
@@ -19,6 +19,7 @@ import base64
 from huggingface_hub import login
 from botocore.exceptions import NoCredentialsError
 from functools import lru_cache
+from typing import AsyncGenerator
 
 
 HUGGINGFACE_HUB_TOKEN = os.getenv("HUGGINGFACE_HUB_TOKEN")
@@ -130,9 +131,9 @@ async def generate(request: GenerateRequest):
         )
     if stream:
         return StreamingResponse(
-            stream_text(model, tokenizer, input_text,
+            stream_json_responses(stream_text(model, tokenizer, input_text,
                         generation_config, stop_sequences,
-                        device),
+                        device)),
             media_type="text/plain"
         )
     else:
@@ -163,9 +164,10 @@ class StopOnSequences(StoppingCriteria):
             return True
         return False
 
+
 async def stream_text(model, tokenizer, input_text,
                       generation_config, stop_sequences,
-                      device):
+                      device) -> AsyncGenerator[dict, None]:
 
     encoded_input = tokenizer(
         input_text, return_tensors="pt",
@@ -198,11 +200,10 @@ async def stream_text(model, tokenizer, input_text,
                 skip_special_tokens=True
             )
 
-            if
+            if not new_text:
                 if not stop_criteria(outputs.sequences, None):
-
-
-                        yield {"text": "", "is_end": True}
+                    yield {"text": output_text, "is_end": False}
+                yield {"text": "", "is_end": True}
                 break
 
             output_text += new_text
@@ -220,10 +221,11 @@ async def stream_text(model, tokenizer, input_text,
             output_text = ""
 
 
-async def stream_json_responses(generator):
+async def stream_json_responses(generator: AsyncGenerator[dict, None]) -> AsyncGenerator[str, None]:
     async for data in generator:
         yield json.dumps(data) + "\n"
-
+
+
 async def generate_text(model, tokenizer, input_text,
                         generation_config, stop_sequences,
                         device):
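Net effect of the commit: the endpoint no longer hands StreamingResponse the raw dict-yielding stream_text generator; it first wraps it in stream_json_responses, which serializes each record as one line of JSON, and the stray "if" in stream_text becomes a proper "if not new_text:" guard that flushes any buffered text and emits a final {"text": "", "is_end": true} sentinel. Below is a minimal sketch of that pattern, assuming FastAPI; the app object, the /demo route, and fake_stream_text are hypothetical stand-ins for illustration, and only stream_json_responses and the {"text": ..., "is_end": ...} record shape come from the diff itself.

# Minimal sketch of the streaming pattern this commit lands on.
# Assumptions: the FastAPI scaffolding and fake_stream_text are
# hypothetical; stream_json_responses and the record shape are from
# the diff.
import asyncio
import json
from typing import AsyncGenerator

from fastapi import FastAPI
from fastapi.responses import StreamingResponse

app = FastAPI()

async def fake_stream_text() -> AsyncGenerator[dict, None]:
    # Stand-in for stream_text(...): one dict per decoded chunk,
    # then a final sentinel record with is_end=True.
    for chunk in ["Hello", ", ", "world"]:
        await asyncio.sleep(0)  # yield control so pending writes flush
        yield {"text": chunk, "is_end": False}
    yield {"text": "", "is_end": True}

async def stream_json_responses(
    generator: AsyncGenerator[dict, None],
) -> AsyncGenerator[str, None]:
    # Same shape as in the commit: serialize each record as one line
    # of newline-delimited JSON (NDJSON).
    async for data in generator:
        yield json.dumps(data) + "\n"

@app.get("/demo")
async def demo():
    # Mirrors the fixed call site: wrap the dict generator in the JSON
    # serializer before handing it to StreamingResponse.
    return StreamingResponse(
        stream_json_responses(fake_stream_text()),
        media_type="text/plain",
    )

A client could then consume the NDJSON stream line by line; httpx is shown here as an assumed client library, with an assumed localhost URL:

# Hypothetical client, assuming the app above runs on localhost:8000.
import json
import httpx

with httpx.stream("GET", "http://localhost:8000/demo") as response:
    for line in response.iter_lines():
        if not line:
            continue  # skip blank keep-alive or trailing lines
        record = json.loads(line)
        if record["is_end"]:
            break  # sentinel record marks the end of generation
        print(record["text"], end="", flush=True)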
|