aws_test

Sleeping

App Files Files Community

Hjgugugjhuhjggg committed on Dec 27, 2024

Commit

c8741b0

verified ·

1 Parent(s): e77c20c

Update app.py

Browse files

Files changed (1) hide show

app.py +34 -26

app.py CHANGED Viewed

@@ -42,7 +42,7 @@ class GenerateRequest(BaseModel):
     input_text: str = ""
     task_type: str
     temperature: float = 1.0
-    max_new_tokens: int = 200  # this will be limited to 10
     stream: bool = True
     top_p: float = 1.0
     top_k: int = 50
@@ -146,7 +146,7 @@ async def generate(request: GenerateRequest):
         input_text = request.input_text
         task_type = request.task_type
         temperature = request.temperature
-        max_new_tokens = request.max_new_tokens  # This value will be used to constraint the output
         stream = request.stream
         top_p = request.top_p
         top_k = request.top_k
@@ -162,7 +162,7 @@ async def generate(request: GenerateRequest):
         if "text-to-text" == task_type:
             generation_config = GenerationConfig(
                 temperature=temperature,
-                max_new_tokens=min(max_new_tokens, 10),  # Constrain max_new_tokens to 10
                 top_p=top_p,
                 top_k=top_k,
                 repetition_penalty=repetition_penalty,
@@ -173,7 +173,7 @@ async def generate(request: GenerateRequest):
             return StreamingResponse(
                 stream_text(model, tokenizer, input_text,
                              generation_config, stop_sequences,
-                             device, max_length=10),
                 media_type="text/plain"
             )
         else:
@@ -187,21 +187,14 @@ async def generate(request: GenerateRequest):
 async def stream_text(model, tokenizer, input_text,
                         generation_config, stop_sequences,
-                        device, max_length):
     encoded_input = tokenizer(
         input_text, return_tensors="pt",
         truncation=True, max_length=max_length
     ).to(device)
-    input_length = encoded_input["input_ids"].shape[1]
-    remaining_tokens = max_length - input_length
-    if remaining_tokens <= 0:
-        yield ""
-    generation_config.max_new_tokens = min(
-        remaining_tokens, generation_config.max_new_tokens
-    )
     def find_stop(output_text, stop_sequences):
         for seq in stop_sequences:
@@ -210,10 +203,21 @@ async def stream_text(model, tokenizer, input_text,
                 return last_index + len(seq)
         return -1
     output_text = ""
     while True:
         outputs = model.generate(
             **encoded_input,
             do_sample=generation_config.do_sample,
@@ -231,7 +235,7 @@ async def stream_text(model, tokenizer, input_text,
             outputs.sequences[0][len(encoded_input["input_ids"][0]):],
             skip_special_tokens=True
         )
         output_text += new_text
         stop_index = find_stop(output_text, stop_sequences)
@@ -244,26 +248,30 @@ async def stream_text(model, tokenizer, input_text,
                     yield json.dumps({"text": text, "is_end": False}) + "\n"
             yield json.dumps({"text": "", "is_end": True}) + "\n"
             break
         else:
             for chunk in [new_text[i:i+10] for i in range(0, len(new_text), 10)]:
                  for text in chunk.split():
                     yield json.dumps({"text": text, "is_end": False}) + "\n"
-        if len(output_text) >= generation_config.max_new_tokens:
             for chunk in [output_text[i:i+10] for i in range(0, len(output_text), 10)]:
                  for text in chunk.split():
                     yield json.dumps({"text": text, "is_end": False}) + "\n"
             yield json.dumps({"text": "", "is_end": True}) + "\n"
             break
-        encoded_input = tokenizer(
-            output_text, return_tensors="pt",
-            truncation=True, max_length=max_length
-        ).to(device)
 @app.post("/generate-image")
 async def generate_image(request: GenerateRequest):

     input_text: str = ""
     task_type: str
     temperature: float = 1.0
+    max_new_tokens: int = 10
     stream: bool = True
     top_p: float = 1.0
     top_k: int = 50
         input_text = request.input_text
         task_type = request.task_type
         temperature = request.temperature
+        max_new_tokens = request.max_new_tokens
         stream = request.stream
         top_p = request.top_p
         top_k = request.top_k
         if "text-to-text" == task_type:
             generation_config = GenerationConfig(
                 temperature=temperature,
+                max_new_tokens=max_new_tokens,
                 top_p=top_p,
                 top_k=top_k,
                 repetition_penalty=repetition_penalty,
             return StreamingResponse(
                 stream_text(model, tokenizer, input_text,
                              generation_config, stop_sequences,
+                             device),
                 media_type="text/plain"
             )
         else:
 async def stream_text(model, tokenizer, input_text,
                         generation_config, stop_sequences,
+                        device):
+    max_length=10 #Define the max length to cut the text and generate another response
     encoded_input = tokenizer(
         input_text, return_tensors="pt",
         truncation=True, max_length=max_length
     ).to(device)
     def find_stop(output_text, stop_sequences):
         for seq in stop_sequences:
                 return last_index + len(seq)
         return -1
     output_text = ""
     while True:
+        input_length = encoded_input["input_ids"].shape[1]
+        remaining_tokens = max_length - input_length
+        if remaining_tokens <=0:
+           yield json.dumps({"text": "", "is_end": True}) + "\n"
+           break
+        generation_config.max_new_tokens = min(
+        remaining_tokens, generation_config.max_new_tokens
+        )
         outputs = model.generate(
             **encoded_input,
             do_sample=generation_config.do_sample,
             outputs.sequences[0][len(encoded_input["input_ids"][0]):],
             skip_special_tokens=True
         )
         output_text += new_text
         stop_index = find_stop(output_text, stop_sequences)
                     yield json.dumps({"text": text, "is_end": False}) + "\n"
             yield json.dumps({"text": "", "is_end": True}) + "\n"
             break
         else:
             for chunk in [new_text[i:i+10] for i in range(0, len(new_text), 10)]:
                  for text in chunk.split():
                     yield json.dumps({"text": text, "is_end": False}) + "\n"
+        if len(output_text) >= max_length:
+            encoded_input = tokenizer(
+            output_text, return_tensors="pt",
+            truncation=True, max_length=max_length
+            ).to(device)
+            output_text = ""
+        elif len(output_text) < max_length and len(new_text) == 0:
             for chunk in [output_text[i:i+10] for i in range(0, len(output_text), 10)]:
                  for text in chunk.split():
                     yield json.dumps({"text": text, "is_end": False}) + "\n"
             yield json.dumps({"text": "", "is_end": True}) + "\n"
             break
 @app.post("/generate-image")
 async def generate_image(request: GenerateRequest):