gguf-local-server

Running

App Files Community

grahamwhiteuk committed on Jan 15

Commit

03c2ae6

1 Parent(s): 0a8d079

feat: advanced settings

Browse files

Signed-off-by: Graham White <[email protected]>

Files changed (1) hide show

src/app.py +32 -7

src/app.py CHANGED Viewed

@@ -46,7 +46,15 @@ tokenizer.use_default_system_prompt = False
 @spaces.GPU
-def generate(message: str, chat_history: list[dict]) -> Iterator[str]:
     """Generate function for chat demo."""
     # Build messages
     conversation = []
@@ -60,7 +68,7 @@ def generate(message: str, chat_history: list[dict]) -> Iterator[str]:
         return_tensors="pt",
         add_generation_prompt=True,
         truncation=True,
-        max_length=MAX_INPUT_TOKEN_LENGTH,
     )
     input_ids = input_ids.to(model.device)
@@ -68,13 +76,13 @@ def generate(message: str, chat_history: list[dict]) -> Iterator[str]:
     generate_kwargs = dict(
         {"input_ids": input_ids},
         streamer=streamer,
-        max_new_tokens=MAX_NEW_TOKENS,
         do_sample=True,
-        top_p=TOP_P,
-        top_k=TOP_K,
-        temperature=TEMPERATURE,
         num_beams=1,
-        repetition_penalty=REPETITION_PENALTY,
     )
     t = Thread(target=model.generate, kwargs=generate_kwargs)
@@ -89,6 +97,15 @@ def generate(message: str, chat_history: list[dict]) -> Iterator[str]:
 css_file_path = Path(Path(__file__).parent / "app.css")
 head_file_path = Path(Path(__file__).parent / "app_head.html")
 with gr.Blocks(
     fill_height=True, css_paths=css_file_path, head_paths=head_file_path, theme=carbon_theme, title=TITLE
@@ -108,6 +125,14 @@ with gr.Blocks(
         ],
         cache_examples=False,
         type="messages",
     )
 if __name__ == "__main__":

 @spaces.GPU
+def generate(
+    message: str,
+    chat_history: list[dict],
+    temperature: float = TEMPERATURE,
+    top_p: float = TOP_P,
+    top_k: float = TOP_K,
+    repetition_penalty: float = REPETITION_PENALTY,
+    max_new_tokens: int = MAX_NEW_TOKENS,
+) -> Iterator[str]:
     """Generate function for chat demo."""
     # Build messages
     conversation = []
         return_tensors="pt",
         add_generation_prompt=True,
         truncation=True,
+        max_length=MAX_INPUT_TOKEN_LENGTH - max_new_tokens,
     )
     input_ids = input_ids.to(model.device)
     generate_kwargs = dict(
         {"input_ids": input_ids},
         streamer=streamer,
+        max_new_tokens=max_new_tokens,
         do_sample=True,
+        top_p=top_p,
+        top_k=top_k,
+        temperature=temperature,
         num_beams=1,
+        repetition_penalty=repetition_penalty,
     )
     t = Thread(target=model.generate, kwargs=generate_kwargs)
 css_file_path = Path(Path(__file__).parent / "app.css")
 head_file_path = Path(Path(__file__).parent / "app_head.html")
+# advanced settings (displayed in Accordion)
+temperature_slider = gr.Slider(minimum=0, maximum=1.0, value=TEMPERATURE, step=0.1, label="Temperature")
+top_p_slider = gr.Slider(minimum=0, maximum=1.0, value=TOP_P, step=0.05, label="Top P")
+top_k_slider = gr.Slider(minimum=0, maximum=100, value=TOP_K, step=1, label="Top K")
+repetition_penalty_slider = gr.Slider(
+    minimum=0, maximum=2.0, value=REPETITION_PENALTY, step=0.1, label="Repetition Penalty"
+)
+max_new_tokens_slider = gr.Slider(minimum=1, maximum=2000, value=MAX_NEW_TOKENS, step=1, label="Max New Tokens")
+chat_interface_accordion = gr.Accordion(label="Advanced Settings", open=False)
 with gr.Blocks(
     fill_height=True, css_paths=css_file_path, head_paths=head_file_path, theme=carbon_theme, title=TITLE
         ],
         cache_examples=False,
         type="messages",
+        additional_inputs=[
+            temperature_slider,
+            top_p_slider,
+            top_k_slider,
+            repetition_penalty_slider,
+            max_new_tokens_slider,
+        ],
+        additional_inputs_accordion=chat_interface_accordion,
     )
 if __name__ == "__main__":