Update app.py
app.py CHANGED
@@ -5,6 +5,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
 from huggingface_hub import login
 import re
 import os
+from PIL import Image
 
 # Load Hugging Face token
 HF_TOKEN = os.getenv("HF_TOKEN")
@@ -19,6 +20,7 @@ MODELS = {
         },
         "emoji": "π¦",
         "experimental": True,
+        "is_vision": False,  # Enable vision support for this model
     },
 }
 
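For orientation, the new `is_vision` key sits alongside the existing per-model fields; a registry entry that actually enables vision might look like the minimal sketch below. The entry name, repo ID, and emoji are placeholders, not part of this commit.

```python
# Hypothetical MODELS entry -- only the "is_vision" key comes from this commit;
# the entry name, repo ID, and other values are illustrative placeholders.
MODELS = {
    "Atlas-Vision (example)": {
        "repo_id": "example-org/atlas-vision",  # placeholder checkpoint
        "emoji": "🖼️",
        "experimental": True,
        "is_vision": True,  # the UI offers an image uploader only when this is True
    },
}
```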
@@ -84,7 +86,7 @@ class AtlasInferenceApp:
         except Exception as e:
             return f"❌ Error: {str(e)}"
 
-    def respond(self, message, max_tokens, temperature, top_p, top_k):
+    def respond(self, message, max_tokens, temperature, top_p, top_k, image=None):
         if not st.session_state.current_model["model"]:
             return "⚠️ Please select and load a model first"
 
@@ -104,6 +106,7 @@ class AtlasInferenceApp:
             # Generate response with streaming
             response_container = st.empty()  # Placeholder for streaming text
             full_response = ""
+            generated_tokens = []  # Track generated tokens to avoid duplicates
             with torch.no_grad():
                 for chunk in st.session_state.current_model["model"].generate(
                     input_ids=inputs.input_ids,
@@ -116,19 +119,18 @@ class AtlasInferenceApp:
                     pad_token_id=st.session_state.current_model["tokenizer"].pad_token_id,
                     eos_token_id=st.session_state.current_model["tokenizer"].eos_token_id,
                 ):
-                    # Decode the
-
-
-
-
-
-
-
-
-
-
-
-                    break
+                    # Decode only the new tokens
+                    new_tokens = chunk[:, inputs.input_ids.shape[1]:]  # Exclude input tokens
+                    generated_tokens.extend(new_tokens[0].tolist())  # Add new tokens to the list
+                    chunk_text = st.session_state.current_model["tokenizer"].decode(generated_tokens, skip_special_tokens=True)
+
+                    # Remove the prompt from the response
+                    if prompt in chunk_text:
+                        chunk_text = chunk_text.replace(prompt, "").strip()
+
+                    # Update the response
+                    full_response = chunk_text
+                    response_container.markdown(full_response)
 
                     # Stop if the response is too long or incomplete
                     if len(full_response) >= max_tokens * 4:  # Approximate token-to-character ratio
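One caveat with the loop above: a stock `transformers` `model.generate()` call returns the finished sequence rather than yielding chunks, so iterating over it does not stream token by token. The usual streaming pattern pairs `TextIteratorStreamer` with `generate()` running in a background thread. The sketch below shows that pattern under the assumption that `model` and `tokenizer` are the objects held in `st.session_state.current_model`; the helper name `stream_reply` is made up for illustration.

```python
# Sketch of incremental streaming with transformers' TextIteratorStreamer.
# "model" and "tokenizer" stand in for st.session_state.current_model["model"]
# and ["tokenizer"]; the helper name is hypothetical.
from threading import Thread

import streamlit as st
from transformers import TextIteratorStreamer


def stream_reply(model, tokenizer, prompt, max_tokens, temperature, top_p, top_k):
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    # generate() blocks until finished, so run it in a thread and drain the streamer.
    Thread(
        target=model.generate,
        kwargs=dict(
            **inputs,
            max_new_tokens=max_tokens,
            do_sample=True,
            temperature=temperature,
            top_p=top_p,
            top_k=top_k,
            streamer=streamer,
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.eos_token_id,
        ),
    ).start()

    placeholder = st.empty()
    full_response = ""
    for new_text in streamer:  # yields decoded text as tokens are produced
        full_response += new_text
        placeholder.markdown(full_response)
    return full_response
```

Because the streamer skips the prompt and decodes only newly generated tokens, there is no need to strip the prompt from the accumulated text by hand.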
@@ -182,16 +184,25 @@ class AtlasInferenceApp:
                 avatar=USER_PFP if message["role"] == "user" else AI_PFP
             ):
                 st.markdown(message["content"])
+                if "image" in message:
+                    st.image(message["image"], caption="Uploaded Image", use_column_width=True)
 
         # Input box for user messages
         if prompt := st.chat_input("Message Atlas..."):
-
+            # Allow image upload if the model supports vision
+            uploaded_image = None
+            if MODELS[model_key]["is_vision"]:
+                uploaded_image = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])
+
+            st.session_state.chat_history.append({"role": "user", "content": prompt, "image": uploaded_image})
             with st.chat_message("user", avatar=USER_PFP):
                 st.markdown(prompt)
+                if uploaded_image:
+                    st.image(uploaded_image, caption="Uploaded Image", use_column_width=True)
 
             with st.chat_message("assistant", avatar=AI_PFP):
                 with st.spinner("Generating response..."):
-                    response = self.respond(prompt, max_tokens, temperature, top_p, top_k)
+                    response = self.respond(prompt, max_tokens, temperature, top_p, top_k, image=uploaded_image)
                     st.markdown(response)
 
             st.session_state.chat_history.append({"role": "assistant", "content": response})
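The commit threads `image=uploaded_image` into `respond()` but leaves `is_vision` set to `False`, so the diff never shows the image actually reaching a model. For a vision-language checkpoint, a common approach is to open the Streamlit upload with PIL (hence the new `from PIL import Image`) and pass it through the checkpoint's `AutoProcessor` together with the text; the sketch below is an assumption about how that could look, with the repo ID and helper name as placeholders.

```python
# Hedged sketch: feeding an uploaded image to a vision-language model.
# The repo ID "example-org/atlas-vlm" and the helper name are placeholders;
# this step is not part of the commit itself.
from PIL import Image
from transformers import AutoModelForVision2Seq, AutoProcessor

processor = AutoProcessor.from_pretrained("example-org/atlas-vlm")
model = AutoModelForVision2Seq.from_pretrained("example-org/atlas-vlm")


def respond_with_image(prompt, uploaded_file, max_tokens=256):
    # st.file_uploader returns a file-like object that PIL can open directly.
    image = Image.open(uploaded_file).convert("RGB")
    inputs = processor(images=image, text=prompt, return_tensors="pt")
    output_ids = model.generate(**inputs, max_new_tokens=max_tokens)
    return processor.batch_decode(output_ids, skip_special_tokens=True)[0]
```

When `is_vision` stays `False`, `uploaded_image` remains `None` and `respond()` can simply ignore the extra argument.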