Commit · ca55e17
Parent: 8f8d1e9

Update model references and titles to reflect new versioning scheme

Files changed:
- app.py +6 -10
- controller.py +1 -1
- gradio_web_server.py +2 -2
- model_worker.py +1 -12
app.py CHANGED

@@ -124,7 +124,7 @@ def vote_last_response(state, liked, request: gr.Request):
     conv_data = {
         "tstamp": round(time.time(), 4),
         "like": liked,
-        "model": '…
+        "model": 'Vintern-1B-v3',
         "state": state.dict(),
         "ip": request.client.host,
     }
@@ -155,7 +155,7 @@ def vote_selected_response(
         "tstamp": round(time.time(), 4),
         "like": data.liked,
         "index": data.index,
-        "model": '…
+        "model": 'Vintern-1B-v3',
         "state": state.dict(),
         "ip": request.client.host,
     }
@@ -230,7 +230,7 @@ def http_bot(
     max_input_tiles,
     request: gr.Request,
 ):
-    model_name = '…
+    model_name = 'Vintern-1B-v3'
     logger.info(f"http_bot. ip: {request.client.host}")
     start_tstamp = time.time()
     if hasattr(state, "skip_next") and state.skip_next:
@@ -320,11 +320,7 @@ def http_bot(
         return

     ai_response = state.return_last_message()
-
-    returned_image = find_bounding_boxes(state, ai_response)
-    returned_image = [returned_image] if returned_image else []
-    state.update_message(Conversation.ASSISTANT, ai_response, returned_image)
-
+
     state.end_of_current_turn()

     yield (
@@ -415,7 +411,7 @@ function createWaveAnimation() {
         text.style.color = 'transparent';
         text.style.fontSize = '28px';
         text.style.width = 'auto';
-        text.textContent = '…
+        text.textContent = 'Vintern-1B';
         text.style.fontWeight = 'bold';
         i += 1;
     }, 200);
@@ -442,7 +438,7 @@ def build_demo():
     )

     with gr.Blocks(
-        title="…
+        title="Vintern-Chat",
         theme=gr.themes.Default(),
         css=block_css,
     ) as demo:
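The first two app.py hunks assemble the same vote record. The sketch below re-creates that payload as a standalone function so the logged fields are easy to see; build_vote_record and the sample arguments are illustrative names, not part of the app — only the field names come from the diff.

# Minimal sketch of the vote payload built in vote_last_response /
# vote_selected_response above; names other than the dict keys are made up.
import json
import time

def build_vote_record(liked, state_dict, client_ip):
    return {
        "tstamp": round(time.time(), 4),
        "like": liked,
        "model": 'Vintern-1B-v3',
        "state": state_dict,
        "ip": client_ip,
    }

if __name__ == "__main__":
    record = build_vote_record(True, {"messages": []}, "127.0.0.1")
    print(json.dumps(record, indent=2))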
controller.py CHANGED

@@ -115,7 +115,7 @@ class Controller:
     def extract_key(s):
         if 'Pro' in s:
             return 999
-        match = re.match(r'…
+        match = re.match(r'Vintern-(\d+)B', s)
         if match:
             return int(match.group(1))
         return -1
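extract_key is a sort key: 'Pro' variants rank highest, other names rank by the parameter count parsed out of 'Vintern-<n>B', and anything unmatched falls to -1. A minimal standalone check (the sample model names are made up):

# Standalone re-implementation of extract_key from the diff above,
# with assumed sample inputs to show the resulting order.
import re

def extract_key(s):
    if 'Pro' in s:
        return 999  # 'Pro' variants always sort last (highest key)
    match = re.match(r'Vintern-(\d+)B', s)
    if match:
        return int(match.group(1))  # parameter count in billions
    return -1  # unrecognized names sort first

models = ["Vintern-4B", "Vintern-1B-v3", "Vintern-Pro", "other-model"]
print(sorted(models, key=extract_key))
# ['other-model', 'Vintern-1B-v3', 'Vintern-4B', 'Vintern-Pro']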
gradio_web_server.py CHANGED

@@ -44,9 +44,9 @@ def write2file(path, content):
 def sort_models(models):
     def custom_sort_key(model_name):
         # InternVL-Chat-V1-5 should be the first item
-        if model_name == "…
+        if model_name == "Vintern-1B-v3":
             return (1, model_name)  # 1 indicates highest precedence
-        elif model_name.startswith("…
+        elif model_name.startswith("Vintern-1B-v3"):
             return (1, model_name)  # 1 indicates highest precedence
         else:
             return (0, model_name)  # 0 indicates normal order
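custom_sort_key returns a tuple so that precedence is compared before the name itself. The enclosing sort call is not shown in this diff, so the reverse=True below — which puts the (1, ...) keys first, as the comments imply — is an assumption:

# Sketch of the key function from the diff; the reverse=True sort and the
# sample names are assumptions, not shown in the commit.
def custom_sort_key(model_name):
    if model_name == "Vintern-1B-v3":
        return (1, model_name)  # 1 indicates highest precedence
    elif model_name.startswith("Vintern-1B-v3"):
        return (1, model_name)
    else:
        return (0, model_name)  # 0 indicates normal order

models = ["zephyr", "Vintern-1B-v3", "alpha"]
print(sorted(models, key=custom_sort_key, reverse=True))
# ['Vintern-1B-v3', 'zephyr', 'alpha']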
model_worker.py CHANGED

@@ -130,12 +130,7 @@ def split_model(model_name):
     device_map = {}
     world_size = torch.cuda.device_count()
     num_layers = {
-        "…
-        "InternVL2-26B": 48,
-        "InternVL2-40B": 60,
-        "InternVL2-Llama3-76B": 80,
-        "InternVL2-78B": 80,
-        "InternVL2-Pro": 80,
+        "Vintern-1B-v3": 24,
     }[model_name]
     # Since the first GPU will be used for ViT, treat it as half a GPU.
     num_layers_per_gpu = math.ceil(num_layers / (world_size - 0.5))
@@ -207,12 +202,6 @@ class ModelWorker:
         tokenizer = AutoTokenizer.from_pretrained(
             model_path, trust_remote_code=True, use_fast=False
         )
-        tokens_to_keep = ["<box>", "</box>", "<ref>", "</ref>"]
-        tokenizer.additional_special_tokens = [
-            item
-            for item in tokenizer.additional_special_tokens
-            if item not in tokens_to_keep
-        ]
         self.tokenizer = tokenizer

         if device == "auto":
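The first model_worker.py hunk feeds a per-model layer count into the device-map split, where GPU 0 counts as only half a GPU because it also hosts the ViT. A minimal sketch of just that arithmetic (layer_allocation is an illustrative name; only the ceil formula, the ViT comment, and the 24-layer figure come from the diff — halving GPU 0's share is an assumption consistent with that comment):

# Sketch of the per-GPU layer split behind split_model, under the
# assumptions stated above.
import math

def layer_allocation(num_layers, world_size):
    # Each GPU's nominal share, with GPU 0 discounted to half a GPU.
    per_gpu = math.ceil(num_layers / (world_size - 0.5))
    shares = [per_gpu] * world_size
    shares[0] = math.ceil(per_gpu * 0.5)  # GPU 0 also runs the ViT
    return shares

# Vintern-1B-v3 has 24 language-model layers per the diff above.
print(layer_allocation(24, 2))  # -> [8, 16]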