model-memory-usage

Sleeping

App Files Files Community

muellerzr HF staff committed on Aug 24, 2023

Commit

0ecaccb

•

1 Parent(s): 40e2c53

Working version

Browse files

Files changed (2) hide show

app.py +84 -61
requirements.txt +1 -1

app.py CHANGED Viewed

@@ -1,44 +1,45 @@
 import re
 import webbrowser
 import pandas as pd
 import gradio as gr
 from huggingface_hub import HfApi
-from accelerate.commands.estimate import create_empty_model
 from accelerate.utils import convert_bytes, calculate_maximum_sizes
 # We need to store them as globals because gradio doesn't have a way for us to pass them in to the button
 HAS_DISCUSSION = True
 MODEL_NAME = None
 LIBRARY = None
-TRUST_REMOTE_CODE = False
-# We use this class to check if a discussion has been opened on the model by `huggingface_model_memory_bot`
-hf_api = HfApi()
 def check_for_discussion(model_name:str):
-    "Checks if a discussion has been opened on the model"
-    global hf_api
-    discussions = list(hf_api.get_repo_discussions(model_name))
-    return any(discussion.title == "[AUTOMATED] Model Memory Requirements" for discussion in discussions)
 def report_results():
-    "Reports the results of a memory calculation to the model's discussion"
-    global MODEL_NAME, LIBRARY, TRUST_REMOTE_CODE
-    _, results = calculate_memory(MODEL_NAME, LIBRARY, ["float32", "float16", "int8", "int4"], TRUST_REMOTE_CODE, raw=True)
     post = f"""# Model Memory Requirements\n
-These calculations were measured from the [Model Memory Utility Space](https://hf.co/spaces/muellerzr/model-memory-utility) on the Hub.
-The minimum recommended vRAM needed for this model to perform inference via [Accelerate or `device_map="auto"`](https://huggingface.co/docs/accelerate/usage_guides/big_modeling) is denoted by the size of the "largest layer" and training of the model is roughly 4x its total size (for Adam).
-## Results
 """
-    global hf_api
-    post += results.to_markdown(index=False)
-    # Uncomment when ready to go live
-    # discussion = hf_api.create_discussion(MODEL_NAME, "[AUTOMATED] Model Memory Requirements", description=post)
-    # webbrowser.open_new_tab(discussion.url)
 def convert_url_to_name(url:str):
     "Converts a model URL to its name on the Hub"
@@ -47,18 +48,33 @@ def convert_url_to_name(url:str):
         raise ValueError(f"URL {url} is not a valid model URL to the Hugging Face Hub")
     return results[0]
-def calculate_memory(model_name:str, library:str, options:list, trust_remote_code:bool, raw=False):
     "Calculates the memory usage for a model"
     if library == "auto":
         library = None
-    if "huggingface.co" in model_name:
-        model_name = convert_url_to_name(model_name)
-    model = create_empty_model(model_name, library_name=library, trust_remote_code=trust_remote_code)
     total_size, largest_layer = calculate_maximum_sizes(model)
     data = []
-    title = f"Memory Usage for `{model_name}`"
     for dtype in options:
         dtype_total_size = total_size
         dtype_largest_layer = largest_layer[0]
@@ -76,57 +92,64 @@ def calculate_memory(model_name:str, library:str, options:list, trust_remote_cod
         dtype_largest_layer = convert_bytes(dtype_largest_layer)
         data.append({
             "dtype": dtype,
-            "Largest Layer": dtype_largest_layer,
             "Total Size": dtype_total_size,
             "Training using Adam": dtype_training_size
         })
-    global HAS_DISCUSSION, MODEL_NAME, LIBRARY, TRUST_REMOTE_CODE
     HAS_DISCUSSION = check_for_discussion(model_name)
     MODEL_NAME = model_name
     LIBRARY = library
-    TRUST_REMOTE_CODE = trust_remote_code
-    results = [f'## {title}', pd.DataFrame(data)]
-    if not raw:
-        results += [gr.update(visible=not HAS_DISCUSSION)]
     return results
 with gr.Blocks() as demo:
-    gr.Markdown(
-        """# Model Memory Calculator
-        This tool will help you calculate how much vRAM is needed to train and perform big model inference
-        on a model hosted on the 🤗 Hugging Face Hub. The minimum recommended vRAM needed for a model
-        is denoted as the size of the "largest layer", and training of a model is roughly 4x its size (for Adam).
-        Currently this tool supports all models hosted that use `transformers` and `timm`.
-        To use this tool pass in the URL or model name of the model you want to calculate the memory usage for,
-        select which framework it originates from ("auto" will try and detect it from the model metadata), and
-        what precisions you want to use.
-        """
-    )
-    out_text = gr.Markdown()
-    out = gr.DataFrame(
-        headers=["dtype", "Largest Layer", "Total Size", "Training using Adam"],
-        interactive=False,
-    )
-    inp = gr.Textbox(label="Model Name or URL")
-    with gr.Row():
-        library = gr.Radio(["auto", "transformers", "timm"], label="Library", value="auto")
-        options = gr.CheckboxGroup(
-            ["float32", "float16", "int8", "int4"],
-            value="float32"
         )
-        trust_remote_code = gr.Checkbox(label="Trust Remote Code", value=False)
-    btn = gr.Button("Calculate Memory Usage")
-    post_to_hub = gr.Button(value = "Report results in this model repo's discussions!", visible=False)
     btn.click(
-        calculate_memory, inputs=[inp, library, options, trust_remote_code], outputs=[out_text, out, post_to_hub],
     )
-    post_to_hub.click(report_results)
 demo.launch()

+import os
 import re
 import webbrowser
 import pandas as pd
 import gradio as gr
 from huggingface_hub import HfApi
+from huggingface_hub.utils import RepositoryNotFoundError, GatedRepoError
+from accelerate.commands.estimate import create_empty_model, check_has_model
 from accelerate.utils import convert_bytes, calculate_maximum_sizes
 # We need to store them as globals because gradio doesn't have a way for us to pass them in to the button
 HAS_DISCUSSION = True
 MODEL_NAME = None
 LIBRARY = None
+TOKEN = os.environ.get("HUGGINGFACE_API_LOGIN", None)
 def check_for_discussion(model_name:str):
+    "Checks if an automated discussion has been opened on the model by `model-sizer-bot`"
+    api = HfApi(token=TOKEN)
+    discussions = list(api.get_repo_discussions(model_name))
+    return any(discussion.title == "[AUTOMATED] Model Memory Requirements" and discussion.author == "model-sizer-bot" for discussion in discussions)
 def report_results():
+    "Reports the results of a memory calculation to the model's discussion page, and opens a new tab to it afterwards"
+    global MODEL_NAME, LIBRARY
+    api = HfApi(token=TOKEN)
+    results = calculate_memory(MODEL_NAME, LIBRARY, ["fp32", "fp16", "int8", "int4"], raw=True)
     post = f"""# Model Memory Requirements\n
+These calculations were measured from the [Model Memory Utility Space](https://hf.co/spaces/hf-accelerate/model-memory-utility) on the Hub.
+The minimum recommended vRAM needed for this model to be loaded into memory via [Accelerate or `device_map="auto"`](https://huggingface.co/docs/accelerate/usage_guides/big_modeling) is denoted by the size of the "largest layer".
+When performing inference, expect to add up to an additional 20% to this, as found by [EleutherAI](https://blog.eleuther.ai/transformer-math/). More tests will be performed in the future to get a more accurate benchmark for each model.
+When training with `Adam`, you can expect roughly 4x the reported results to be used. (1x for the model, 1x for the gradients, and 2x for the optimizer).
+## Results:
+{results}
 """
+    discussion = api.create_discussion(MODEL_NAME, "[AUTOMATED] Model Memory Requirements", description=post)
+    webbrowser.open_new_tab(discussion.url)
 def convert_url_to_name(url:str):
     "Converts a model URL to its name on the Hub"
         raise ValueError(f"URL {url} is not a valid model URL to the Hugging Face Hub")
     return results[0]
+def calculate_memory(model_name:str, library:str, options:list, access_token:str, raw=False):
     "Calculates the memory usage for a model"
     if library == "auto":
         library = None
+    if "http" in model_name and "//" in model_name:
+        try:
+            model_name = convert_url_to_name(model_name)
+        except ValueError:
+            raise gr.Error(f"URL `{model_name}` is not a valid model URL to the Hugging Face Hub")
+    try:
+        model = create_empty_model(model_name, library_name=library, trust_remote_code=True, access_token=access_token)
+    except GatedRepoError:
+        raise gr.Error(f"Model `{model_name}` is a gated model, please ensure to pass in your access token and try again if you have access.")
+    except RepositoryNotFoundError:
+        raise gr.Error(f"Model `{model_name}` was not found on the Hub, please try another model name.")
+    except ValueError as e:
+        raise gr.Error(f"Model `{model_name}` does not have any library metadata on the Hub, please manually select a library_name to use (such as `transformers`)")
+    except (RuntimeError, OSError) as e:
+        library = check_has_model(e)
+        if library != "unknown":
+            raise gr.Error(f"Tried to load `{model_name}` with `{library}` but a possible model to load was not found inside the repo.")
     total_size, largest_layer = calculate_maximum_sizes(model)
     data = []
+    title = f"Memory Usage for '{model_name}'"
     for dtype in options:
         dtype_total_size = total_size
         dtype_largest_layer = largest_layer[0]
         dtype_largest_layer = convert_bytes(dtype_largest_layer)
         data.append({
             "dtype": dtype,
+            "Largest Layer or Residual Group": dtype_largest_layer,
             "Total Size": dtype_total_size,
             "Training using Adam": dtype_training_size
         })
+    global HAS_DISCUSSION, MODEL_NAME, LIBRARY
     HAS_DISCUSSION = check_for_discussion(model_name)
     MODEL_NAME = model_name
     LIBRARY = library
+    if raw:
+        return pd.DataFrame(data).to_markdown(index=False)
+    results = [
+        f'## {title}',
+        gr.update(visible=True, value=pd.DataFrame(data)),
+        gr.update(visible=not HAS_DISCUSSION)
+    ]
     return results
 with gr.Blocks() as demo:
+    with gr.Column():
+        gr.Markdown(
+            """# Model Memory Calculator
+    This tool will help you calculate how much vRAM is needed to train and perform big model inference
+    on a model hosted on the 🤗 Hugging Face Hub. The minimum recommended vRAM needed for a model
+    is denoted as the size of the "largest layer", and training of a model is roughly 4x its size (for Adam).
+    Currently this tool supports all models hosted that use `transformers` and `timm`.
+    To use this tool pass in the URL or model name of the model you want to calculate the memory usage for,
+    select which framework it originates from ("auto" will try and detect it from the model metadata), and
+    what precisions you want to use."""
+        )
+        out_text = gr.Markdown()
+        out = gr.DataFrame(
+            headers=["dtype", "Largest Layer", "Total Size", "Training using Adam"],
+            interactive=False,
+            visible=False,
         )
+        with gr.Row():
+            inp = gr.Textbox(label="Model Name or URL")
+        with gr.Row():
+            library = gr.Radio(["auto", "transformers", "timm"], label="Library", value="auto")
+            options = gr.CheckboxGroup(
+                ["float32", "float16", "int8", "int4"],
+                value="float32"
+            )
+            access_token = gr.Textbox(label="API Token", placeholder="Optional (for gated models)")
+        with gr.Row():
+            btn = gr.Button("Calculate Memory Usage")
+            post_to_hub = gr.Button(value = "Report results in this model repo's discussions!\n(Will open in a new tab)", visible=False)
     btn.click(
+        calculate_memory, inputs=[inp, library, options, access_token], outputs=[out_text, out, post_to_hub],
     )
+    post_to_hub.click(report_results).then(lambda: gr.Button.update(visible=False), outputs=post_to_hub)
 demo.launch()

requirements.txt CHANGED Viewed

@@ -1,4 +1,4 @@
-accelerate @ git+https://github.com/huggingface/accelerate@model-size-estimator
 transformers
 timm
 huggingface_hub

+accelerate @ git+https://github.com/huggingface/accelerate
 transformers
 timm
 huggingface_hub