Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Model dropdown
#12
by
muellerzr
HF staff
- opened
- README.md +1 -1
- requirements.txt +2 -2
- src/app.py +23 -56
- src/hub_utils.py +4 -4
- src/model_utils.py +7 -9
README.md
CHANGED
@@ -4,7 +4,7 @@ emoji: π
|
|
4 |
colorFrom: pink
|
5 |
colorTo: blue
|
6 |
sdk: gradio
|
7 |
-
sdk_version:
|
8 |
app_file: src/app.py
|
9 |
pinned: false
|
10 |
license: apache-2.0
|
|
|
4 |
colorFrom: pink
|
5 |
colorTo: blue
|
6 |
sdk: gradio
|
7 |
+
sdk_version: 3.40.1
|
8 |
app_file: src/app.py
|
9 |
pinned: false
|
10 |
license: apache-2.0
|
requirements.txt
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
-
accelerate
|
2 |
transformers
|
3 |
timm
|
4 |
-
huggingface_hub
|
5 |
tabulate
|
6 |
einops
|
|
|
1 |
+
accelerate @ git+https://github.com/huggingface/accelerate
|
2 |
transformers
|
3 |
timm
|
4 |
+
huggingface_hub
|
5 |
tabulate
|
6 |
einops
|
src/app.py
CHANGED
@@ -1,84 +1,45 @@
|
|
1 |
import gradio as gr
|
2 |
import pandas as pd
|
3 |
-
from accelerate.utils import convert_bytes
|
4 |
from hub_utils import check_for_discussion, report_results
|
5 |
-
from huggingface_hub.utils import HfHubHTTPError
|
6 |
from model_utils import calculate_memory, get_model
|
7 |
|
8 |
|
|
|
|
|
|
|
|
|
9 |
def get_results(model_name: str, library: str, options: list, access_token: str):
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
# except HfHubHTTPError:
|
14 |
-
# has_discussion = True
|
15 |
title = f"## Memory usage for '{model_name}'"
|
16 |
-
data = calculate_memory(
|
17 |
-
|
18 |
-
for i, option in enumerate(data):
|
19 |
-
for stage in stages:
|
20 |
-
stages[stage].append(option["Training using Adam (Peak vRAM)"][stage])
|
21 |
-
value = max(data[i]["Training using Adam (Peak vRAM)"].values())
|
22 |
-
if value == -1:
|
23 |
-
value = "N/A"
|
24 |
-
else:
|
25 |
-
value = convert_bytes(value)
|
26 |
-
data[i]["Training using Adam (Peak vRAM)"] = value
|
27 |
-
|
28 |
-
if any(value != -1 for value in stages["model"]):
|
29 |
-
out_explain = "## Training using Adam explained:\n"
|
30 |
-
out_explain += "When training on a batch size of 1, each stage of the training process is expected to have near the following memory results for each precision you selected:\n"
|
31 |
-
memory_values = pd.DataFrame(
|
32 |
-
columns=["dtype", "Model", "Gradient calculation", "Backward pass", "Optimizer step"]
|
33 |
-
)
|
34 |
-
for i, dtype in enumerate(options):
|
35 |
-
if stages["model"][i] != -1:
|
36 |
-
memory_values.loc[len(memory_values.index)] = [
|
37 |
-
dtype,
|
38 |
-
convert_bytes(stages["model"][i]),
|
39 |
-
convert_bytes(stages["gradients"][i]),
|
40 |
-
convert_bytes(stages["optimizer"][i]),
|
41 |
-
convert_bytes(stages["step"][i]),
|
42 |
-
]
|
43 |
-
return [
|
44 |
-
title,
|
45 |
-
gr.update(visible=True, value=pd.DataFrame(data)),
|
46 |
-
gr.update(visible=True, value=out_explain),
|
47 |
-
gr.update(visible=True, value=memory_values),
|
48 |
-
]
|
49 |
-
else:
|
50 |
-
return [
|
51 |
-
title,
|
52 |
-
gr.update(visible=True, value=pd.DataFrame(data)),
|
53 |
-
gr.update(visible=False, value=""),
|
54 |
-
gr.update(visible=False, value=pd.DataFrame()),
|
55 |
-
]
|
56 |
|
57 |
|
58 |
with gr.Blocks() as demo:
|
59 |
with gr.Column():
|
60 |
gr.Markdown(
|
61 |
"""<img src="https://huggingface.co/spaces/hf-accelerate/model-memory-usage/resolve/main/measure_model_size.png" style="float: left;" width="250" height="250"><h1>🤗 Model Memory Calculator</h1>
|
|
|
62 |
This tool will help you calculate how much vRAM is needed to train and perform big model inference
|
63 |
on a model hosted on the 🤗 Hugging Face Hub. The minimum recommended vRAM needed for a model
|
64 |
is denoted as the size of the "largest layer", and training of a model is roughly 4x its size (for Adam).
|
|
|
65 |
These calculations are accurate within a few percent at most, such as `bert-base-cased` being 413.68 MB and the calculator estimating 413.18 MB.
|
|
|
66 |
When performing inference, expect to add up to an additional 20% to this as found by [EleutherAI](https://blog.eleuther.ai/transformer-math/).
|
67 |
More tests will be performed in the future to get a more accurate benchmark for each model.
|
|
|
68 |
Currently this tool supports all models hosted that use `transformers` and `timm`.
|
|
|
69 |
To use this tool pass in the URL or model name of the model you want to calculate the memory usage for,
|
70 |
select which framework it originates from ("auto" will try and detect it from the model metadata), and
|
71 |
what precisions you want to use."""
|
72 |
)
|
73 |
out_text = gr.Markdown()
|
74 |
out = gr.DataFrame(
|
75 |
-
headers=["dtype", "Largest Layer", "Total Size", "Training using Adam
|
76 |
-
interactive=False,
|
77 |
-
visible=False,
|
78 |
-
)
|
79 |
-
out_explain = gr.Markdown()
|
80 |
-
memory_values = gr.DataFrame(
|
81 |
-
headers=["dtype", "Model", "Gradient calculation", "Backward pass", "Optimizer step"],
|
82 |
interactive=False,
|
83 |
visible=False,
|
84 |
)
|
@@ -94,12 +55,18 @@ with gr.Blocks() as demo:
|
|
94 |
access_token = gr.Textbox(label="API Token", placeholder="Optional (for gated models)")
|
95 |
with gr.Row():
|
96 |
btn = gr.Button("Calculate Memory Usage")
|
|
|
|
|
|
|
97 |
|
98 |
btn.click(
|
99 |
get_results,
|
100 |
inputs=[inp, library, options, access_token],
|
101 |
-
outputs=[out_text, out,
|
102 |
-
|
|
|
|
|
|
|
103 |
)
|
104 |
|
105 |
|
|
|
1 |
import gradio as gr
|
2 |
import pandas as pd
|
|
|
3 |
from hub_utils import check_for_discussion, report_results
|
|
|
4 |
from model_utils import calculate_memory, get_model
|
5 |
|
6 |
|
7 |
+
# We need to store them as globals because gradio doesn't have a way for us to pass them in to the button
|
8 |
+
MODEL = None
|
9 |
+
|
10 |
+
|
11 |
def get_results(model_name: str, library: str, options: list, access_token: str):
|
12 |
+
global MODEL
|
13 |
+
MODEL = get_model(model_name, library, access_token)
|
14 |
+
has_discussion = check_for_discussion(model_name)
|
|
|
|
|
15 |
title = f"## Memory usage for '{model_name}'"
|
16 |
+
data = calculate_memory(MODEL, options)
|
17 |
+
return [title, gr.update(visible=True, value=pd.DataFrame(data)), gr.update(visible=not has_discussion)]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
|
19 |
|
20 |
with gr.Blocks() as demo:
|
21 |
with gr.Column():
|
22 |
gr.Markdown(
|
23 |
"""<img src="https://huggingface.co/spaces/hf-accelerate/model-memory-usage/resolve/main/measure_model_size.png" style="float: left;" width="250" height="250"><h1>🤗 Model Memory Calculator</h1>
|
24 |
+
|
25 |
This tool will help you calculate how much vRAM is needed to train and perform big model inference
|
26 |
on a model hosted on the 🤗 Hugging Face Hub. The minimum recommended vRAM needed for a model
|
27 |
is denoted as the size of the "largest layer", and training of a model is roughly 4x its size (for Adam).
|
28 |
+
|
29 |
These calculations are accurate within a few percent at most, such as `bert-base-cased` being 413.68 MB and the calculator estimating 413.18 MB.
|
30 |
+
|
31 |
When performing inference, expect to add up to an additional 20% to this as found by [EleutherAI](https://blog.eleuther.ai/transformer-math/).
|
32 |
More tests will be performed in the future to get a more accurate benchmark for each model.
|
33 |
+
|
34 |
Currently this tool supports all models hosted that use `transformers` and `timm`.
|
35 |
+
|
36 |
To use this tool pass in the URL or model name of the model you want to calculate the memory usage for,
|
37 |
select which framework it originates from ("auto" will try and detect it from the model metadata), and
|
38 |
what precisions you want to use."""
|
39 |
)
|
40 |
out_text = gr.Markdown()
|
41 |
out = gr.DataFrame(
|
42 |
+
headers=["dtype", "Largest Layer", "Total Size", "Training using Adam"],
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
interactive=False,
|
44 |
visible=False,
|
45 |
)
|
|
|
55 |
access_token = gr.Textbox(label="API Token", placeholder="Optional (for gated models)")
|
56 |
with gr.Row():
|
57 |
btn = gr.Button("Calculate Memory Usage")
|
58 |
+
post_to_hub = gr.Button(
|
59 |
+
value="Report results in this model repo's discussions!\n(Will open in a new tab)", visible=False
|
60 |
+
)
|
61 |
|
62 |
btn.click(
|
63 |
get_results,
|
64 |
inputs=[inp, library, options, access_token],
|
65 |
+
outputs=[out_text, out, post_to_hub],
|
66 |
+
)
|
67 |
+
|
68 |
+
post_to_hub.click(report_results, inputs=[inp, library, access_token]).then(
|
69 |
+
lambda: gr.Button.update(visible=False), outputs=post_to_hub
|
70 |
)
|
71 |
|
72 |
|
src/hub_utils.py
CHANGED
@@ -13,7 +13,7 @@ def check_for_discussion(model_name: str):
|
|
13 |
model_name = extract_from_url(model_name)
|
14 |
discussions = list(api.get_repo_discussions(model_name))
|
15 |
return any(
|
16 |
-
discussion.author == "model-sizer-bot"
|
17 |
for discussion in discussions
|
18 |
)
|
19 |
|
@@ -27,9 +27,9 @@ def report_results(model_name, library, access_token):
|
|
27 |
post = f"""# Model Memory Requirements\n
|
28 |
|
29 |
You will need about {data[1]} VRAM to load this model for inference, and {data[3]} VRAM to train it using Adam.
|
30 |
-
|
31 |
-
These calculations were measured from the [Model Memory Utility Space](https://
|
32 |
-
|
33 |
The minimum recommended vRAM needed for this model assumes using [Accelerate or `device_map="auto"`](https://huggingface.co/docs/accelerate/usage_guides/big_modeling) and is denoted by the size of the "largest layer".
|
34 |
When performing inference, expect to add up to an additional 20% to this, as found by [EleutherAI](https://blog.eleuther.ai/transformer-math/). More tests will be performed in the future to get a more accurate benchmark for each model.
|
35 |
|
|
|
13 |
model_name = extract_from_url(model_name)
|
14 |
discussions = list(api.get_repo_discussions(model_name))
|
15 |
return any(
|
16 |
+
discussion.title == "[AUTOMATED] Model Memory Requirements" and discussion.author == "model-sizer-bot"
|
17 |
for discussion in discussions
|
18 |
)
|
19 |
|
|
|
27 |
post = f"""# Model Memory Requirements\n
|
28 |
|
29 |
You will need about {data[1]} VRAM to load this model for inference, and {data[3]} VRAM to train it using Adam.
|
30 |
+
|
31 |
+
These calculations were measured from the [Model Memory Utility Space](https://hf.co/spaces/hf-accelerate/model-memory-utility) on the Hub.
|
32 |
+
|
33 |
The minimum recommended vRAM needed for this model assumes using [Accelerate or `device_map="auto"`](https://huggingface.co/docs/accelerate/usage_guides/big_modeling) and is denoted by the size of the "largest layer".
|
34 |
When performing inference, expect to add up to an additional 20% to this, as found by [EleutherAI](https://blog.eleuther.ai/transformer-math/). More tests will be performed in the future to get a more accurate benchmark for each model.
|
35 |
|
src/model_utils.py
CHANGED
@@ -3,7 +3,7 @@ from urllib.parse import urlparse
|
|
3 |
|
4 |
import gradio as gr
|
5 |
import torch
|
6 |
-
from accelerate.commands.estimate import check_has_model, create_empty_model
|
7 |
from accelerate.utils import calculate_maximum_sizes, convert_bytes
|
8 |
from huggingface_hub.utils import GatedRepoError, RepositoryNotFoundError
|
9 |
|
@@ -27,8 +27,8 @@ def extract_from_url(name: str):
|
|
27 |
return path[1:]
|
28 |
|
29 |
|
30 |
-
def
|
31 |
-
"Translates
|
32 |
if not text.endswith("-hf"):
|
33 |
return text + "-hf"
|
34 |
return text
|
@@ -36,8 +36,8 @@ def translate_llama(text):
|
|
36 |
|
37 |
def get_model(model_name: str, library: str, access_token: str):
|
38 |
"Finds and grabs model from the Hub, and initializes on `meta`"
|
39 |
-
if "meta-llama
|
40 |
-
model_name =
|
41 |
if library == "auto":
|
42 |
library = None
|
43 |
model_name = extract_from_url(model_name)
|
@@ -84,12 +84,10 @@ def calculate_memory(model: torch.nn.Module, options: list):
|
|
84 |
dtype_largest_layer = largest_layer[0]
|
85 |
|
86 |
modifier = DTYPE_MODIFIER[dtype]
|
87 |
-
dtype_training_size = estimate_training_usage(
|
88 |
-
dtype_total_size, dtype if dtype != "float16/bfloat16" else "float16"
|
89 |
-
)
|
90 |
dtype_total_size /= modifier
|
91 |
dtype_largest_layer /= modifier
|
92 |
|
|
|
93 |
dtype_total_size = convert_bytes(dtype_total_size)
|
94 |
dtype_largest_layer = convert_bytes(dtype_largest_layer)
|
95 |
data.append(
|
@@ -97,7 +95,7 @@ def calculate_memory(model: torch.nn.Module, options: list):
|
|
97 |
"dtype": dtype,
|
98 |
"Largest Layer or Residual Group": dtype_largest_layer,
|
99 |
"Total Size": dtype_total_size,
|
100 |
-
"Training using Adam
|
101 |
}
|
102 |
)
|
103 |
return data
|
|
|
3 |
|
4 |
import gradio as gr
|
5 |
import torch
|
6 |
+
from accelerate.commands.estimate import check_has_model, create_empty_model
|
7 |
from accelerate.utils import calculate_maximum_sizes, convert_bytes
|
8 |
from huggingface_hub.utils import GatedRepoError, RepositoryNotFoundError
|
9 |
|
|
|
27 |
return path[1:]
|
28 |
|
29 |
|
30 |
+
def translate_llama2(text):
|
31 |
+
"Translates llama-2 to its hf counterpart"
|
32 |
if not text.endswith("-hf"):
|
33 |
return text + "-hf"
|
34 |
return text
|
|
|
36 |
|
37 |
def get_model(model_name: str, library: str, access_token: str):
|
38 |
"Finds and grabs model from the Hub, and initializes on `meta`"
|
39 |
+
if "meta-llama" in model_name:
|
40 |
+
model_name = translate_llama2(model_name)
|
41 |
if library == "auto":
|
42 |
library = None
|
43 |
model_name = extract_from_url(model_name)
|
|
|
84 |
dtype_largest_layer = largest_layer[0]
|
85 |
|
86 |
modifier = DTYPE_MODIFIER[dtype]
|
|
|
|
|
|
|
87 |
dtype_total_size /= modifier
|
88 |
dtype_largest_layer /= modifier
|
89 |
|
90 |
+
dtype_training_size = convert_bytes(dtype_total_size * 4)
|
91 |
dtype_total_size = convert_bytes(dtype_total_size)
|
92 |
dtype_largest_layer = convert_bytes(dtype_largest_layer)
|
93 |
data.append(
|
|
|
95 |
"dtype": dtype,
|
96 |
"Largest Layer or Residual Group": dtype_largest_layer,
|
97 |
"Total Size": dtype_total_size,
|
98 |
+
"Training using Adam": dtype_training_size,
|
99 |
}
|
100 |
)
|
101 |
return data
|