Spaces:
Running
Running
import gradio as gr | |
import pandas as pd | |
import os | |
import plotly.express as px | |
import numpy as np | |
# Path constant; not referenced anywhere in the visible part of this script.
datadir = 'data/emissions/complete'

# Load the pre-computed study tables; the first row of each CSV is the header.
model_param_df = pd.read_csv('data/model_parameters.csv', header=0)
model_performance_df = pd.read_csv('data/performance.csv', header=0)
emissions_df = pd.read_csv('data/co2_data.csv', header=0)
modalities_df = pd.read_csv('data/modalities_data.csv', header=0)

# Keep only the rows whose task name does not contain 'zero'.
# The explicit .copy() makes finetuned_df an independent frame, so the column
# assignment below does not write into a slice of emissions_df (which would
# raise pandas' SettingWithCopyWarning and leave the result ambiguous).
finetuned_df = emissions_df[~emissions_df['task'].str.contains('zero')].copy()

# Human-readable task labels: underscores become spaces (literal, not regex).
finetuned_df['task'] = finetuned_df['task'].str.replace('_', ' ', regex=False)
# Scatter plot of every measured model: parameter count vs. query emissions,
# one colour per model, both axes logarithmic.
fig0 = px.scatter(
    emissions_df,
    x="num_params",
    y="query emissions (g)",
    color="model",
    log_x=True,
    log_y=True,
)
# NOTE(review): categoryorder normally only affects categorical axes; on this
# log-scaled x axis it is presumably a no-op -- confirm before removing.
fig0.update_layout(
    xaxis_categoryorder='mean ascending',
    yaxis_title='Total carbon emitted (g)',
    xaxis_title='Number of Parameters',
)
# Box plot of query energy per task (non-'zero' tasks only), one colour per
# task, logarithmic y axis, tasks ordered by ascending mean.
fig1 = px.box(
    finetuned_df,
    x="task",
    y="query_energy (kWh)",
    color="task",
    log_y=True,
)
# NOTE(review): the plotted column is named "(kWh)" while the axis title says
# "(Wh)". This may be intentional (kWh per 1,000 queries == Wh per query) or a
# unit mismatch -- confirm against how data/co2_data.csv was produced.
fig1.update_layout(
    xaxis_categoryorder='mean ascending',
    yaxis_title='Total energy used (Wh)',
    xaxis_title='Task',
)
# Scatter plot of parameter count vs. query emissions, coloured by modality,
# both axes logarithmic. 'model' and 'task' ride along as custom data so the
# hover box can show them.
fig2 = px.scatter(
    modalities_df,
    x="num_params",
    y="query emissions (g)",
    color="modality",
    log_x=True,
    log_y=True,
    custom_data=['model', 'task'],
)

# Hover box: model name on the first line, task on the second.
fig2.update_traces(
    hovertemplate="Model: %{customdata[0]}<br>Task: %{customdata[1]}",
)

fig2.update_layout(
    xaxis_title='Model size (number of parameters)',
    yaxis_title='Model emissions (g of CO<sub>2</sub>)',
)
# Gradio page layout:
#   - title, TL;DR and a collapsed "methodology" accordion
#   - row 1: the all-models carbon scatter plot (fig0)
#   - row 2: the per-task energy box plot (fig1) next to the per-modality
#            carbon scatter plot (fig2)
# Long Markdown texts use implicit string concatenation instead of backslash
# continuations inside the literal, so source indentation never leaks into the
# rendered text and words cannot be fused across line breaks.
with gr.Blocks() as demo:
    gr.Markdown("# CO2 Inference Demo")
    gr.Markdown(
        "### TL;DR - We ran a series of experiments to measure the energy "
        "efficiency and carbon emissions of different models from the "
        "HuggingFace Hub, and to see how different tasks and models compare. "
        "We found that multi-purpose, generative models are orders of "
        "magnitude more energy-intensive than task-specific systems for a "
        "variety of tasks, even for models with a similar number of parameters"
    )
    gr.Markdown(
        "### Explore the plots below to get more insights about the different "
        "models and tasks from our study."
    )

    with gr.Accordion("More details about our methodology:", open=False):
        # Fixed: stray "), " in the task list and the typo "taks".
        gr.Markdown(
            "We chose ten ML tasks: text classification, token "
            "classification, question answering, masked language modeling, "
            "text generation, summarization, image classification, object "
            "detection, image captioning and image generation. For each of "
            "the tasks, we chose three of the most downloaded datasets and 8 "
            "of the most downloaded models from the Hugging Face Hub. We ran "
            "each of the models ten times over a 1,000 sample from each of "
            "the models and measured the energy consumed and carbon emitted."
        )

    with gr.Row():
        with gr.Column():
            gr.Markdown("## All models from our study (carbon)")
            gr.Markdown(
                "### Double click on the model name in the list on the right "
                "to isolate its datapoints:"
            )
            gr.Markdown(
                "The axes of the plot are in logarithmic scale, meaning that "
                "the difference between the least carbon-intensive and the "
                "most carbon-intensive models is over 9,000 times!"
            )
            gr.Plot(fig0)

    with gr.Row():
        with gr.Column():
            gr.Markdown("## Task-by-task comparison (energy)")
            gr.Markdown(
                "### Grouping the models by task, we can see different "
                "patterns emerge:"
            )
            gr.Markdown(
                "Image generation is by far the most energy- and "
                "carbon-intensive task from the ones studied, and text "
                "classification is the least."
            )
            gr.Plot(fig1)
        with gr.Column():
            gr.Markdown("## Modality comparison (carbon)")
            gr.Markdown(
                "### Grouping the models by their modality shows the "
                "different characteristics of each one:"
            )
            # Fixed typo: "inolving" -> "involving".
            gr.Markdown(
                "We can see that tasks involving images (image-to-text, "
                "image-to-category) require more energy and emit more carbon "
                "than ones involving text."
            )
            gr.Plot(fig2)

# Start the web server as soon as the script runs, as the original does.
demo.launch()