Spaces:
Running
Running
<html lang="en"><head> | |
<script src="accelerate_files/libs/clipboard/clipboard.min.js"></script> | |
<script src="accelerate_files/libs/quarto-html/tabby.min.js"></script> | |
<script src="accelerate_files/libs/quarto-html/popper.min.js"></script> | |
<script src="accelerate_files/libs/quarto-html/tippy.umd.min.js"></script> | |
<link href="accelerate_files/libs/quarto-html/tippy.css" rel="stylesheet"> | |
<link href="accelerate_files/libs/quarto-html/light-border.css" rel="stylesheet"> | |
<link href="accelerate_files/libs/quarto-html/quarto-html.min.css" rel="stylesheet" data-mode="light"> | |
<link href="accelerate_files/libs/quarto-html/quarto-syntax-highlighting-dark.css" rel="stylesheet" id="quarto-text-highlighting-styles"> | |
<script src="accelerate_files/libs/quarto-contrib/videojs/video.min.js"></script> | |
<link href="accelerate_files/libs/quarto-contrib/videojs/video-js.css" rel="stylesheet"><meta charset="utf-8"> | |
<meta name="generator" content="quarto-1.3.450"> | |
<title>Hugging Face Accelerate: Making device-agnostic ML training and inference easy at scale</title> | |
<meta name="apple-mobile-web-app-capable" content="yes"> | |
<meta name="apple-mobile-web-app-status-bar-style" content="black-translucent"> | |
<meta name="viewport" content="width=device-width, initial-scale=1.0, minimal-ui"> <!-- zoom is left enabled (no maximum-scale / user-scalable=no) so readers can pinch-zoom the slides --> | |
<link rel="stylesheet" href="accelerate_files/libs/revealjs/dist/reset.css"> | |
<link rel="stylesheet" href="accelerate_files/libs/revealjs/dist/reveal.css"> | |
<style> | |
code{white-space: pre-wrap;} | |
span.smallcaps{font-variant: small-caps;} | |
div.columns{display: flex; gap: min(4vw, 1.5em);} | |
div.column{flex: auto; overflow-x: auto;} | |
div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;} | |
ul.task-list{list-style: none;} | |
ul.task-list li input[type="checkbox"] { | |
width: 0.8em; | |
margin: 0 0.8em 0.2em -1em; /* quarto-specific, see https://github.com/quarto-dev/quarto-cli/issues/4556 */ | |
vertical-align: middle; | |
} | |
/* CSS for syntax highlighting */ | |
pre > code.sourceCode { white-space: pre; position: relative; } | |
pre > code.sourceCode > span { display: inline-block; line-height: 1.25; } | |
pre > code.sourceCode > span:empty { height: 1.2em; } | |
.sourceCode { overflow: visible; } | |
code.sourceCode > span { color: inherit; text-decoration: inherit; } | |
div.sourceCode { margin: 1em 0; } | |
pre.sourceCode { margin: 0; } | |
@media screen { | |
div.sourceCode { overflow: auto; } | |
} | |
@media print { | |
pre > code.sourceCode { white-space: pre-wrap; } | |
pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; } | |
} | |
pre.numberSource code | |
{ counter-reset: source-line 0; } | |
pre.numberSource code > span | |
{ position: relative; left: -4em; counter-increment: source-line; } | |
pre.numberSource code > span > a:first-child::before | |
{ content: counter(source-line); | |
position: relative; left: -1em; text-align: right; vertical-align: baseline; | |
border: none; display: inline-block; | |
-webkit-touch-callout: none; -webkit-user-select: none; | |
-khtml-user-select: none; -moz-user-select: none; | |
-ms-user-select: none; user-select: none; | |
padding: 0 4px; width: 4em; | |
} | |
pre.numberSource { margin-left: 3em; padding-left: 4px; } | |
div.sourceCode | |
{ color: #f8f8f2; } | |
@media screen { | |
pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; } | |
} | |
code span { color: #f8f8f2; } /* Normal */ | |
code span.al { color: #f07178; background-color: #2a0f15; font-weight: bold; } /* Alert */ | |
code span.an { color: #d4d0ab; } /* Annotation */ | |
code span.at { color: #00e0e0; } /* Attribute */ | |
code span.bn { color: #d4d0ab; } /* BaseN */ | |
code span.bu { color: #abe338; } /* BuiltIn */ | |
code span.cf { color: #ffa07a; font-weight: bold; } /* ControlFlow */ | |
code span.ch { color: #abe338; } /* Char */ | |
code span.cn { color: #ffd700; } /* Constant */ | |
code span.co { color: #f8f8f2; font-style: italic; } /* Comment */ | |
code span.cv { color: #ffd700; } /* CommentVar */ | |
code span.do { color: #f8f8f2; } /* Documentation */ | |
code span.dt { color: #ffa07a; } /* DataType */ | |
code span.dv { color: #d4d0ab; } /* DecVal */ | |
code span.er { color: #f07178; text-decoration: underline; } /* Error */ | |
code span.ex { color: #00e0e0; font-weight: bold; } /* Extension */ | |
code span.fl { color: #d4d0ab; } /* Float */ | |
code span.fu { color: #ffa07a; } /* Function */ | |
code span.im { color: #abe338; } /* Import */ | |
code span.in { color: #d4d0ab; } /* Information */ | |
code span.kw { color: #ffa07a; font-weight: bold; } /* Keyword */ | |
code span.op { color: #ffa07a; } /* Operator */ | |
code span.ot { color: #00e0e0; } /* Other */ | |
code span.pp { color: #dcc6e0; } /* Preprocessor */ | |
code span.re { color: #00e0e0; background-color: #f8f8f2; } /* RegionMarker */ | |
code span.sc { color: #abe338; } /* SpecialChar */ | |
code span.ss { color: #abe338; } /* SpecialString */ | |
code span.st { color: #abe338; } /* String */ | |
code span.va { color: #00e0e0; } /* Variable */ | |
code span.vs { color: #abe338; } /* VerbatimString */ | |
code span.wa { color: #dcc6e0; } /* Warning */ | |
</style> | |
<link rel="stylesheet" href="accelerate_files/libs/revealjs/dist/theme/quarto.css"> | |
<link href="accelerate_files/libs/revealjs/plugin/quarto-line-highlight/line-highlight.css" rel="stylesheet"> | |
<link href="accelerate_files/libs/revealjs/plugin/reveal-menu/menu.css" rel="stylesheet"> | |
<link href="accelerate_files/libs/revealjs/plugin/reveal-menu/quarto-menu.css" rel="stylesheet"> | |
<link href="accelerate_files/libs/revealjs/plugin/quarto-support/footer.css" rel="stylesheet"> | |
<style type="text/css"> | |
.callout { | |
margin-top: 1em; | |
margin-bottom: 1em; | |
border-radius: .25rem; | |
} | |
.callout.callout-style-simple { | |
padding: 0em 0.5em; | |
border-left: solid #acacac .3rem; | |
border-right: solid 1px silver; | |
border-top: solid 1px silver; | |
border-bottom: solid 1px silver; | |
display: flex; | |
} | |
.callout.callout-style-default { | |
border-left: solid #acacac .3rem; | |
border-right: solid 1px silver; | |
border-top: solid 1px silver; | |
border-bottom: solid 1px silver; | |
} | |
.callout .callout-body-container { | |
flex-grow: 1; | |
} | |
.callout.callout-style-simple .callout-body { | |
font-size: 1rem; | |
font-weight: 400; | |
} | |
.callout.callout-style-default .callout-body { | |
font-size: 0.9rem; | |
font-weight: 400; | |
} | |
.callout.callout-titled.callout-style-simple .callout-body { | |
margin-top: 0.2em; | |
} | |
.callout:not(.callout-titled) .callout-body { | |
display: flex; | |
} | |
.callout:not(.no-icon).callout-titled.callout-style-simple .callout-content { | |
padding-left: 1.6em; | |
} | |
.callout.callout-titled .callout-header { | |
padding-top: 0.2em; | |
margin-bottom: -0.2em; | |
} | |
.callout.callout-titled .callout-title p { | |
margin-top: 0.5em; | |
margin-bottom: 0.5em; | |
} | |
.callout.callout-titled.callout-style-simple .callout-content p { | |
margin-top: 0; | |
} | |
.callout.callout-titled.callout-style-default .callout-content p { | |
margin-top: 0.7em; | |
} | |
.callout.callout-style-simple div.callout-title { /* title row of a "simple" callout */ | |
border-bottom: none; | |
font-size: .9rem; | |
font-weight: 600; | |
opacity: 0.75; /* unitless form of 75%; also understood by engines without percentage-opacity support */ | |
} | |
.callout.callout-style-default div.callout-title { /* title row of a "default" callout */ | |
border-bottom: none; | |
font-weight: 600; | |
opacity: 0.85; /* unitless form of 85%; also understood by engines without percentage-opacity support */ | |
font-size: 0.9rem; | |
padding-left: 0.5em; | |
padding-right: 0.5em; | |
} | |
.callout.callout-style-default div.callout-content { | |
padding-left: 0.5em; | |
padding-right: 0.5em; | |
} | |
.callout.callout-style-simple .callout-icon::before { | |
height: 1rem; | |
width: 1rem; | |
display: inline-block; | |
content: ""; | |
background-repeat: no-repeat; | |
background-size: 1rem 1rem; | |
} | |
.callout.callout-style-default .callout-icon::before { | |
height: 0.9rem; | |
width: 0.9rem; | |
display: inline-block; | |
content: ""; | |
background-repeat: no-repeat; | |
background-size: 0.9rem 0.9rem; | |
} | |
.callout-title { | |
display: flex | |
} | |
.callout-icon::before { | |
margin-top: 1rem; | |
padding-right: .5rem; | |
} | |
.callout.no-icon::before { | |
display: none ; | |
} | |
.callout.callout-titled .callout-body > .callout-content > :last-child { | |
margin-bottom: 0.5rem; | |
} | |
.callout.callout-titled .callout-icon::before { | |
margin-top: .5rem; | |
padding-right: .5rem; | |
} | |
.callout:not(.callout-titled) .callout-icon::before { | |
margin-top: 1rem; | |
padding-right: .5rem; | |
} | |
/* Callout Types */ | |
div.callout-note { | |
border-left-color: #4582ec ; | |
} | |
div.callout-note .callout-icon::before { | |
background-image: url(''); | |
} | |
div.callout-note.callout-style-default .callout-title { | |
background-color: #dae6fb | |
} | |
div.callout-important { | |
border-left-color: #d9534f ; | |
} | |
div.callout-important .callout-icon::before { | |
background-image: url(''); | |
} | |
div.callout-important.callout-style-default .callout-title { | |
background-color: #f7dddc | |
} | |
div.callout-warning { | |
border-left-color: #f0ad4e ; | |
} | |
div.callout-warning .callout-icon::before { | |
background-image: url(''); | |
} | |
div.callout-warning.callout-style-default .callout-title { | |
background-color: #fcefdc | |
} | |
div.callout-tip { | |
border-left-color: #02b875 ; | |
} | |
div.callout-tip .callout-icon::before { | |
background-image: url(''); | |
} | |
div.callout-tip.callout-style-default .callout-title { | |
background-color: #ccf1e3 | |
} | |
div.callout-caution { | |
border-left-color: #fd7e14 ; | |
} | |
div.callout-caution .callout-icon::before { | |
background-image: url(''); | |
} | |
div.callout-caution.callout-style-default .callout-title { | |
background-color: #ffe5d0 | |
} | |
</style> | |
<style type="text/css"> | |
.reveal div.sourceCode { | |
margin: 0; | |
overflow: auto; | |
} | |
.reveal div.hanging-indent { | |
margin-left: 1em; | |
text-indent: -1em; | |
} | |
.reveal .slide:not(.center) { | |
height: 100%; | |
} | |
.reveal .slide.scrollable { | |
overflow-y: auto; | |
} | |
.reveal .footnotes { | |
height: 100%; | |
overflow-y: auto; | |
} | |
.reveal .slide .absolute { | |
position: absolute; | |
display: block; | |
} | |
.reveal .footnotes ol { | |
counter-reset: ol; | |
list-style-type: none; | |
margin-left: 0; | |
} | |
.reveal .footnotes ol li::before { /* double-colon pseudo-element syntax, matching the other ::before rules in this file */ | |
counter-increment: ol; | |
content: counter(ol) ". "; /* prints the running "ol" counter followed by ". " in front of each footnote item */ | |
} | |
.reveal .footnotes ol li > p:first-child { | |
display: inline-block; | |
} | |
.reveal .slide ul, | |
.reveal .slide ol { | |
margin-bottom: 0.5em; | |
} | |
.reveal .slide ul li, | |
.reveal .slide ol li { | |
margin-top: 0.4em; | |
margin-bottom: 0.2em; | |
} | |
.reveal .slide ul[role="tablist"] li { | |
margin-bottom: 0; | |
} | |
.reveal .slide ul li > *:first-child, | |
.reveal .slide ol li > *:first-child { | |
margin-block-start: 0; | |
} | |
.reveal .slide ul li > *:last-child, | |
.reveal .slide ol li > *:last-child { | |
margin-block-end: 0; | |
} | |
.reveal .slide .columns:nth-child(3) { | |
margin-block-start: 0.8em; | |
} | |
.reveal blockquote { | |
box-shadow: none; | |
} | |
.reveal .tippy-content>* { | |
margin-top: 0.2em; | |
margin-bottom: 0.7em; | |
} | |
.reveal .tippy-content>*:last-child { | |
margin-bottom: 0.2em; | |
} | |
.reveal .slide > img.stretch.quarto-figure-center, | |
.reveal .slide > img.r-stretch.quarto-figure-center { | |
display: block; | |
margin-left: auto; | |
margin-right: auto; | |
} | |
.reveal .slide > img.stretch.quarto-figure-left, | |
.reveal .slide > img.r-stretch.quarto-figure-left { | |
display: block; | |
margin-left: 0; | |
margin-right: auto; | |
} | |
.reveal .slide > img.stretch.quarto-figure-right, | |
.reveal .slide > img.r-stretch.quarto-figure-right { | |
display: block; | |
margin-left: auto; | |
margin-right: 0; | |
} | |
</style> | |
</head> | |
<body class="quarto-dark"> | |
<div class="reveal"> | |
<div class="slides"> | |
<section id="title-slide" class="quarto-title-block center"> | |
<h1 class="title">Hugging Face Accelerate: Making device-agnostic ML training and inference easy at scale</h1> | |
<div class="quarto-title-authors"> | |
</div> | |
</section> | |
<section id="who-am-i" class="slide level2"> | |
<h2>Who am I?</h2> | |
<ul> | |
<li>Zachary Mueller</li> | |
<li>Technical Lead for the 🤗 Accelerate project</li> | |
<li>Maintain the <code>transformers</code> Trainer</li> | |
<li>API design geek</li> | |
</ul> | |
</section> | |
<section id="what-is-accelerate" class="slide level2"> | |
<h2>What is 🤗 Accelerate?</h2> | |
<ul> | |
<li>A training framework</li> | |
<li>An inference framework</li> | |
<li>A command-line interface</li> | |
</ul> | |
</section> | |
<section id="a-training-framework" class="slide level2"> | |
<h2>A Training Framework</h2> | |
<ul> | |
<li>Powered by PyTorch</li> | |
<li>Change a few lines of code, gain device <em>and</em> hardware-agnostic capabilities</li> | |
<li>Low-code, with minimal magic aimed at easy hackability and use without high-level abstractions</li> | |
<li>We handle the intricacies so you don’t have to</li> | |
</ul> | |
</section> | |
<section id="a-training-framework-1" class="slide level2"> | |
<h2>A Training Framework</h2> | |
<div style="font-size: 70%;"> | |
<ul> | |
<li>Support for any hardware-accelerator on the market: | |
<ul> | |
<li>CPU, GPU, TPU, XPU, NPU, MLU</li> | |
</ul></li> | |
<li>Automatic mixed-precision training <em>safely</em> in whatever fashion you may choose: | |
<ul> | |
<li>FP16, BF16, FP8 (through either <code>TransformerEngine</code> or <code>MS-AMP</code>)</li> | |
</ul></li> | |
<li>Automatic and efficient gradient accumulation</li> | |
<li>Support for quantization through <code>bitsandbytes</code></li> | |
<li>Support your favorite experiment trackers (<code>aim</code>, <code>clearml</code>, <code>comet_ml</code>, <code>dvclive</code>, <code>mlflow</code>, <code>tensorboard</code>, <code>wandb</code>)</li> | |
<li>Easy to configure plugin or YAML-level API for setting up advanced frameworks like <code>FSDP</code>, <code>DeepSpeed</code>, and <code>Megatron-LM</code></li> | |
</ul> | |
</div> | |
</section> | |
<section id="low-code" class="slide level2"> | |
<h2>Low-Code</h2> | |
<div style="font-size: 70%;"> | |
<ul> | |
<li>Biggest friction with “wrapper” libraries is control of your code</li> | |
<li>By being minimally intrusive, your code just “works” while still giving you complete control</li> | |
</ul> | |
</div> | |
<div style="font-size: 60%;padding-left:15%;padding-top:0%;padding-right:20%"> | |
<div class="sourceCode" id="cb1"><pre class="sourceCode numberSource diff number-lines code-with-copy"><code class="sourceCode diff"><span id="cb1-1"><a href="#cb1-1"></a> import torch</span> | |
<span id="cb1-2"><a href="#cb1-2"></a> import torch.nn.functional as F</span> | |
<span id="cb1-3"><a href="#cb1-3"></a> from datasets import load_dataset</span> | |
<span id="cb1-4"><a href="#cb1-4"></a><span class="va">+ from accelerate import Accelerator</span></span> | |
<span id="cb1-5"><a href="#cb1-5"></a></span> | |
<span id="cb1-6"><a href="#cb1-6"></a><span class="va">+ accelerator = Accelerator()</span></span> | |
<span id="cb1-7"><a href="#cb1-7"></a><span class="st">- device = 'cpu'</span></span> | |
<span id="cb1-8"><a href="#cb1-8"></a><span class="va">+ device = accelerator.device</span></span> | |
<span id="cb1-9"><a href="#cb1-9"></a></span> | |
<span id="cb1-10"><a href="#cb1-10"></a> model = torch.nn.Transformer().to(device)</span> | |
<span id="cb1-11"><a href="#cb1-11"></a> optimizer = torch.optim.Adam(model.parameters())</span> | |
<span id="cb1-12"><a href="#cb1-12"></a> dataset = load_dataset('my_dataset')</span> | |
<span id="cb1-13"><a href="#cb1-13"></a> dataloader = torch.utils.data.DataLoader(dataset, shuffle=True)</span> | |
<span id="cb1-14"><a href="#cb1-14"></a></span> | |
<span id="cb1-15"><a href="#cb1-15"></a><span class="va">+ model, optimizer, dataloader = accelerator.prepare(model, optimizer, dataloader)</span></span> | |
<span id="cb1-16"><a href="#cb1-16"></a></span> | |
<span id="cb1-17"><a href="#cb1-17"></a> model.train()</span> | |
<span id="cb1-18"><a href="#cb1-18"></a> for epoch in range(10):</span> | |
<span id="cb1-19"><a href="#cb1-19"></a> for source, targets in dataloader:</span> | |
<span id="cb1-20"><a href="#cb1-20"></a> source, targets = source.to(device), targets.to(device)</span> | |
<span id="cb1-21"><a href="#cb1-21"></a> optimizer.zero_grad()</span> | |
<span id="cb1-22"><a href="#cb1-22"></a> output = model(source)</span> | |
<span id="cb1-23"><a href="#cb1-23"></a> loss = F.cross_entropy(output, targets)</span> | |
<span id="cb1-24"><a href="#cb1-24"></a><span class="st">- loss.backward()</span></span> | |
<span id="cb1-25"><a href="#cb1-25"></a><span class="va">+ accelerator.backward(loss)</span></span> | |
<span id="cb1-26"><a href="#cb1-26"></a> optimizer.step()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> | |
</div> | |
</section> | |
<section id="easy-to-integrate" class="slide level2"> | |
<h2>Easy to integrate</h2> | |
<div style="font-size: 70%;"> | |
<ul> | |
<li>Due to the low-code nature, it’s trivial to integrate into existing PyTorch frameworks: | |
<ol type="1"> | |
<li>Create an <code>Accelerator</code></li> | |
</ol></li> | |
</ul> | |
</div> | |
<div style="font-size: 60%;padding-left:15%;padding-top:0%;padding-right:20%"> | |
<div class="sourceCode" id="cb2"><pre class="sourceCode numberSource diff number-lines code-with-copy"><code class="sourceCode diff"><span id="cb2-1"><a href="#cb2-1"></a> import torch</span> | |
<span id="cb2-2"><a href="#cb2-2"></a> import torch.nn.functional as F</span> | |
<span id="cb2-3"><a href="#cb2-3"></a> from datasets import load_dataset</span> | |
<span id="cb2-4"><a href="#cb2-4"></a><span class="va">+ from accelerate import Accelerator</span></span> | |
<span id="cb2-5"><a href="#cb2-5"></a></span> | |
<span id="cb2-6"><a href="#cb2-6"></a><span class="va">+ accelerator = Accelerator()</span></span> | |
<span id="cb2-7"><a href="#cb2-7"></a> device = 'cpu'</span> | |
<span id="cb2-8"><a href="#cb2-8"></a></span> | |
<span id="cb2-9"><a href="#cb2-9"></a> model = torch.nn.Transformer().to(device)</span> | |
<span id="cb2-10"><a href="#cb2-10"></a> optimizer = torch.optim.Adam(model.parameters())</span> | |
<span id="cb2-11"><a href="#cb2-11"></a> dataset = load_dataset('my_dataset')</span> | |
<span id="cb2-12"><a href="#cb2-12"></a> dataloader = torch.utils.data.DataLoader(dataset, shuffle=True)</span> | |
<span id="cb2-13"><a href="#cb2-13"></a></span> | |
<span id="cb2-14"><a href="#cb2-14"></a> model.train()</span> | |
<span id="cb2-15"><a href="#cb2-15"></a> for epoch in range(10):</span> | |
<span id="cb2-16"><a href="#cb2-16"></a> for source, targets in dataloader:</span> | |
<span id="cb2-17"><a href="#cb2-17"></a> source, targets = source.to(device), targets.to(device)</span> | |
<span id="cb2-18"><a href="#cb2-18"></a> optimizer.zero_grad()</span> | |
<span id="cb2-19"><a href="#cb2-19"></a> output = model(source)</span> | |
<span id="cb2-20"><a href="#cb2-20"></a> loss = F.cross_entropy(output, targets)</span> | |
<span id="cb2-21"><a href="#cb2-21"></a> loss.backward()</span> | |
<span id="cb2-22"><a href="#cb2-22"></a> optimizer.step()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> | |
</div> | |
</section> | |
<section id="easy-to-integrate-1" class="slide level2"> | |
<h2>Easy to integrate</h2> | |
<div style="font-size: 70%;"> | |
<ul> | |
<li>Due to the low-code nature, it’s trivial to integrate into existing PyTorch frameworks: | |
<ol start="2" type="1"> | |
<li>Wrap your PyTorch objects with <code>accelerator.prepare</code> and remove device-placements</li> | |
</ol></li> | |
</ul> | |
</div> | |
<div style="font-size: 60%;padding-left:15%;padding-top:0%;padding-right:20%"> | |
<div class="sourceCode" id="cb3"><pre class="sourceCode numberSource diff number-lines code-with-copy"><code class="sourceCode diff"><span id="cb3-1"><a href="#cb3-1"></a> import torch</span> | |
<span id="cb3-2"><a href="#cb3-2"></a> import torch.nn.functional as F</span> | |
<span id="cb3-3"><a href="#cb3-3"></a> from datasets import load_dataset</span> | |
<span id="cb3-4"><a href="#cb3-4"></a> from accelerate import Accelerator</span> | |
<span id="cb3-5"><a href="#cb3-5"></a></span> | |
<span id="cb3-6"><a href="#cb3-6"></a> accelerator = Accelerator()</span> | |
<span id="cb3-7"><a href="#cb3-7"></a><span class="st">- device = 'cpu'</span></span> | |
<span id="cb3-8"><a href="#cb3-8"></a></span> | |
<span id="cb3-9"><a href="#cb3-9"></a> model = torch.nn.Transformer().to(device)</span> | |
<span id="cb3-10"><a href="#cb3-10"></a> optimizer = torch.optim.Adam(model.parameters())</span> | |
<span id="cb3-11"><a href="#cb3-11"></a> dataset = load_dataset('my_dataset')</span> | |
<span id="cb3-12"><a href="#cb3-12"></a> dataloader = torch.utils.data.DataLoader(dataset, shuffle=True)</span> | |
<span id="cb3-13"><a href="#cb3-13"></a></span> | |
<span id="cb3-14"><a href="#cb3-14"></a><span class="va">+ model, optimizer, dataloader = accelerator.prepare(model, optimizer, dataloader)</span></span> | |
<span id="cb3-15"><a href="#cb3-15"></a></span> | |
<span id="cb3-16"><a href="#cb3-16"></a> model.train()</span> | |
<span id="cb3-17"><a href="#cb3-17"></a> for epoch in range(10):</span> | |
<span id="cb3-18"><a href="#cb3-18"></a> for source, targets in dataloader:</span> | |
<span id="cb3-19"><a href="#cb3-19"></a> source, targets = source.to(device), targets.to(device)</span> | |
<span id="cb3-20"><a href="#cb3-20"></a> optimizer.zero_grad()</span> | |
<span id="cb3-21"><a href="#cb3-21"></a> output = model(source)</span> | |
<span id="cb3-22"><a href="#cb3-22"></a> loss = F.cross_entropy(output, targets)</span> | |
<span id="cb3-23"><a href="#cb3-23"></a> loss.backward()</span> | |
<span id="cb3-24"><a href="#cb3-24"></a> optimizer.step()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> | |
</div> | |
</section> | |
<section id="easy-to-integrate-2" class="slide level2"> | |
<h2>Easy to integrate</h2> | |
<div style="font-size: 70%;"> | |
<ul> | |
<li>Due to the low-code nature, it’s trivial to integrate into existing PyTorch frameworks: | |
<ol start="3" type="1"> | |
<li>Use <code>accelerator.backward</code> for the backward pass</li> | |
</ol></li> | |
</ul> | |
</div> | |
<div style="font-size: 60%;padding-left:15%;padding-top:0%;padding-right:20%"> | |
<div class="sourceCode" id="cb4"><pre class="sourceCode numberSource diff number-lines code-with-copy"><code class="sourceCode diff"><span id="cb4-1"><a href="#cb4-1"></a> import torch</span> | |
<span id="cb4-2"><a href="#cb4-2"></a> import torch.nn.functional as F</span> | |
<span id="cb4-3"><a href="#cb4-3"></a> from datasets import load_dataset</span> | |
<span id="cb4-4"><a href="#cb4-4"></a> from accelerate import Accelerator</span> | |
<span id="cb4-5"><a href="#cb4-5"></a></span> | |
<span id="cb4-6"><a href="#cb4-6"></a> accelerator = Accelerator()</span> | |
<span id="cb4-7"><a href="#cb4-7"></a></span> | |
<span id="cb4-8"><a href="#cb4-8"></a> model = torch.nn.Transformer().to(device)</span> | |
<span id="cb4-9"><a href="#cb4-9"></a> optimizer = torch.optim.Adam(model.parameters())</span> | |
<span id="cb4-10"><a href="#cb4-10"></a> dataset = load_dataset('my_dataset')</span> | |
<span id="cb4-11"><a href="#cb4-11"></a> dataloader = torch.utils.data.DataLoader(dataset, shuffle=True)</span> | |
<span id="cb4-12"><a href="#cb4-12"></a></span> | |
<span id="cb4-13"><a href="#cb4-13"></a> model, optimizer, dataloader = accelerator.prepare(model, optimizer, dataloader)</span> | |
<span id="cb4-14"><a href="#cb4-14"></a></span> | |
<span id="cb4-15"><a href="#cb4-15"></a> model.train()</span> | |
<span id="cb4-16"><a href="#cb4-16"></a> for epoch in range(10):</span> | |
<span id="cb4-17"><a href="#cb4-17"></a> for source, targets in dataloader:</span> | |
<span id="cb4-18"><a href="#cb4-18"></a> source, targets = source.to(device), targets.to(device)</span> | |
<span id="cb4-19"><a href="#cb4-19"></a> optimizer.zero_grad()</span> | |
<span id="cb4-20"><a href="#cb4-20"></a> output = model(source)</span> | |
<span id="cb4-21"><a href="#cb4-21"></a> loss = F.cross_entropy(output, targets)</span> | |
<span id="cb4-22"><a href="#cb4-22"></a><span class="st">- loss.backward()</span></span> | |
<span id="cb4-23"><a href="#cb4-23"></a><span class="va">+ accelerator.backward(loss)</span></span> | |
<span id="cb4-24"><a href="#cb4-24"></a> optimizer.step()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> | |
</div> | |
</section> | |
<section id="but-what-about-inference" class="slide level2"> | |
<h2>But what about inference?</h2> | |
<ul> | |
<li>🤗 Accelerate is not just for training, and has helped the GPU-Poor take control of the narrative</li> | |
<li>Using tools like Big Model Inference, users with <em>tiny</em> compute can run large models locally</li> | |
<li>Started with the boom of stable diffusion, and now has scaled to having the ability to run huge LLMs locally with a single graphics card</li> | |
</ul> | |
</section> | |
<section id="how-does-it-work" class="slide level2"> | |
<h2>How does it work?</h2> | |
<ul> | |
<li>PyTorch introduced <code>device="meta"</code></li> | |
<li>🤗 Accelerate introduced <code>device_map="auto"</code></li> | |
</ul> | |
<div style="padding-left:15%;padding-right:20%"> | |
<video id="video_shortcode_videojs_video1" width="800" height="400" class="video-js vjs-default-skin" controls preload="auto" data-setup="{}" aria-label="Big Model Inference visualization"><source src="big_model_visualization.mp4" type="video/mp4"></video> | |
</div> | |
</section> | |
<section id="a-cli-interface" class="slide level2"> | |
<h2>A CLI Interface</h2> | |
<ul> | |
<li><code>accelerate config</code> | |
<ul> | |
<li>Configure the environment</li> | |
</ul></li> | |
<li><code>accelerate launch</code> | |
<ul> | |
<li>How to run your script</li> | |
</ul></li> | |
</ul> | |
</section> | |
<section id="launching-distributed-training-is-hard" class="slide level2"> | |
<h2>Launching distributed training is hard</h2> | |
<div style="padding-top:0%;padding-left:10%;padding-right:15%;padding-bottom:0%"> | |
<div class="sourceCode" id="cb5"><pre class="sourceCode numberSource bash number-lines code-with-copy"><code class="sourceCode bash"><span id="cb5-1"><a href="#cb5-1"></a><span class="ex">python</span> script.py</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> | |
</div> | |
<div style="padding-left:50%;padding-bottom:0%;padding-top:0%;"> | |
<p>vs.</p> | |
</div> | |
<p><br></p> | |
<div style="padding-top:0%;padding-left:10%;padding-right:15%;padding-bottom:0%"> | |
<div class="sourceCode" id="cb6"><pre class="sourceCode numberSource bash number-lines code-with-copy"><code class="sourceCode bash"><span id="cb6-1"><a href="#cb6-1"></a><span class="ex">torchrun</span> <span class="at">--nnodes</span><span class="op">=</span>1 <span class="at">--nproc_per_node</span><span class="op">=</span>2 script.py</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> | |
</div> | |
<div style="padding-left:50%;padding-bottom:0%;padding-top:0%;"> | |
<p>vs.</p> | |
</div> | |
<p><br></p> | |
<div style="padding-top:0%;padding-left:10%;padding-right:15%;padding-bottom:0%"> | |
<div class="sourceCode" id="cb7"><pre class="sourceCode numberSource bash number-lines code-with-copy"><code class="sourceCode bash"><span id="cb7-1"><a href="#cb7-1"></a><span class="ex">deepspeed</span> <span class="at">--num_gpus</span><span class="op">=</span>2 script.py</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> | |
<p><br></p> | |
</div> | |
<p>How can we make this better?</p> | |
</section> | |
<section id="accelerate-launch" class="slide level2"> | |
<h2><code>accelerate launch</code></h2> | |
<div style="padding-top:0%;padding-left:5%;padding-right:10%;padding-bottom:0%"> | |
<div class="sourceCode" id="cb8"><pre class="sourceCode numberSource bash number-lines code-with-copy"><code class="sourceCode bash"><span id="cb8-1"><a href="#cb8-1"></a><span class="ex">accelerate</span> launch script.py</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> | |
<p><br></p> | |
<div class="sourceCode" id="cb9"><pre class="sourceCode numberSource bash number-lines code-with-copy"><code class="sourceCode bash"><span id="cb9-1"><a href="#cb9-1"></a><span class="ex">accelerate</span> launch <span class="at">--multi_gpu</span> <span class="at">--num_processes</span> 2 script.py</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> | |
<p><br></p> | |
<div class="sourceCode" id="cb10"><pre class="sourceCode numberSource bash number-lines code-with-copy"><code class="sourceCode bash"><span id="cb10-1"><a href="#cb10-1"></a><span class="ex">accelerate</span> launch <span class="dt">\</span></span> | |
<span id="cb10-2"><a href="#cb10-2"></a> <span class="at">--multi_gpu</span> <span class="dt">\</span></span> | |
<span id="cb10-3"><a href="#cb10-3"></a> <span class="at">--use_deepspeed</span> <span class="dt">\</span></span> | |
<span id="cb10-4"><a href="#cb10-4"></a> <span class="at">--num_processes</span> 2 <span class="dt">\</span></span> | |
<span id="cb10-5"><a href="#cb10-5"></a> script.py</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> | |
</div> | |
</section> | |
<section id="accelerate-config" class="slide level2"> | |
<h2><code>accelerate config</code></h2> | |
<ul> | |
<li>Rely on <code>config.yaml</code> files</li> | |
<li>Choose to either run <code>accelerate config</code> or write your own:</li> | |
</ul> | |
<div class="columns" style="font-size: 60%;padding-left:5%;padding-right:5%"> | |
<div class="column" style="width:40%;"> | |
<div class="code-with-filename"> | |
<div class="code-with-filename-file"> | |
<pre><strong>ddp_config.yaml</strong></pre> | |
</div> | |
<div class="sourceCode" id="cb11"><pre class="sourceCode numberSource yaml number-lines code-with-copy"><code class="sourceCode yaml"><span id="cb11-1"><a href="#cb11-1"></a><span class="fu">compute_environment</span><span class="kw">:</span><span class="at"> LOCAL_MACHINE</span></span> | |
<span id="cb11-2"><a href="#cb11-2"></a><span class="fu">distributed_type</span><span class="kw">:</span><span class="at"> MULTI_GPU</span></span> | |
<span id="cb11-3"><a href="#cb11-3"></a><span class="fu">main_training_function</span><span class="kw">:</span><span class="at"> main</span></span> | |
<span id="cb11-4"><a href="#cb11-4"></a><span class="fu">mixed_precision</span><span class="kw">:</span><span class="at"> bf16</span></span> | |
<span id="cb11-5"><a href="#cb11-5"></a><span class="fu">num_machines</span><span class="kw">:</span><span class="at"> </span><span class="dv">1</span></span> | |
<span id="cb11-6"><a href="#cb11-6"></a><span class="fu">num_processes</span><span class="kw">:</span><span class="at"> </span><span class="dv">8</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> | |
</div> | |
</div><div class="column" style="width:40%;"> | |
<div class="code-with-filename"> | |
<div class="code-with-filename-file"> | |
<pre><strong>fsdp_config.yaml</strong></pre> | |
</div> | |
<div class="sourceCode" id="cb12"><pre class="sourceCode numberSource yaml number-lines code-with-copy"><code class="sourceCode yaml"><span id="cb12-1"><a href="#cb12-1"></a><span class="fu">compute_environment</span><span class="kw">:</span><span class="at"> LOCAL_MACHINE</span></span> | |
<span id="cb12-2"><a href="#cb12-2"></a><span class="fu">distributed_type</span><span class="kw">:</span><span class="at"> FSDP</span></span> | |
<span id="cb12-3"><a href="#cb12-3"></a><span class="fu">fsdp_config</span><span class="kw">:</span></span> | |
<span id="cb12-4"><a href="#cb12-4"></a><span class="at"> </span><span class="fu">fsdp_auto_wrap_policy</span><span class="kw">:</span><span class="at"> TRANSFORMER_BASED_WRAP</span></span> | |
<span id="cb12-5"><a href="#cb12-5"></a><span class="at"> </span><span class="fu">fsdp_backward_prefetch</span><span class="kw">:</span><span class="at"> BACKWARD_PRE</span></span> | |
<span id="cb12-6"><a href="#cb12-6"></a><span class="at"> </span><span class="fu">fsdp_cpu_ram_efficient_loading</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span> | |
<span id="cb12-7"><a href="#cb12-7"></a><span class="at"> </span><span class="fu">fsdp_forward_prefetch</span><span class="kw">:</span><span class="at"> </span><span class="ch">false</span></span> | |
<span id="cb12-8"><a href="#cb12-8"></a><span class="at"> </span><span class="fu">fsdp_offload_params</span><span class="kw">:</span><span class="at"> </span><span class="ch">false</span></span> | |
<span id="cb12-9"><a href="#cb12-9"></a><span class="at"> </span><span class="fu">fsdp_sharding_strategy</span><span class="kw">:</span><span class="at"> FULL_SHARD</span></span> | |
<span id="cb12-10"><a href="#cb12-10"></a><span class="at"> </span><span class="fu">fsdp_state_dict_type</span><span class="kw">:</span><span class="at"> SHARDED_STATE_DICT</span></span> | |
<span id="cb12-11"><a href="#cb12-11"></a><span class="at"> </span><span class="fu">fsdp_sync_module_states</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span> | |
<span id="cb12-12"><a href="#cb12-12"></a><span class="at"> </span><span class="fu">fsdp_use_orig_params</span><span class="kw">:</span><span class="at"> </span><span class="ch">false</span></span> | |
<span id="cb12-13"><a href="#cb12-13"></a><span class="fu">main_training_function</span><span class="kw">:</span><span class="at"> main</span></span> | |
<span id="cb12-14"><a href="#cb12-14"></a><span class="fu">mixed_precision</span><span class="kw">:</span><span class="at"> bf16</span></span> | |
<span id="cb12-15"><a href="#cb12-15"></a><span class="fu">num_machines</span><span class="kw">:</span><span class="at"> </span><span class="dv">1</span></span> | |
<span id="cb12-16"><a href="#cb12-16"></a><span class="fu">num_processes</span><span class="kw">:</span><span class="at"> </span><span class="dv">8</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> | |
</div> | |
</div> | |
</div> | |
</section> | |
<section id="now-that-youre-up-to-speed-whats-new" class="title-slide slide level1 center"> | |
<h1>Now that you’re up to speed, what’s new?</h1> | |
</section> | |
<section> | |
<section id="weve-had-a-busy-last-year-and-so-has-the-ml-community" class="title-slide slide level1 center"> | |
<h1>We’ve had a busy last year, and so has the ML Community!</h1> | |
</section> | |
<section id="new-training-techniques" class="slide level2"> | |
<h2>New training techniques</h2> | |
<ul> | |
<li>Quantization has taken the field by storm</li> | |
<li>New ideas such as FSDP + QLoRA to train huge models on tiny compute!</li> | |
<li>New precision backends as we train natively on smaller precision</li> | |
<li>Optimizing further how much we can push on a single machine through efficient RAM and timing techniques</li>
</ul> | |
</section> | |
<section id="larger-compute-landscape" class="slide level2"> | |
<h2>Larger compute landscape</h2> | |
<ul> | |
<li>As we search for alternatives to NVIDIA, new hardware backends rise:
<ul> | |
<li>XPU (Intel)</li> | |
<li>NPU (Huawei Ascend)</li>
<li>MLU (Cambricon)</li> | |
</ul></li> | |
</ul> | |
<p>All of which are supported by 🤗 Accelerate</p> | |
</section> | |
<section id="lower-abstractions" class="slide level2"> | |
<h2>Lower abstractions</h2> | |
<ul> | |
<li>While the <code>Accelerator</code> was great, we needed better abstractions focused on controlling behaviors</li>
<li>Introduced the <code>PartialState</code></li> | |
</ul> | |
<div style="padding-left:10%;padding-top:0%;padding-right:15%"> | |
<div class="sourceCode" id="cb13"><pre class="sourceCode numberSource python number-lines code-with-copy"><code class="sourceCode python"><span id="cb13-1"><a href="#cb13-1"></a><span class="im">from</span> accelerate <span class="im">import</span> PartialState</span> | |
<span id="cb13-2"><a href="#cb13-2"></a></span> | |
<span id="cb13-3"><a href="#cb13-3"></a><span class="cf">if</span> PartialState().is_main_process:</span> | |
<span id="cb13-4"><a href="#cb13-4"></a> <span class="co"># Run on only 1 device</span></span> | |
<span id="cb13-5"><a href="#cb13-5"></a></span> | |
<span id="cb13-6"><a href="#cb13-6"></a><span class="cf">with</span> PartialState().main_process_first:</span> | |
<span id="cb13-7"><a href="#cb13-7"></a> <span class="co"># Useful for dataset processing</span></span> | |
<span id="cb13-8"><a href="#cb13-8"></a></span> | |
<span id="cb13-9"><a href="#cb13-9"></a><span class="co"># Device-agnostic without the bulk of the `Accelerator`</span></span> | |
<span id="cb13-10"><a href="#cb13-10"></a>device <span class="op">=</span> PartialState().device</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> | |
</div> | |
</section> | |
<section id="faster-and-better-inference-alternatives" class="slide level2"> | |
<h2>Faster and better inference alternatives</h2> | |
<div style="font-size:70%"> | |
<ul> | |
<li><code>PiPPy</code> gives us efficient pipeline-parallelism in distributed environments to increase throughput while keeping a simple torch-bound API</li> | |
<li>Rather than having to wait for each GPU, every GPU can be busy in parallel</li> | |
<li>Will be critical as larger LLMs take hold and more than one computer is needed</li> | |
</ul> | |
</div> | |
<div style="font-size:60%;padding-left:19%;padding-top:0%;padding-right:24%;"> | |
<div class="sourceCode" id="cb14"><pre class="sourceCode numberSource python number-lines code-with-copy"><code class="sourceCode python"><span id="cb14-1"><a href="#cb14-1"></a><span class="im">import</span> torch</span> | |
<span id="cb14-2"><a href="#cb14-2"></a><span class="im">from</span> transformers <span class="im">import</span> AutoModelForSequenceClassification</span> | |
<span id="cb14-3"><a href="#cb14-3"></a></span> | |
<span id="cb14-4"><a href="#cb14-4"></a><span class="im">from</span> accelerate <span class="im">import</span> PartialState, prepare_pippy</span> | |
<span id="cb14-5"><a href="#cb14-5"></a></span> | |
<span id="cb14-6"><a href="#cb14-6"></a>model <span class="op">=</span> AutoModelForSequenceClassification.from_pretrained(<span class="st">"gpt2"</span>)</span> | |
<span id="cb14-7"><a href="#cb14-7"></a>model.<span class="bu">eval</span>()</span> | |
<span id="cb14-8"><a href="#cb14-8"></a></span> | |
<span id="cb14-9"><a href="#cb14-9"></a><span class="bu">input</span> <span class="op">=</span> torch.randint(</span> | |
<span id="cb14-10"><a href="#cb14-10"></a> low<span class="op">=</span><span class="dv">0</span>,</span> | |
<span id="cb14-11"><a href="#cb14-11"></a> high<span class="op">=</span>model.config.vocab_size,</span> | |
<span id="cb14-12"><a href="#cb14-12"></a> size<span class="op">=</span>(<span class="dv">2</span>, <span class="dv">1024</span>), <span class="co"># bs x seq_len</span></span> | |
<span id="cb14-13"><a href="#cb14-13"></a> device<span class="op">=</span><span class="st">"cpu"</span>,</span> | |
<span id="cb14-14"><a href="#cb14-14"></a>)</span> | |
<span id="cb14-15"><a href="#cb14-15"></a></span> | |
<span id="cb14-16"><a href="#cb14-16"></a>model <span class="op">=</span> prepare_pippy(model, split_points<span class="op">=</span><span class="st">"auto"</span>, example_args<span class="op">=</span>(<span class="bu">input</span>,))</span> | |
<span id="cb14-17"><a href="#cb14-17"></a></span> | |
<span id="cb14-18"><a href="#cb14-18"></a><span class="cf">with</span> torch.no_grad():</span> | |
<span id="cb14-19"><a href="#cb14-19"></a> output <span class="op">=</span> model(<span class="bu">input</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div> | |
</div> | |
</section></section> | |
<section> | |
<section id="adoption-accelerate-in-the-ecosystem" class="title-slide slide level1 center"> | |
<h1>Adoption: Accelerate in the ecosystem</h1> | |
</section> | |
<section id="accelerate-in-the-ecosystem" class="slide level2"> | |
<h2>Accelerate in the Ecosystem</h2> | |
<ul> | |
<li>Many of the frameworks you use daily already rely on 🤗 Accelerate! | |
<ul> | |
<li>Nearly all of 🤗</li> | |
<li><code>axolotl</code></li> | |
<li><code>fastai</code></li> | |
<li><code>FastChat</code></li> | |
<li><code>lucidrains</code></li> | |
<li><code>kornia</code></li> | |
</ul></li> | |
</ul> | |
</section> | |
<section id="accelerate-in-the-ecosystem-1" class="slide level2"> | |
<h2>Accelerate in the Ecosystem</h2> | |
<div style="font-size: 70%;"> | |
<ul> | |
<li>Started as a way to isolate out distributed code on TPU and <code>DistributedDataParallel</code></li>
</ul> | |
</div> | |
<div style="padding-left: 30%"> | |
<p><img data-src="sylvain_tweet.JPG" style="width:70.0%"></p> | |
</div> | |
</section> | |
<section id="accelerate-in-the-ecosystem-2" class="slide level2"> | |
<h2>Accelerate in the Ecosystem</h2> | |
<div style="font-size: 70%;"> | |
<ul> | |
<li>Now is the backbone of some of the largest PyTorch training frameworks in the ecosystem</li> | |
</ul> | |
</div> | |
<div style="padding-left: 30%;"> | |
<p><img data-src="hf_trainer.JPG" style="width:70.0%"></p> | |
</div> | |
</section></section> | |
<section id="whats-next" class="title-slide slide level1 center"> | |
<h1>What’s next?</h1> | |
</section> | |
<section id="elevating-the-community" class="title-slide slide level1 center"> | |
<h1>Elevating the community</h1> | |
<ul> | |
<li>Now that more advanced training techniques are reachable (FSDP, DeepSpeed, etc), we need to focus on educating the community on how to use them best</li>
<li>Goes beyond how to use the <code>Trainer</code> or <code>Accelerator</code>, but how to use <em>what</em> where</li> | |
<li>Keep Accelerate as a tool for the community to utilize and play with when new techniques come out, so new ideas can be pushed to scale quickly</li>
</ul> | |
</section> | |
<section id="soon" class="title-slide slide level1 center"> | |
<h1>1.0.0: Soon!</h1> | |
<ul> | |
<li>Tried and battle-tested by over 7M users/month | 110M+ total downloads</li> | |
<li>As we’ve been stable for over a year now, we’re nearly ready to release 1.0.0</li>
</ul> | |
</section> | |
<section id="thanks-for-joining" class="title-slide slide level1 center"> | |
<h1>Thanks for joining!</h1> | |
<div style="font-size: 70%;"> | |
<ul> | |
<li><a href="https://hf.co/docs/accelerate">🤗 Accelerate documentation</a></li> | |
<li><a href="https://huggingface.co/docs/accelerate/basic_tutorials/launch">Launching distributed code</a></li> | |
<li><a href="https://huggingface.co/docs/accelerate/basic_tutorials/notebook">Distributed code and Jupyter Notebooks</a></li> | |
<li><a href="https://huggingface.co/docs/accelerate/basic_tutorials/migration">Migrating to 🤗 Accelerate easily</a></li> | |
<li><a href="https://huggingface.co/docs/accelerate/usage_guides/big_modeling">Big Model Inference tutorial</a></li> | |
<li><a href="https://huggingface.co/docs/accelerate/usage_guides/deepspeed">DeepSpeed and 🤗 Accelerate</a></li> | |
<li><a href="https://huggingface.co/docs/accelerate/usage_guides/fsdp">Fully Sharded Data Parallelism and 🤗 Accelerate</a></li> | |
<li><a href="https://huggingface.co/docs/accelerate/concept_guides/fsdp_and_deepspeed">FSDP vs DeepSpeed In-Depth</a></li> | |
</ul> | |
</div> | |
<div class="footer footer-default"> | |
</div> | |
</section> | |
</div> | |
</div> | |
<script>window.backupDefine = window.define; window.define = undefined;</script> | |
<script src="accelerate_files/libs/revealjs/dist/reveal.js"></script> | |
<!-- reveal.js plugins --> | |
<script src="accelerate_files/libs/revealjs/plugin/quarto-line-highlight/line-highlight.js"></script> | |
<script src="accelerate_files/libs/revealjs/plugin/pdf-export/pdfexport.js"></script> | |
<script src="accelerate_files/libs/revealjs/plugin/reveal-menu/menu.js"></script> | |
<script src="accelerate_files/libs/revealjs/plugin/reveal-menu/quarto-menu.js"></script> | |
<script src="accelerate_files/libs/revealjs/plugin/quarto-support/support.js"></script> | |
<script src="accelerate_files/libs/revealjs/plugin/notes/notes.js"></script> | |
<script src="accelerate_files/libs/revealjs/plugin/search/search.js"></script> | |
<script src="accelerate_files/libs/revealjs/plugin/zoom/zoom.js"></script> | |
<script src="accelerate_files/libs/revealjs/plugin/math/math.js"></script> | |
<script>window.define = window.backupDefine; window.backupDefine = undefined;</script> | |
<script>
  // Configure and start the reveal.js deck.
  // Full list of configuration options available at:
  // https://revealjs.com/config/
  //
  // The plugin objects listed under `plugins` (QuartoLineHighlight, PdfExport,
  // RevealMenu, QuartoSupport, RevealMath, RevealNotes, RevealSearch,
  // RevealZoom) are globals provided by the plugin scripts loaded above.
  Reveal.initialize({
    'controlsAuto': true,
    'previewLinksAuto': false,
    'smaller': false,
    'pdfSeparateFragments': false,
    'autoAnimateEasing': "ease",
    'autoAnimateDuration': 1,
    'autoAnimateUnmatched': true,
    // Slide menu (reveal-menu plugin) with a custom "Tools" panel
    'menu': {"side":"left","useTextContentForMissingTitles":true,"markers":false,"loadIcons":false,"custom":[{"title":"Tools","icon":"<i class=\"fas fa-gear\"></i>","content":"<ul class=\"slide-menu-items\">\n<li class=\"slide-tool-item active\" data-item=\"0\"><a href=\"#\" onclick=\"RevealMenuToolHandlers.fullscreen(event)\"><kbd>f</kbd> Fullscreen</a></li>\n<li class=\"slide-tool-item\" data-item=\"1\"><a href=\"#\" onclick=\"RevealMenuToolHandlers.speakerMode(event)\"><kbd>s</kbd> Speaker View</a></li>\n<li class=\"slide-tool-item\" data-item=\"2\"><a href=\"#\" onclick=\"RevealMenuToolHandlers.overview(event)\"><kbd>o</kbd> Slide Overview</a></li>\n<li class=\"slide-tool-item\" data-item=\"3\"><a href=\"#\" onclick=\"RevealMenuToolHandlers.togglePdfExport(event)\"><kbd>e</kbd> PDF Export Mode</a></li>\n<li class=\"slide-tool-item\" data-item=\"4\"><a href=\"#\" onclick=\"RevealMenuToolHandlers.keyboardHelp(event)\"><kbd>?</kbd> Keyboard Help</a></li>\n</ul>"}],"openButton":true},

    // Display controls in the bottom right corner
    controls: false,

    // Help the user learn the controls by providing hints, for example by
    // bouncing the down arrow when they first encounter a vertical slide
    controlsTutorial: false,

    // Determines where controls appear, "edges" or "bottom-right"
    controlsLayout: 'edges',

    // Visibility rule for backwards navigation arrows; "faded", "hidden"
    // or "visible"
    controlsBackArrows: 'faded',

    // Display a presentation progress bar
    progress: true,

    // Display the page number of the current slide
    slideNumber: false,

    // 'all', 'print', or 'speaker'
    showSlideNumber: 'all',

    // Add the current slide number to the URL hash so that reloading the
    // page/copying the URL will return you to the same slide
    hash: true,

    // Start with 1 for the hash rather than 0
    hashOneBasedIndex: false,

    // Flags if we should monitor the hash and change slides accordingly
    respondToHashChanges: true,

    // Push each slide change to the browser history
    history: true,

    // Enable keyboard shortcuts for navigation
    keyboard: true,

    // Enable the slide overview mode
    overview: true,

    // Disables the default reveal.js slide layout (scaling and centering)
    // so that you can use custom CSS layout
    disableLayout: false,

    // Vertical centering of slides
    center: false,

    // Enables touch navigation on devices with touch input
    touch: true,

    // Loop the presentation
    loop: false,

    // Change the presentation direction to be RTL
    rtl: false,

    // see https://revealjs.com/vertical-slides/#navigation-mode
    navigationMode: 'linear',

    // Randomizes the order of slides each time the presentation loads
    shuffle: false,

    // Turns fragments on and off globally
    fragments: true,

    // Flags whether to include the current fragment in the URL,
    // so that reloading brings you to the same fragment position
    fragmentInURL: false,

    // Flags if the presentation is running in an embedded mode,
    // i.e. contained within a limited portion of the screen
    embedded: false,

    // Flags if we should show a help overlay when the questionmark
    // key is pressed
    help: true,

    // Flags if it should be possible to pause the presentation (blackout)
    pause: true,

    // Flags if speaker notes should be visible to all viewers
    showNotes: false,

    // Global override for autoplaying embedded media (null/true/false)
    autoPlayMedia: true,

    // Global override for preloading lazy-loaded iframes (null/true/false)
    preloadIframes: null,

    // Number of milliseconds between automatically proceeding to the
    // next slide, disabled when set to 0, this value can be overwritten
    // by using a data-autoslide attribute on your slides
    autoSlide: 0,

    // Stop auto-sliding after user input
    autoSlideStoppable: true,

    // Use this method for navigation when auto-sliding
    autoSlideMethod: null,

    // Specify the average time in seconds that you think you will spend
    // presenting each slide. This is used to show a pacing timer in the
    // speaker view
    defaultTiming: null,

    // Enable slide navigation via mouse wheel
    mouseWheel: false,

    // The display mode that will be used to show slides
    display: 'block',

    // Hide cursor if inactive
    hideInactiveCursor: true,

    // Time before the cursor is hidden (in ms)
    hideCursorTime: 5000,

    // Opens links in an iframe preview overlay
    previewLinks: false,

    // Transition style (none/fade/slide/convex/concave/zoom)
    transition: 'none',

    // Transition speed (default/fast/slow)
    transitionSpeed: 'default',

    // Transition style for full page slide backgrounds
    // (none/fade/slide/convex/concave/zoom)
    backgroundTransition: 'none',

    // Number of slides away from the current that are visible
    viewDistance: 3,

    // Number of slides away from the current that are visible on mobile
    // devices. It is advisable to set this to a lower number than
    // viewDistance in order to save resources.
    mobileViewDistance: 2,

    // The "normal" size of the presentation, aspect ratio will be preserved
    // when the presentation is scaled to fit different resolutions. Can be
    // specified using percentage units.
    width: 1050,

    height: 700,

    // Factor of the display size that should remain empty around the content
    margin: 0.1,

    // Settings handed to the math plugin (MathJax 2 loaded from the CDN)
    math: {
      mathjax: 'https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.0/MathJax.js',
      config: 'TeX-AMS_HTML-full',
      tex2jax: {
        inlineMath: [['\\(','\\)']],
        displayMath: [['\\[','\\]']],
        balanceBraces: true,
        processEscapes: false,
        processRefs: true,
        processEnvironments: true,
        preview: 'TeX',
        skipTags: ['script','noscript','style','textarea','pre','code'],
        ignoreClass: 'tex2jax_ignore',
        processClass: 'tex2jax_process'
      },
    },

    // reveal.js plugins
    plugins: [
      QuartoLineHighlight,
      PdfExport,
      RevealMenu,
      QuartoSupport,
      RevealMath,
      RevealNotes,
      RevealSearch,
      RevealZoom
    ]
  });
</script>
<script id="quarto-html-after-body" type="application/javascript">
  // Post-load wiring for the rendered deck, run once the DOM is parsed:
  //   1. mirror the light/dark stylesheet mode onto <body>
  //   2. initialise Tabby tabsets
  //   3. wire the "Copy to Clipboard" buttons on code blocks
  //   4. attach tippy hover popups to footnote and citation references
  // Relies on the ClipboardJS, Tabby and tippy globals loaded in <head>.
  window.document.addEventListener("DOMContentLoaded", function (event) {
    // Set quarto-dark / quarto-light on <body> from the sheet's data-mode.
    const toggleBodyColorMode = (bsSheetEl) => {
      const mode = bsSheetEl.getAttribute("data-mode");
      const bodyEl = window.document.querySelector("body");
      if (mode === "dark") {
        bodyEl.classList.add("quarto-dark");
        bodyEl.classList.remove("quarto-light");
      } else {
        bodyEl.classList.add("quarto-light");
        bodyEl.classList.remove("quarto-dark");
      }
    };

    // Apply the colour mode only when a link#quarto-bootstrap sheet exists.
    const toggleBodyColorPrimary = () => {
      const bsSheetEl = window.document.querySelector("link#quarto-bootstrap");
      if (bsSheetEl) {
        toggleBodyColorMode(bsSheetEl);
      }
    };
    toggleBodyColorPrimary();

    // Tabby is constructed purely for its side effects on the tabset.
    const tabsets = window.document.querySelectorAll(".panel-tabset-tabby");
    tabsets.forEach(function (tabset) {
      new Tabby('#' + tabset.id);
    });

    // True when the element carries any class starting with "code-annotation-".
    const isCodeAnnotation = (el) => {
      for (const clz of el.classList) {
        if (clz.startsWith('code-annotation-')) {
          return true;
        }
      }
      return false;
    };

    // Copy button: copy the text of the element preceding the button (the
    // <code> block), minus any code-annotation children.
    const clipboard = new window.ClipboardJS('.code-copy-button', {
      text: function (trigger) {
        const codeEl = trigger.previousElementSibling.cloneNode(true);
        // `children` is a live collection; snapshot it so that removing one
        // annotation does not cause the next sibling to be skipped.
        for (const childEl of Array.from(codeEl.children)) {
          if (isCodeAnnotation(childEl)) {
            childEl.remove();
          }
        }
        return codeEl.innerText;
      }
    });

    clipboard.on('success', function (e) {
      // button target
      const button = e.trigger;
      // don't keep focus
      button.blur();
      // flash "checked"
      button.classList.add('code-copy-button-checked');
      const currentTitle = button.getAttribute("title");
      button.setAttribute("title", "Copied!");

      // Show a Bootstrap tooltip only when Bootstrap is present on the page.
      let tooltip;
      if (window.bootstrap) {
        button.setAttribute("data-bs-toggle", "tooltip");
        button.setAttribute("data-bs-placement", "left");
        button.setAttribute("data-bs-title", "Copied!");
        tooltip = new bootstrap.Tooltip(button, {
          trigger: "manual",
          customClass: "code-copy-button-tooltip",
          offset: [0, -8]
        });
        tooltip.show();
      }

      // Restore the button to its original state after one second.
      setTimeout(function () {
        if (tooltip) {
          tooltip.hide();
          button.removeAttribute("data-bs-title");
          button.removeAttribute("data-bs-toggle");
          button.removeAttribute("data-bs-placement");
        }
        button.setAttribute("title", currentTitle);
        button.classList.remove('code-copy-button-checked');
      }, 1000);

      // clear code selection
      e.clearSelection();
    });

    // Attach a tippy popup to `el`; `contentFn` returns the popup's HTML.
    function tippyHover(el, contentFn) {
      const config = {
        allowHTML: true,
        content: contentFn,
        maxWidth: 700,
        delay: 100,
        arrow: false,
        // Render inside the enclosing slide when there is one.
        appendTo: function (el) {
          return el.closest('section.slide') || el.parentElement;
        },
        interactive: true,
        interactiveBorder: 10,
        theme: 'light-border',
        placement: 'bottom-start',
        offset: [0, 0]
      };
      window.tippy(el, config);
    }

    // Footnote references: show the referenced note's HTML on hover.
    const noterefs = window.document.querySelectorAll('a[role="doc-noteref"]');
    for (const ref of noterefs) {
      tippyHover(ref, function () {
        // use id or data attribute instead here
        let href = ref.getAttribute('data-footnote-href') || ref.getAttribute('href');
        // Absolute URLs are reduced to their hash; anything URL() rejects
        // (e.g. a bare "#fn1") is used as-is.
        try { href = new URL(href).hash; } catch {}
        const id = (href || "").replace(/^#\/?/, "");
        const note = window.document.getElementById(id);
        // A missing target yields an empty popup instead of a TypeError.
        return note ? note.innerHTML : "";
      });
    }

    // Walk up from `el` until a parent carries data-cites. Returns the child
    // of that parent together with the space-separated cite keys, or
    // undefined when no ancestor has data-cites.
    const findCites = (el) => {
      let current = el;
      while (current.parentElement) {
        const cites = current.parentElement.dataset.cites;
        if (cites) {
          return {
            el: current,
            cites: cites.split(' ')
          };
        }
        current = current.parentElement;
      }
      return undefined;
    };

    // Citation references: show the matching bibliography entries on hover.
    const bibliorefs = window.document.querySelectorAll('a[role="doc-biblioref"]');
    for (const ref of bibliorefs) {
      const citeInfo = findCites(ref);
      if (citeInfo) {
        tippyHover(citeInfo.el, function () {
          const popup = window.document.createElement('div');
          citeInfo.cites.forEach(function (cite) {
            const citeDiv = window.document.createElement('div');
            citeDiv.classList.add('hanging-indent');
            citeDiv.classList.add('csl-entry');
            const biblioDiv = window.document.getElementById('ref-' + cite);
            if (biblioDiv) {
              citeDiv.innerHTML = biblioDiv.innerHTML;
            }
            popup.appendChild(citeDiv);
          });
          return popup.innerHTML;
        });
      }
    }
  });
</script>
</body></html> |