""" | |
Helpers to train with 16-bit precision. | |
""" | |
import torch.nn as nn | |
from torch._utils import _flatten_dense_tensors, _unflatten_dense_tensors | |
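
# Note: _flatten_dense_tensors concatenates a list of tensors into a single
# contiguous 1-D buffer, and _unflatten_dense_tensors splits such a buffer
# back into tensors shaped like the originals; both are private torch utilities.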


def convert_module_to_f16(l):
    """
    Convert primitive modules to float16.
    """
    if isinstance(l, (nn.Conv1d, nn.Conv2d, nn.Conv3d)):
        l.weight.data = l.weight.data.half()
        l.bias.data = l.bias.data.half()


def convert_module_to_f32(l):
    """
    Convert primitive modules to float32, undoing convert_module_to_f16().
    """
    if isinstance(l, (nn.Conv1d, nn.Conv2d, nn.Conv3d)):
        l.weight.data = l.weight.data.float()
        l.bias.data = l.bias.data.float()


def make_master_params(model_params):
    """
    Copy model parameters into a (differently-shaped) list of full-precision
    parameters.
    """
    master_params = _flatten_dense_tensors(
        [param.detach().float() for param in model_params]
    )
    master_params = nn.Parameter(master_params)
    master_params.requires_grad = True
    return [master_params]


def model_grads_to_master_grads(model_params, master_params):
    """
    Copy the gradients from the model parameters into the master parameters
    from make_master_params().
    """
    master_params[0].grad = _flatten_dense_tensors(
        [param.grad.data.detach().float() for param in model_params]
    )


def master_params_to_model_params(model_params, master_params):
    """
    Copy the master parameter data back into the model parameters.
    """
    # Without copying to a list, if a generator is passed, this will
    # silently not copy any parameters.
    model_params = list(model_params)

    for param, master_param in zip(
        model_params, unflatten_master_params(model_params, master_params)
    ):
        param.detach().copy_(master_param)


def unflatten_master_params(model_params, master_params):
    """
    Unflatten the master parameters to look like model_params.
    """
    return _unflatten_dense_tensors(master_params[0].detach(), model_params)


def zero_grad(model_params):
    """
    Zero the gradients of the given model parameters in place.
    """
    for param in model_params:
        # Taken from https://pytorch.org/docs/stable/_modules/torch/optim/optimizer.html#Optimizer.add_param_group
        if param.grad is not None:
            param.grad.detach_()
            param.grad.zero_()
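

def _example_fp16_training_step(model, data_batches, loss_scale=1024.0):
    """
    Illustrative sketch only (not part of the original helpers): one way the
    functions above could be combined into a loss-scaled fp16 training loop.
    The model, data_batches, learning rate, and MSE loss below are assumptions
    made for the example, not requirements of the helpers.
    """
    import torch  # local import so the module's top-level imports stay unchanged

    model.apply(convert_module_to_f16)
    model_params = list(model.parameters())
    master_params = make_master_params(model_params)
    opt = torch.optim.Adam(master_params, lr=1e-4)

    for inputs, targets in data_batches:
        zero_grad(model_params)
        # Forward/backward run in fp16; the loss is scaled up so small
        # gradients do not underflow in half precision.
        loss = nn.functional.mse_loss(model(inputs.half()), targets.half())
        (loss * loss_scale).backward()
        # Copy fp16 grads into the flat fp32 master grad, undo the scale,
        # take the optimizer step in fp32, then copy weights back to fp16.
        model_grads_to_master_grads(model_params, master_params)
        master_params[0].grad.mul_(1.0 / loss_scale)
        opt.step()
        master_params_to_model_params(model_params, master_params)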