Upload GPTOptim
Files changed:
- model.safetensors (+2 -2)
- modeling_gpt_optimized.py (+3 -2)
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:86048048139b2cae7b486e2da9b4abc53112e9290d190f6d23bec864a1fdfa3b
+size 4040722640
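The file above is a Git LFS pointer, not the weights themselves: the repository records only the object's SHA-256 and byte size (about 4.04 GB here), while the actual tensor data lives in LFS storage. As a minimal sketch, assuming the file has already been fetched with `git lfs pull`, the downloaded file can be checked against the recorded oid:

```python
# Sketch only: verify that a fetched model.safetensors matches the LFS oid.
# The LFS oid is the SHA-256 of the file contents.
import hashlib

h = hashlib.sha256()
with open("model.safetensors", "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # read in 1 MiB chunks
        h.update(chunk)
print(h.hexdigest())
# expected: 86048048139b2cae7b486e2da9b4abc53112e9290d190f6d23bec864a1fdfa3b
```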
modeling_gpt_optimized.py
CHANGED
@@ -1,5 +1,6 @@
 import torch
 import torch.nn as nn
+import bitsandbytes
 from torch.nn import CrossEntropyLoss, functional as F
 from transformers import PreTrainedModel, GPT2PreTrainedModel
 from .configuration_gpt_optimized import GPTOptimConfig
@@ -145,8 +146,8 @@ class GPT(nn.Module):
         self.config = config

         self.transformer = nn.ModuleDict(dict(
-            wte = nn.Embedding(config.vocab_size, config.n_embd),
-            wpe = nn.Embedding(config.block_size, config.n_embd),
+            wte = bitsandbytes.nn.StableEmbedding(config.vocab_size, config.n_embd),
+            wpe = bitsandbytes.nn.StableEmbedding(config.block_size, config.n_embd),
             h = nn.ModuleList([Block(config) for _ in range(config.n_layer)]),
             ln_f = nn.LayerNorm(config.n_embd),
         ))
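The substantive change swaps the token (`wte`) and position (`wpe`) embedding tables from plain `nn.Embedding` to `bitsandbytes.nn.StableEmbedding`, a drop-in replacement that applies a LayerNorm after the lookup and is designed to keep this layer's optimizer state in 32-bit when training with bitsandbytes' 8-bit optimizers, which improves stability. A minimal sketch of the drop-in; the sizes below are illustrative GPT-2-style values, not taken from this checkpoint's config:

```python
# Illustrative sketch (not from this repo): StableEmbedding has the same
# call signature as nn.Embedding, so it slots into the ModuleDict unchanged.
import torch
import bitsandbytes as bnb

vocab_size, n_embd = 50257, 768                 # assumed GPT-2-style sizes
wte = bnb.nn.StableEmbedding(vocab_size, n_embd)

ids = torch.randint(0, vocab_size, (2, 16))     # (batch, sequence) token ids
print(wte(ids).shape)                           # torch.Size([2, 16, 768])
```

Note that the benefit is tied to the optimizer: the embedding itself behaves like `nn.Embedding` plus LayerNorm, and the 32-bit state override only takes effect when an 8-bit optimizer such as `bnb.optim.Adam8bit` is used for training.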