kmfoda committed
Commit 341df0e · verified · Parent: 78082ac

Upload GPTOptim

Files changed (2)
  1. model.safetensors +2 -2
  2. modeling_gpt_optimized.py +3 -2
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f2c240204fac1bf66e112ce3be2384a0097a2ea95b57ed2a4896c6cd01ecf5f7
-size 4040701744
+oid sha256:86048048139b2cae7b486e2da9b4abc53112e9290d190f6d23bec864a1fdfa3b
+size 4040722640
modeling_gpt_optimized.py CHANGED
@@ -1,5 +1,6 @@
 import torch
 import torch.nn as nn
+import bitsandbytes
 from torch.nn import CrossEntropyLoss, functional as F
 from transformers import PreTrainedModel, GPT2PreTrainedModel
 from .configuration_gpt_optimized import GPTOptimConfig
@@ -145,8 +146,8 @@ class GPT(nn.Module):
         self.config = config
 
         self.transformer = nn.ModuleDict(dict(
-            wte = nn.Embedding(config.vocab_size, config.n_embd),
-            wpe = nn.Embedding(config.block_size, config.n_embd),
+            wte = bitsandbytes.nn.StableEmbedding(config.vocab_size, config.n_embd),
+            wpe = bitsandbytes.nn.StableEmbedding(config.block_size, config.n_embd),
             h = nn.ModuleList([Block(config) for _ in range(config.n_layer)]),
             ln_f = nn.LayerNorm(config.n_embd),
         ))
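
In short, the commit swaps the token (wte) and position (wpe) embeddings from torch.nn.Embedding to bitsandbytes.nn.StableEmbedding, which also explains the small change in model.safetensors size (StableEmbedding adds LayerNorm parameters). Below is a minimal, self-contained sketch of what that swap means in practice; the toy dimensions and variable names are illustrative assumptions and are not taken from this repository's config:

import torch
import torch.nn as nn
import bitsandbytes as bnb

# Illustrative sizes only (assumptions, not values from this repo's config).
vocab_size, block_size, n_embd = 50304, 1024, 768

# Before: a plain learned embedding table.
wte_plain = nn.Embedding(vocab_size, n_embd)

# After: bitsandbytes' StableEmbedding. It keeps the nn.Embedding interface
# but applies a LayerNorm to its output and registers its weights so that
# bitsandbytes 8-bit optimizers keep 32-bit optimizer state for the
# embedding, which is the usage bitsandbytes recommends for embedding layers.
wte_stable = bnb.nn.StableEmbedding(vocab_size, n_embd)

tokens = torch.randint(0, vocab_size, (2, 16))  # (batch, sequence) of token ids
print(wte_stable(tokens).shape)                 # torch.Size([2, 16, 768])

# Typically paired with an 8-bit optimizer over the model's parameters, e.g.:
# optimizer = bnb.optim.Adam8bit(model.parameters(), lr=1e-4)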