Mxytyu committed on
Commit
82a267c
·
verified ·
1 Parent(s): 2ec93d0

Update model.safetensors

Browse files
Files changed (1) hide show
  1. model.safetensors +15 -3
model.safetensors CHANGED
@@ -29,6 +29,10 @@ class HelloWorldModel(PreTrainedModel, GenerationMixin):
29
  def _update_model_kwargs_for_generation(self, outputs, model_kwargs, is_encoder_decoder=False):
30
  return model_kwargs
31
 
 
 
 
 
32
  tokenizer_config = {
33
  "do_lower_case": False,
34
  "model_max_length": 512,
@@ -38,7 +42,7 @@ tokenizer_config = {
38
  "unk_token": "<unk>",
39
  "bos_token": "<s>",
40
  "eos_token": "</s>",
41
- "vocab_size": 1, # Simplified vocabulary size
42
  }
43
 
44
  # Save tokenizer configuration
@@ -46,5 +50,13 @@ with open("tokenizer.json", "w") as f:
46
  import json
47
  json.dump(tokenizer_config, f)
48
 
49
- tokenizer = PreTrainedTokenizerFast(tokenizer_file="tokenizer.json")
50
- tokenizer.add_tokens(["Hello, world!"])
 
 
 
 
 
 
 
 
 
29
  def _update_model_kwargs_for_generation(self, outputs, model_kwargs, is_encoder_decoder=False):
30
  return model_kwargs
31
 
32
+ # Define tokenizer
33
+ tokenizer = PreTrainedTokenizerFast(tokenizer_file="tokenizer.json")
34
+ tokenizer.add_tokens(["Hello, world!"])
35
+
36
  tokenizer_config = {
37
  "do_lower_case": False,
38
  "model_max_length": 512,
 
42
  "unk_token": "<unk>",
43
  "bos_token": "<s>",
44
  "eos_token": "</s>",
45
+ "vocab_size": 2, # Simplified vocabulary size
46
  }
47
 
48
  # Save tokenizer configuration
 
50
  import json
51
  json.dump(tokenizer_config, f)
52
 
53
+ # Initialize model
54
+ config = HelloWorldConfig(vocab_size=2) # Adjusted vocab size
55
+ model = HelloWorldModel(config)
56
+
57
+ # Create dummy state_dict for saving
58
+ state_dict = model.state_dict()
59
+
60
+ # Save model using safetensors format
61
+ from safetensors.torch import save_file
62
+ save_file(state_dict, "hello_world_model.safetensors")