File size: 2,165 Bytes
b8d192d
 
 
b2d35fb
 
 
b8d192d
 
 
 
 
b2d35fb
 
 
b8d192d
 
 
b2d35fb
 
 
 
b8d192d
 
b2d35fb
 
 
 
b8d192d
 
b2d35fb
 
 
 
b8d192d
 
 
b2d35fb
 
 
b8d192d
 
b2d35fb
 
 
 
 
 
b8d192d
 
b2d35fb
 
 
 
b8d192d
 
b2d35fb
 
 
 
b8d192d
 
 
b2d35fb
 
 
b8d192d
 
b2d35fb
 
 
 
b8d192d
 
b2d35fb
 
 
 
 
 
b8d192d
 
b2d35fb
 
 
 
 
 
 
b8d192d
 
b2d35fb
 
b8d192d
b2d35fb
 
 
 
b8d192d
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9891196834817013,
  "eval_steps": 20,
  "global_step": 40,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02,
      "learning_rate": 1.9969173337331283e-05,
      "loss": 1.6723,
      "step": 1
    },
    {
      "epoch": 0.12,
      "learning_rate": 1.9238795325112867e-05,
      "loss": 1.4829,
      "step": 5
    },
    {
      "epoch": 0.25,
      "learning_rate": 1.7071067811865477e-05,
      "loss": 1.3734,
      "step": 10
    },
    {
      "epoch": 0.37,
      "learning_rate": 1.3826834323650899e-05,
      "loss": 1.3486,
      "step": 15
    },
    {
      "epoch": 0.49,
      "learning_rate": 1e-05,
      "loss": 1.3274,
      "step": 20
    },
    {
      "epoch": 0.49,
      "eval_loss": 1.258691668510437,
      "eval_runtime": 2.1716,
      "eval_samples_per_second": 4.605,
      "eval_steps_per_second": 2.302,
      "step": 20
    },
    {
      "epoch": 0.62,
      "learning_rate": 6.173165676349103e-06,
      "loss": 1.2978,
      "step": 25
    },
    {
      "epoch": 0.74,
      "learning_rate": 2.9289321881345257e-06,
      "loss": 1.3259,
      "step": 30
    },
    {
      "epoch": 0.87,
      "learning_rate": 7.612046748871327e-07,
      "loss": 1.2801,
      "step": 35
    },
    {
      "epoch": 0.99,
      "learning_rate": 0.0,
      "loss": 1.3066,
      "step": 40
    },
    {
      "epoch": 0.99,
      "eval_loss": 1.2477926015853882,
      "eval_runtime": 2.1661,
      "eval_samples_per_second": 4.617,
      "eval_steps_per_second": 2.308,
      "step": 40
    },
    {
      "epoch": 0.99,
      "step": 40,
      "total_flos": 2.6717900760940544e+16,
      "train_loss": 1.3475643575191498,
      "train_runtime": 1594.7957,
      "train_samples_per_second": 1.268,
      "train_steps_per_second": 0.025
    }
  ],
  "logging_steps": 5,
  "max_steps": 40,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "total_flos": 2.6717900760940544e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}