File size: 2,271 Bytes
b8d192d
 
 
b530618
 
 
b8d192d
 
 
 
 
b530618
 
 
b8d192d
 
 
b530618
 
 
b2d35fb
b8d192d
 
b530618
 
 
b2d35fb
b8d192d
 
b530618
 
 
 
 
 
be86fd3
 
b530618
 
 
 
be86fd3
 
 
b530618
 
 
be86fd3
 
b530618
 
 
 
 
 
be86fd3
 
 
b530618
 
 
be86fd3
 
b530618
 
 
 
be86fd3
 
b530618
 
 
 
 
 
be86fd3
 
b530618
 
 
 
b8d192d
 
be86fd3
b530618
 
 
 
 
 
b8d192d
 
b2d35fb
b530618
b8d192d
b2d35fb
b530618
 
 
b8d192d
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9961568024596464,
  "eval_steps": 10,
  "global_step": 36,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.03,
      "learning_rate": 3.9923893961834914e-05,
      "loss": 1.3268,
      "step": 1
    },
    {
      "epoch": 0.14,
      "learning_rate": 3.812615574073301e-05,
      "loss": 1.2548,
      "step": 5
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.285575219373079e-05,
      "loss": 1.1912,
      "step": 10
    },
    {
      "epoch": 0.28,
      "eval_loss": 1.109934687614441,
      "eval_runtime": 9.2946,
      "eval_samples_per_second": 5.057,
      "eval_steps_per_second": 1.291,
      "step": 10
    },
    {
      "epoch": 0.42,
      "learning_rate": 2.5176380902050418e-05,
      "loss": 1.1433,
      "step": 15
    },
    {
      "epoch": 0.55,
      "learning_rate": 1.6527036446661396e-05,
      "loss": 1.1238,
      "step": 20
    },
    {
      "epoch": 0.55,
      "eval_loss": 1.065536379814148,
      "eval_runtime": 9.2887,
      "eval_samples_per_second": 5.06,
      "eval_steps_per_second": 1.292,
      "step": 20
    },
    {
      "epoch": 0.69,
      "learning_rate": 8.528471272979083e-06,
      "loss": 1.1102,
      "step": 25
    },
    {
      "epoch": 0.83,
      "learning_rate": 2.679491924311226e-06,
      "loss": 1.1258,
      "step": 30
    },
    {
      "epoch": 0.83,
      "eval_loss": 1.055001974105835,
      "eval_runtime": 9.2909,
      "eval_samples_per_second": 5.059,
      "eval_steps_per_second": 1.292,
      "step": 30
    },
    {
      "epoch": 0.97,
      "learning_rate": 7.61060381650891e-08,
      "loss": 1.1272,
      "step": 35
    },
    {
      "epoch": 1.0,
      "step": 36,
      "total_flos": 1.0010669722946765e+17,
      "train_loss": 1.1544433269235823,
      "train_runtime": 5078.5964,
      "train_samples_per_second": 1.537,
      "train_steps_per_second": 0.007
    }
  ],
  "logging_steps": 5,
  "max_steps": 36,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 20,
  "total_flos": 1.0010669722946765e+17,
  "train_batch_size": 6,
  "trial_name": null,
  "trial_params": null
}