tuannm2914 committed on
Commit
9aec54e
·
1 Parent(s): 8bccf1f

Model save

Browse files
README.md CHANGED
@@ -13,7 +13,7 @@ should probably proofread and complete it, then remove this comment. -->
13
 
14
  This model was trained from scratch on an unknown dataset.
15
  It achieves the following results on the evaluation set:
16
- - Loss: 1.6782
17
 
18
  ## Model description
19
 
@@ -47,7 +47,7 @@ The following hyperparameters were used during training:
47
 
48
  | Training Loss | Epoch | Step | Validation Loss |
49
  |:-------------:|:-----:|:----:|:---------------:|
50
- | No log | 0 | 0 | 1.6782 |
51
 
52
 
53
  ### Framework versions
 
13
 
14
  This model was trained from scratch on an unknown dataset.
15
  It achieves the following results on the evaluation set:
16
+ - Loss: 1.4181
17
 
18
  ## Model description
19
 
 
47
 
48
  | Training Loss | Epoch | Step | Validation Loss |
49
  |:-------------:|:-----:|:----:|:---------------:|
50
+ | No log | 0 | 0 | 1.4181 |
51
 
52
 
53
  ### Framework versions
adapter_config.json CHANGED
@@ -16,10 +16,10 @@
16
  "rank_pattern": {},
17
  "revision": null,
18
  "target_modules": [
 
19
  "k_proj",
20
  "v_proj",
21
- "q_proj",
22
- "o_proj"
23
  ],
24
  "task_type": "CAUSAL_LM"
25
  }
 
16
  "rank_pattern": {},
17
  "revision": null,
18
  "target_modules": [
19
+ "o_proj",
20
  "k_proj",
21
  "v_proj",
22
+ "q_proj"
 
23
  ],
24
  "task_type": "CAUSAL_LM"
25
  }
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 0,
3
- "eval_loss": 1.6781893968582153,
4
- "eval_runtime": 1.9231,
5
- "eval_samples": 100,
6
- "eval_samples_per_second": 51.999,
7
- "eval_steps_per_second": 6.76,
8
- "train_loss": 0.17418603599071503,
9
- "train_runtime": 69.1029,
10
- "train_samples": 1100,
11
- "train_samples_per_second": 15.918,
12
- "train_steps_per_second": 0.029
13
  }
 
1
  {
2
  "epoch": 0,
3
+ "eval_loss": 1.418121576309204,
4
+ "eval_runtime": 35.9445,
5
+ "eval_samples": 1200,
6
+ "eval_samples_per_second": 33.385,
7
+ "eval_steps_per_second": 4.173,
8
+ "train_loss": 0.2214554250240326,
9
+ "train_runtime": 145.6296,
10
+ "train_samples": 1200,
11
+ "train_samples_per_second": 8.24,
12
+ "train_steps_per_second": 0.014
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 0,
3
- "eval_loss": 1.6781893968582153,
4
- "eval_runtime": 1.9231,
5
- "eval_samples": 100,
6
- "eval_samples_per_second": 51.999,
7
- "eval_steps_per_second": 6.76
8
  }
 
1
  {
2
  "epoch": 0,
3
+ "eval_loss": 1.418121576309204,
4
+ "eval_runtime": 35.9445,
5
+ "eval_samples": 1200,
6
+ "eval_samples_per_second": 33.385,
7
+ "eval_steps_per_second": 4.173
8
  }
runs/Nov30_10-17-10_hpc-hblab/events.out.tfevents.1701314316.hpc-hblab.1547149.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c1361cd39b20bf198e99c9d79c16333b6fc9e891dff95ceeb13b4595b2ed7e1
3
+ size 4821
runs/Nov30_10-17-10_hpc-hblab/events.out.tfevents.1701314498.hpc-hblab.1547149.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce62bb8a538de05a0af1a38114d22df8072606058b35f9a9de4aa810838aedbe
3
+ size 344
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 0,
3
- "train_loss": 0.17418603599071503,
4
- "train_runtime": 69.1029,
5
- "train_samples": 1100,
6
- "train_samples_per_second": 15.918,
7
- "train_steps_per_second": 0.029
8
  }
 
1
  {
2
  "epoch": 0,
3
+ "train_loss": 0.2214554250240326,
4
+ "train_runtime": 145.6296,
5
+ "train_samples": 1200,
6
+ "train_samples_per_second": 8.24,
7
+ "train_steps_per_second": 0.014
8
  }
trainer_state.json CHANGED
@@ -10,27 +10,27 @@
10
  "log_history": [
11
  {
12
  "epoch": 0,
13
- "eval_loss": 1.6781895160675049,
14
- "eval_runtime": 1.9183,
15
- "eval_samples_per_second": 52.129,
16
- "eval_steps_per_second": 6.777,
17
  "step": 0
18
  },
19
  {
20
  "epoch": 0,
21
  "step": 0,
22
- "total_flos": 4138152939749376.0,
23
- "train_loss": 0.17418603599071503,
24
- "train_runtime": 69.1029,
25
- "train_samples_per_second": 15.918,
26
- "train_steps_per_second": 0.029
27
  }
28
  ],
29
  "logging_steps": 5,
30
  "max_steps": 2,
31
  "num_train_epochs": 1,
32
  "save_steps": 500,
33
- "total_flos": 4138152939749376.0,
34
  "trial_name": null,
35
  "trial_params": null
36
  }
 
10
  "log_history": [
11
  {
12
  "epoch": 0,
13
+ "eval_loss": 1.4181212186813354,
14
+ "eval_runtime": 35.8986,
15
+ "eval_samples_per_second": 33.427,
16
+ "eval_steps_per_second": 4.178,
17
  "step": 0
18
  },
19
  {
20
  "epoch": 0,
21
  "step": 0,
22
+ "total_flos": 6955618468364288.0,
23
+ "train_loss": 0.2214554250240326,
24
+ "train_runtime": 145.6296,
25
+ "train_samples_per_second": 8.24,
26
+ "train_steps_per_second": 0.014
27
  }
28
  ],
29
  "logging_steps": 5,
30
  "max_steps": 2,
31
  "num_train_epochs": 1,
32
  "save_steps": 500,
33
+ "total_flos": 6955618468364288.0,
34
  "trial_name": null,
35
  "trial_params": null
36
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:738bd7ca07742f4e6bc4d88571499f5e22fabacca9290dceee3a207594692b11
3
  size 4664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1852d234f44667a230b4cb1c06479e3c7d7069f8107216ef8fa926ec3a32c806
3
  size 4664