dmcooller committed on
Commit
597f29c
1 Parent(s): 9d2dffc

dmcooller/neural-matia-phi-ft-3

Browse files
README.md CHANGED
@@ -16,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  This model is a fine-tuned version of [microsoft/phi-2](https://huggingface.co/microsoft/phi-2) on an unknown dataset.
18
  It achieves the following results on the evaluation set:
19
- - Loss: 0.2938
20
 
21
  ## Model description
22
 
@@ -44,22 +44,17 @@ The following hyperparameters were used during training:
44
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
45
  - lr_scheduler_type: linear
46
  - lr_scheduler_warmup_steps: 2
47
- - num_epochs: 10
48
 
49
  ### Training results
50
 
51
  | Training Loss | Epoch | Step | Validation Loss |
52
  |:-------------:|:-----:|:----:|:---------------:|
53
- | 1.9354 | 1.0 | 9 | 0.8417 |
54
- | 0.6176 | 2.0 | 18 | 0.3875 |
55
- | 0.382 | 3.0 | 27 | 0.3327 |
56
- | 0.3369 | 4.0 | 36 | 0.3130 |
57
- | 0.3179 | 5.0 | 45 | 0.3053 |
58
- | 0.3108 | 6.0 | 54 | 0.3008 |
59
- | 0.3043 | 7.0 | 63 | 0.2974 |
60
- | 0.3015 | 8.0 | 72 | 0.2952 |
61
- | 0.2982 | 9.0 | 81 | 0.2941 |
62
- | 0.2966 | 10.0 | 90 | 0.2938 |
63
 
64
 
65
  ### Framework versions
 
16
 
17
  This model is a fine-tuned version of [microsoft/phi-2](https://huggingface.co/microsoft/phi-2) on an unknown dataset.
18
  It achieves the following results on the evaluation set:
19
+ - Loss: 0.3371
20
 
21
  ## Model description
22
 
 
44
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
45
  - lr_scheduler_type: linear
46
  - lr_scheduler_warmup_steps: 2
47
+ - num_epochs: 5
48
 
49
  ### Training results
50
 
51
  | Training Loss | Epoch | Step | Validation Loss |
52
  |:-------------:|:-----:|:----:|:---------------:|
53
+ | 2.7573 | 1.0 | 9 | 1.8932 |
54
+ | 1.3209 | 2.0 | 18 | 0.6054 |
55
+ | 0.5213 | 3.0 | 27 | 0.3828 |
56
+ | 0.3903 | 4.0 | 36 | 0.3464 |
57
+ | 0.3588 | 5.0 | 45 | 0.3371 |
 
 
 
 
 
58
 
59
 
60
  ### Framework versions
adapter_config.json CHANGED
@@ -2,7 +2,7 @@
2
  "alpha_pattern": {},
3
  "auto_mapping": null,
4
  "base_model_name_or_path": "microsoft/phi-2",
5
- "bias": "none",
6
  "fan_in_fan_out": false,
7
  "inference_mode": true,
8
  "init_lora_weights": true,
@@ -20,10 +20,10 @@
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
- "q_proj",
24
- "dense",
25
  "k_proj",
26
- "v_proj"
 
 
27
  ],
28
  "task_type": "CAUSAL_LM",
29
  "use_dora": false,
 
2
  "alpha_pattern": {},
3
  "auto_mapping": null,
4
  "base_model_name_or_path": "microsoft/phi-2",
5
+ "bias": "all",
6
  "fan_in_fan_out": false,
7
  "inference_mode": true,
8
  "init_lora_weights": true,
 
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
 
 
23
  "k_proj",
24
+ "q_proj",
25
+ "v_proj",
26
+ "dense"
27
  ],
28
  "task_type": "CAUSAL_LM",
29
  "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a5d0e0fe3ad8d57b4c0785987f95e2c08d75b4a780975a9e2e6878afe4f71e76
3
- size 83920464
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88dd44df2c6fa668f634807a17ff6f6553c84daa1a8d911d87c613bde513327c
3
+ size 87440736
runs/Apr08_09-27-31_ba950c75e1e3/events.out.tfevents.1712568458.ba950c75e1e3.75.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c560845d4341022688c0846d354fddf93b638b1333fc2ca5c27204075bebaa22
3
+ size 5277
runs/Apr08_09-27-31_ba950c75e1e3/events.out.tfevents.1712568510.ba950c75e1e3.75.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4d2442767f50773aaf6038b6efeb3da3abbf1a225907c95ec4c827da139ba4e
3
+ size 7990
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9eabb7cb8f3f9d859feb44979cc23d6d80f7e79f68197cbe706cb2f1c02d4ece
3
  size 4920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2223e83f9336243cc114fdf3e44ef4335fdff8b0d59b1f406f227096db3d23ae
3
  size 4920