fxmarty committed on
Commit
182e85b
·
verified ·
1 Parent(s): 9a1380b

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. config.json +36 -6
config.json CHANGED
@@ -21,7 +21,10 @@
21
  "pad_token_id": -1,
22
  "pretraining_tp": 1,
23
  "quantization_config": {
24
- "activation_scheme": "static",
 
 
 
25
  "export": {
26
  "kv_cache_group": [],
27
  "pack_method": "reorder",
@@ -38,11 +41,38 @@
38
  ]
39
  ]
40
  },
41
- "ignored_layers": [
42
- "lm_head"
43
- ],
44
- "kv_cache_scheme": null,
45
- "quant_method": "fp8"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  },
47
  "rms_norm_eps": 1e-06,
48
  "rope_scaling": null,
 
21
  "pad_token_id": -1,
22
  "pretraining_tp": 1,
23
  "quantization_config": {
24
+ "algo_config": null,
25
+ "exclude": [
26
+ "lm_head"
27
+ ],
28
  "export": {
29
  "kv_cache_group": [],
30
  "pack_method": "reorder",
 
41
  ]
42
  ]
43
  },
44
+ "global_quant_config": {
45
+ "bias": null,
46
+ "input_tensors": {
47
+ "ch_axis": null,
48
+ "dtype": "fp8_e4m3",
49
+ "group_size": null,
50
+ "is_dynamic": false,
51
+ "observer_cls": "PerTensorMinMaxObserver",
52
+ "qscheme": "per_tensor",
53
+ "round_method": null,
54
+ "scale_type": null,
55
+ "symmetric": null
56
+ },
57
+ "output_tensors": null,
58
+ "target_device": null,
59
+ "weight": {
60
+ "ch_axis": null,
61
+ "dtype": "fp8_e4m3",
62
+ "group_size": null,
63
+ "is_dynamic": false,
64
+ "observer_cls": "PerTensorMinMaxObserver",
65
+ "qscheme": "per_tensor",
66
+ "round_method": null,
67
+ "scale_type": null,
68
+ "symmetric": null
69
+ }
70
+ },
71
+ "layer_quant_config": {},
72
+ "layer_type_quant_config": {},
73
+ "pack_method": "reorder",
74
+ "quant_method": "quark",
75
+ "quant_mode": 1
76
  },
77
  "rms_norm_eps": 1e-06,
78
  "rope_scaling": null,