sushruthsam committed on
Commit 8001b0d · verified · 1 Parent(s): 64734e7

Upload 2 files

model/description.json ADDED
@@ -0,0 +1,359 @@
+ {
+ "command": "/usr/local/lib/python3.10/dist-packages/colab_kernel_launcher.py -f /root/.local/share/jupyter/runtime/kernel-3aca3dc1-4d01-4666-980c-0422f6452525.json",
+ "compute": {
+ "arch_list": [
+ "sm_50",
+ "sm_60",
+ "sm_70",
+ "sm_75",
+ "sm_80",
+ "sm_86",
+ "sm_90"
+ ],
+ "devices": {
+ "0": {
+ "device_capability": [
+ 8,
+ 0
+ ],
+ "device_properties": "_CudaDeviceProperties(name='NVIDIA A100-SXM4-40GB', major=8, minor=0, total_memory=40513MB, multi_processor_count=108)",
+ "gpu_type": "NVIDIA A100-SXM4-40GB"
+ }
+ },
+ "gencode_flags": "-gencode compute=compute_50,code=sm_50 -gencode compute=compute_60,code=sm_60 -gencode compute=compute_70,code=sm_70 -gencode compute=compute_75,code=sm_75 -gencode compute=compute_80,code=sm_80 -gencode compute=compute_86,code=sm_86 -gencode compute=compute_90,code=sm_90",
+ "gpus_per_node": 1,
+ "num_nodes": 1
+ },
+ "config": {
+ "adapter": {
+ "alpha": 16,
+ "bias_type": "none",
+ "dropout": 0.05,
+ "postprocessor": {
+ "merge_adapter_into_base_model": false,
+ "progressbar": false
+ },
+ "pretrained_adapter_weights": null,
+ "r": 8,
+ "target_modules": null,
+ "type": "lora",
+ "use_dora": false,
+ "use_rslora": false
+ },
+ "backend": null,
+ "base_model": "mistralai/Mistral-7B-Instruct-v0.2",
+ "defaults": {
+ "text": {
+ "decoder": {
+ "fc_activation": "relu",
+ "fc_bias_initializer": "zeros",
+ "fc_dropout": 0.0,
+ "fc_layers": null,
+ "fc_norm": null,
+ "fc_norm_params": null,
+ "fc_output_size": 256,
+ "fc_use_bias": true,
+ "fc_weights_initializer": "xavier_uniform",
+ "input_size": null,
+ "max_new_tokens": null,
+ "num_fc_layers": 0,
+ "pretrained_model_name_or_path": "",
+ "tokenizer": "hf_tokenizer",
+ "type": "text_extractor",
+ "vocab_file": ""
+ },
+ "encoder": {
+ "skip": false,
+ "type": "passthrough"
+ },
+ "loss": {
+ "class_similarities": null,
+ "class_similarities_temperature": 0,
+ "class_weights": null,
+ "confidence_penalty": 0,
+ "robust_lambda": 0,
+ "type": "next_token_softmax_cross_entropy",
+ "unique": false,
+ "weight": 1.0
+ },
+ "preprocessing": {
+ "cache_encoder_embeddings": false,
+ "compute_idf": false,
+ "computed_fill_value": "<UNK>",
+ "fill_value": "<UNK>",
+ "lowercase": false,
+ "max_sequence_length": 256,
+ "missing_value_strategy": "fill_with_const",
+ "most_common": 20000,
+ "ngram_size": 2,
+ "padding": "right",
+ "padding_symbol": "<PAD>",
+ "pretrained_model_name_or_path": null,
+ "prompt": {
+ "retrieval": {
+ "index_name": null,
+ "k": 0,
+ "model_name": null,
+ "type": null
+ },
+ "task": null,
+ "template": null
+ },
+ "sequence_length": null,
+ "tokenizer": "space_punct",
+ "unknown_symbol": "<UNK>",
+ "vocab_file": null
+ }
+ }
+ },
+ "generation": {
+ "bad_words_ids": null,
+ "begin_suppress_tokens": null,
+ "bos_token_id": null,
+ "diversity_penalty": 0.0,
+ "do_sample": true,
+ "early_stopping": false,
+ "encoder_repetition_penalty": 1.0,
+ "eos_token_id": null,
+ "epsilon_cutoff": 0.0,
+ "eta_cutoff": 0.0,
+ "exponential_decay_length_penalty": null,
+ "force_words_ids": null,
+ "forced_bos_token_id": null,
+ "forced_decoder_ids": null,
+ "forced_eos_token_id": null,
+ "guidance_scale": null,
+ "length_penalty": 1.0,
+ "max_length": 32,
+ "max_new_tokens": 512,
+ "max_time": null,
+ "min_length": 0,
+ "min_new_tokens": null,
+ "no_repeat_ngram_size": 0,
+ "num_beam_groups": 1,
+ "num_beams": 1,
+ "pad_token_id": null,
+ "penalty_alpha": null,
+ "prompt_lookup_num_tokens": null,
+ "remove_invalid_values": false,
+ "renormalize_logits": false,
+ "repetition_penalty": 1.0,
+ "sequence_bias": null,
+ "suppress_tokens": null,
+ "temperature": 0.1,
+ "top_k": 50,
+ "top_p": 1.0,
+ "typical_p": 1.0,
+ "use_cache": true
+ },
+ "hyperopt": null,
+ "input_features": [
+ {
+ "active": true,
+ "column": "question",
+ "encoder": {
+ "skip": false,
+ "type": "passthrough"
+ },
+ "name": "question",
+ "preprocessing": {
+ "cache_encoder_embeddings": false,
+ "compute_idf": false,
+ "computed_fill_value": "<UNK>",
+ "fill_value": "<UNK>",
+ "lowercase": false,
+ "max_sequence_length": null,
+ "missing_value_strategy": "fill_with_const",
+ "most_common": 20000,
+ "ngram_size": 2,
+ "padding": "left",
+ "padding_symbol": "<PAD>",
+ "pretrained_model_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2",
+ "sequence_length": null,
+ "tokenizer": "hf_tokenizer",
+ "unknown_symbol": "<UNK>",
+ "vocab_file": null
+ },
+ "proc_column": "question_Nlu_HO",
+ "tied": null,
+ "type": "text"
+ }
+ ],
+ "ludwig_version": "0.10.2",
+ "model_parameters": null,
+ "model_type": "llm",
+ "output_features": [
+ {
+ "active": true,
+ "class_similarities": null,
+ "column": "record_id",
+ "decoder": {
+ "fc_activation": "relu",
+ "fc_bias_initializer": "zeros",
+ "fc_dropout": 0.0,
+ "fc_layers": null,
+ "fc_norm": null,
+ "fc_norm_params": null,
+ "fc_output_size": 256,
+ "fc_use_bias": true,
+ "fc_weights_initializer": "xavier_uniform",
+ "input_size": null,
+ "max_new_tokens": 512,
+ "num_fc_layers": 0,
+ "pretrained_model_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2",
+ "tokenizer": "hf_tokenizer",
+ "type": "text_extractor",
+ "vocab_file": ""
+ },
+ "default_validation_metric": "loss",
+ "dependencies": [],
+ "input_size": null,
+ "loss": {
+ "class_similarities": null,
+ "class_similarities_temperature": 0,
+ "class_weights": null,
+ "confidence_penalty": 0,
+ "robust_lambda": 0,
+ "type": "next_token_softmax_cross_entropy",
+ "unique": false,
+ "weight": 1.0
+ },
+ "name": "record_id",
+ "num_classes": null,
+ "preprocessing": {
+ "cache_encoder_embeddings": false,
+ "compute_idf": false,
+ "computed_fill_value": "<UNK>",
+ "fill_value": "<UNK>",
+ "lowercase": false,
+ "max_sequence_length": null,
+ "missing_value_strategy": "drop_row",
+ "most_common": 20000,
+ "ngram_size": 2,
+ "padding": "left",
+ "padding_symbol": "<PAD>",
+ "pretrained_model_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2",
+ "sequence_length": null,
+ "tokenizer": "hf_tokenizer",
+ "unknown_symbol": "<UNK>",
+ "vocab_file": null
+ },
+ "proc_column": "record_id_D_Znvc",
+ "reduce_dependencies": "sum",
+ "reduce_input": "sum",
+ "type": "text"
+ }
+ ],
+ "preprocessing": {
+ "global_max_sequence_length": 512,
+ "oversample_minority": null,
+ "sample_ratio": 1.0,
+ "sample_size": null,
+ "split": {
+ "probabilities": [
+ 1.0,
+ 0.0,
+ 0.0
+ ],
+ "type": "random"
+ },
+ "undersample_majority": null
+ },
+ "prompt": {
+ "retrieval": {
+ "index_name": null,
+ "k": 0,
+ "model_name": null,
+ "type": null
+ },
+ "task": null,
+ "template": "Below is an instruction that describes a task, paired with an input that provides further context with language code and country code. Write a answer that appropriately answers the question with respect to the country code and language code.if the input provided consists of combination of two questions, then provide two record id or else only provide one record id only.\n### country_code: {country_code}\n### language_code: {language_code}\n### Instruction: {question}\n### answer:"
+ },
+ "quantization": {
+ "bits": 4,
+ "bnb_4bit_compute_dtype": "float16",
+ "bnb_4bit_quant_type": "nf4",
+ "bnb_4bit_use_double_quant": true,
+ "llm_int8_has_fp16_weight": false,
+ "llm_int8_threshold": 6.0
+ },
+ "trainer": {
+ "base_learning_rate": 0.0,
+ "batch_size": 1,
+ "bucketing_field": null,
+ "checkpoints_per_epoch": 0,
+ "compile": false,
+ "early_stop": 5,
+ "effective_batch_size": "auto",
+ "enable_gradient_checkpointing": false,
+ "enable_profiling": false,
+ "epochs": 25,
+ "eval_batch_size": 2,
+ "eval_steps": null,
+ "evaluate_training_set": false,
+ "gradient_accumulation_steps": 16,
+ "gradient_clipping": {
+ "clipglobalnorm": 0.5,
+ "clipnorm": null,
+ "clipvalue": null
+ },
+ "increase_batch_size_eval_metric": "loss",
+ "increase_batch_size_eval_split": "training",
+ "increase_batch_size_on_plateau": 0,
+ "increase_batch_size_on_plateau_patience": 5,
+ "increase_batch_size_on_plateau_rate": 2.0,
+ "learning_rate": 0.0004,
+ "learning_rate_scaling": "linear",
+ "learning_rate_scheduler": {
+ "decay": "cosine",
+ "decay_rate": 0.96,
+ "decay_steps": 10000,
+ "eta_min": 0,
+ "reduce_eval_metric": "loss",
+ "reduce_eval_split": "training",
+ "reduce_on_plateau": 0,
+ "reduce_on_plateau_patience": 10,
+ "reduce_on_plateau_rate": 0.1,
+ "staircase": false,
+ "t_0": null,
+ "t_mult": 1,
+ "warmup_evaluations": 0,
+ "warmup_fraction": 0.03
+ },
+ "max_batch_size": 1099511627776,
+ "optimizer": {
+ "amsgrad": false,
+ "betas": [
+ 0.9,
+ 0.999
+ ],
+ "block_wise": true,
+ "eps": 1e-08,
+ "percentile_clipping": 100,
+ "type": "paged_adam",
+ "weight_decay": 0.0
+ },
+ "profiler": {
+ "active": 3,
+ "repeat": 5,
+ "skip_first": 0,
+ "wait": 1,
+ "warmup": 1
+ },
+ "regularization_lambda": 0.0,
+ "regularization_type": "l2",
+ "should_shuffle": true,
+ "skip_all_evaluation": false,
+ "steps_per_checkpoint": 0,
+ "train_steps": null,
+ "type": "finetune",
+ "use_mixed_precision": false,
+ "validation_field": "record_id",
+ "validation_metric": "loss"
+ }
+ },
+ "data_format": "<class 'pandas.core.frame.DataFrame'>",
+ "ludwig_version": "0.10.2",
+ "random_seed": 42,
+ "torch_version": "2.2.1+cu121"
+ }
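
The `config` block above is a Ludwig 0.10.x LLM fine-tuning configuration: Mistral-7B-Instruct-v0.2 as the base model, a LoRA adapter (r=8, alpha=16, dropout 0.05), 4-bit NF4 quantization, paged Adam, and a cosine schedule over 25 epochs. As a rough, non-authoritative sketch of how such a config is used through Ludwig's Python API, the snippet below restates its key fields; the `train.csv` path and the DataFrame are assumptions (the column names are taken from the feature and template names in the JSON), and the full prompt template is abbreviated to its last lines.

# Minimal sketch, not the exact training script behind this commit.
# Assumes Ludwig 0.10.x and a hypothetical train.csv with question,
# record_id, country_code and language_code columns.
import pandas as pd
from ludwig.api import LudwigModel

config = {
    "model_type": "llm",
    "base_model": "mistralai/Mistral-7B-Instruct-v0.2",
    "input_features": [{"name": "question", "type": "text"}],
    "output_features": [{"name": "record_id", "type": "text"}],
    "prompt": {
        # Abbreviated; the full template is in description.json above.
        "template": (
            "### country_code: {country_code}\n"
            "### language_code: {language_code}\n"
            "### Instruction: {question}\n"
            "### answer:"
        )
    },
    "adapter": {"type": "lora", "r": 8, "alpha": 16, "dropout": 0.05},
    "quantization": {"bits": 4},
    "generation": {"temperature": 0.1, "max_new_tokens": 512},
    "trainer": {
        "type": "finetune",
        "epochs": 25,
        "batch_size": 1,
        "gradient_accumulation_steps": 16,
        "learning_rate": 0.0004,
        "optimizer": {"type": "paged_adam"},
        "learning_rate_scheduler": {"decay": "cosine", "warmup_fraction": 0.03},
    },
}

df = pd.read_csv("train.csv")      # hypothetical dataset path
model = LudwigModel(config=config)
train_stats, _, output_dir = model.train(dataset=df)

Ludwig writes the resulting training statistics into its output directory, which is where a file like the training_statistics.json below comes from.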
model/training_statistics.json ADDED
@@ -0,0 +1,176 @@
+ {
+ "evaluation_frequency": {
+ "frequency": 1,
+ "period": "epoch"
+ },
+ "test": {},
+ "training": {
+ "combined": {
+ "loss": [
+ 2.273854970932007,
+ 0.3827468156814575,
+ 0.15818676352500916,
+ 0.1293812245130539,
+ 0.11467840522527695,
+ 0.15169575810432434,
+ 0.1120581328868866,
+ 0.20288483798503876,
+ 0.14002884924411774,
+ 0.15531566739082336,
+ 0.11533121019601822,
+ 0.10372911393642426,
+ 0.08477381616830826,
+ 0.0911589041352272,
+ 0.07343953102827072,
+ 0.023161323741078377,
+ 0.07774317264556885,
+ 0.06919151544570923,
+ 0.07008837908506393,
+ 0.030844084918498993,
+ 0.025964314118027687,
+ 0.01707441359758377,
+ 0.03063521720468998,
+ 0.0024857439566403627,
+ 0.01527629978954792
+ ]
+ },
+ "record_id": {
+ "loss": [
+ 2.273854970932007,
+ 0.3827468156814575,
+ 0.15818676352500916,
+ 0.1293812245130539,
+ 0.11467840522527695,
+ 0.15169575810432434,
+ 0.1120581328868866,
+ 0.20288483798503876,
+ 0.14002884924411774,
+ 0.15531566739082336,
+ 0.11533121019601822,
+ 0.10372911393642426,
+ 0.08477381616830826,
+ 0.0911589041352272,
+ 0.07343953102827072,
+ 0.023161323741078377,
+ 0.07774317264556885,
+ 0.06919151544570923,
+ 0.07008837908506393,
+ 0.030844084918498993,
+ 0.025964314118027687,
+ 0.01707441359758377,
+ 0.03063521720468998,
+ 0.0024857439566403627,
+ 0.01527629978954792
+ ],
+ "next_token_perplexity": [
+ 19490.4921875,
+ 14166.068359375,
+ 12723.064453125,
+ 12498.83984375,
+ 12421.1279296875,
+ 12585.58203125,
+ 12413.1298828125,
+ 12835.3369140625,
+ 12517.0888671875,
+ 12556.447265625,
+ 12440.3193359375,
+ 12317.26171875,
+ 12243.7900390625,
+ 12318.326171875,
+ 12190.6103515625,
+ 11902.4248046875,
+ 12192.1630859375,
+ 12205.6474609375,
+ 12120.591796875,
+ 11974.94921875,
+ 11951.279296875,
+ 11927.4404296875,
+ 12000.904296875,
+ 11797.9345703125,
+ 11871.890625
+ ],
+ "perplexity": [
+ 31154.529296875,
+ 31088.609375,
+ 31035.79296875,
+ 31128.365234375,
+ 31052.521484375,
+ 31292.01171875,
+ 30575.08984375,
+ 30647.900390625,
+ 31290.48828125,
+ 30241.9453125,
+ 30386.8984375,
+ 30139.26953125,
+ 30028.556640625,
+ 29741.388671875,
+ 29391.01953125,
+ 30398.14453125,
+ 31056.044921875,
+ 31104.41796875,
+ 30798.970703125,
+ 30102.791015625,
+ 30365.43359375,
+ 29363.146484375,
+ 29221.5703125,
+ 30261.072265625,
+ 29413.42578125
+ ],
+ "sequence_accuracy": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "token_accuracy": [
+ 0.0018595437286421657,
+ 0.0014020088128745556,
+ 0.0012599329929798841,
+ 0.0013859043829143047,
+ 0.0018072795355692506,
+ 0.0018894081003963947,
+ 0.0021492578089237213,
+ 0.0020676767453551292,
+ 0.0011784115340560675,
+ 0.0016606699209660292,
+ 0.0020560333505272865,
+ 0.0016291090287268162,
+ 0.001910633291117847,
+ 0.0012946828501299024,
+ 0.0016550812870264053,
+ 0.0012489061336964369,
+ 0.0012834429508075118,
+ 0.001796281081624329,
+ 0.0012682911474257708,
+ 0.001162951928563416,
+ 0.0010253882501274347,
+ 0.002276905346661806,
+ 0.0018036025576293468,
+ 0.001517598400823772,
+ 0.002525100251659751
+ ]
+ }
+ },
+ "validation": {}
+ }
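
training_statistics.json is plain JSON keyed by split, feature, and metric, with one value per epoch (25 epochs in this run, training split only). It can be inspected without Ludwig; a minimal sketch, assuming the file has been downloaded to the relative path used below:

# Sketch for eyeballing the training curve in this file.
# The path is an assumption; adjust to wherever the repo is checked out.
import json
import matplotlib.pyplot as plt

with open("model/training_statistics.json") as f:
    stats = json.load(f)

loss = stats["training"]["combined"]["loss"]   # one value per epoch
epochs = range(1, len(loss) + 1)

plt.plot(epochs, loss, marker="o")
plt.xlabel("epoch")
plt.ylabel("combined training loss")
plt.title("Mistral-7B LoRA fine-tune: training loss")
plt.show()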