sushruthsam commited on
Commit
1fcb13c
·
verified ·
1 Parent(s): 8001b0d

Delete model/description.json

Browse files
Files changed (1) hide show
  1. model/description.json +0 -359
model/description.json DELETED
@@ -1,359 +0,0 @@
1
- {
2
- "command": "/usr/local/lib/python3.10/dist-packages/colab_kernel_launcher.py -f /root/.local/share/jupyter/runtime/kernel-3aca3dc1-4d01-4666-980c-0422f6452525.json",
3
- "compute": {
4
- "arch_list": [
5
- "sm_50",
6
- "sm_60",
7
- "sm_70",
8
- "sm_75",
9
- "sm_80",
10
- "sm_86",
11
- "sm_90"
12
- ],
13
- "devices": {
14
- "0": {
15
- "device_capability": [
16
- 8,
17
- 0
18
- ],
19
- "device_properties": "_CudaDeviceProperties(name='NVIDIA A100-SXM4-40GB', major=8, minor=0, total_memory=40513MB, multi_processor_count=108)",
20
- "gpu_type": "NVIDIA A100-SXM4-40GB"
21
- }
22
- },
23
- "gencode_flags": "-gencode compute=compute_50,code=sm_50 -gencode compute=compute_60,code=sm_60 -gencode compute=compute_70,code=sm_70 -gencode compute=compute_75,code=sm_75 -gencode compute=compute_80,code=sm_80 -gencode compute=compute_86,code=sm_86 -gencode compute=compute_90,code=sm_90",
24
- "gpus_per_node": 1,
25
- "num_nodes": 1
26
- },
27
- "config": {
28
- "adapter": {
29
- "alpha": 16,
30
- "bias_type": "none",
31
- "dropout": 0.05,
32
- "postprocessor": {
33
- "merge_adapter_into_base_model": false,
34
- "progressbar": false
35
- },
36
- "pretrained_adapter_weights": null,
37
- "r": 8,
38
- "target_modules": null,
39
- "type": "lora",
40
- "use_dora": false,
41
- "use_rslora": false
42
- },
43
- "backend": null,
44
- "base_model": "mistralai/Mistral-7B-Instruct-v0.2",
45
- "defaults": {
46
- "text": {
47
- "decoder": {
48
- "fc_activation": "relu",
49
- "fc_bias_initializer": "zeros",
50
- "fc_dropout": 0.0,
51
- "fc_layers": null,
52
- "fc_norm": null,
53
- "fc_norm_params": null,
54
- "fc_output_size": 256,
55
- "fc_use_bias": true,
56
- "fc_weights_initializer": "xavier_uniform",
57
- "input_size": null,
58
- "max_new_tokens": null,
59
- "num_fc_layers": 0,
60
- "pretrained_model_name_or_path": "",
61
- "tokenizer": "hf_tokenizer",
62
- "type": "text_extractor",
63
- "vocab_file": ""
64
- },
65
- "encoder": {
66
- "skip": false,
67
- "type": "passthrough"
68
- },
69
- "loss": {
70
- "class_similarities": null,
71
- "class_similarities_temperature": 0,
72
- "class_weights": null,
73
- "confidence_penalty": 0,
74
- "robust_lambda": 0,
75
- "type": "next_token_softmax_cross_entropy",
76
- "unique": false,
77
- "weight": 1.0
78
- },
79
- "preprocessing": {
80
- "cache_encoder_embeddings": false,
81
- "compute_idf": false,
82
- "computed_fill_value": "<UNK>",
83
- "fill_value": "<UNK>",
84
- "lowercase": false,
85
- "max_sequence_length": 256,
86
- "missing_value_strategy": "fill_with_const",
87
- "most_common": 20000,
88
- "ngram_size": 2,
89
- "padding": "right",
90
- "padding_symbol": "<PAD>",
91
- "pretrained_model_name_or_path": null,
92
- "prompt": {
93
- "retrieval": {
94
- "index_name": null,
95
- "k": 0,
96
- "model_name": null,
97
- "type": null
98
- },
99
- "task": null,
100
- "template": null
101
- },
102
- "sequence_length": null,
103
- "tokenizer": "space_punct",
104
- "unknown_symbol": "<UNK>",
105
- "vocab_file": null
106
- }
107
- }
108
- },
109
- "generation": {
110
- "bad_words_ids": null,
111
- "begin_suppress_tokens": null,
112
- "bos_token_id": null,
113
- "diversity_penalty": 0.0,
114
- "do_sample": true,
115
- "early_stopping": false,
116
- "encoder_repetition_penalty": 1.0,
117
- "eos_token_id": null,
118
- "epsilon_cutoff": 0.0,
119
- "eta_cutoff": 0.0,
120
- "exponential_decay_length_penalty": null,
121
- "force_words_ids": null,
122
- "forced_bos_token_id": null,
123
- "forced_decoder_ids": null,
124
- "forced_eos_token_id": null,
125
- "guidance_scale": null,
126
- "length_penalty": 1.0,
127
- "max_length": 32,
128
- "max_new_tokens": 512,
129
- "max_time": null,
130
- "min_length": 0,
131
- "min_new_tokens": null,
132
- "no_repeat_ngram_size": 0,
133
- "num_beam_groups": 1,
134
- "num_beams": 1,
135
- "pad_token_id": null,
136
- "penalty_alpha": null,
137
- "prompt_lookup_num_tokens": null,
138
- "remove_invalid_values": false,
139
- "renormalize_logits": false,
140
- "repetition_penalty": 1.0,
141
- "sequence_bias": null,
142
- "suppress_tokens": null,
143
- "temperature": 0.1,
144
- "top_k": 50,
145
- "top_p": 1.0,
146
- "typical_p": 1.0,
147
- "use_cache": true
148
- },
149
- "hyperopt": null,
150
- "input_features": [
151
- {
152
- "active": true,
153
- "column": "question",
154
- "encoder": {
155
- "skip": false,
156
- "type": "passthrough"
157
- },
158
- "name": "question",
159
- "preprocessing": {
160
- "cache_encoder_embeddings": false,
161
- "compute_idf": false,
162
- "computed_fill_value": "<UNK>",
163
- "fill_value": "<UNK>",
164
- "lowercase": false,
165
- "max_sequence_length": null,
166
- "missing_value_strategy": "fill_with_const",
167
- "most_common": 20000,
168
- "ngram_size": 2,
169
- "padding": "left",
170
- "padding_symbol": "<PAD>",
171
- "pretrained_model_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2",
172
- "sequence_length": null,
173
- "tokenizer": "hf_tokenizer",
174
- "unknown_symbol": "<UNK>",
175
- "vocab_file": null
176
- },
177
- "proc_column": "question_Nlu_HO",
178
- "tied": null,
179
- "type": "text"
180
- }
181
- ],
182
- "ludwig_version": "0.10.2",
183
- "model_parameters": null,
184
- "model_type": "llm",
185
- "output_features": [
186
- {
187
- "active": true,
188
- "class_similarities": null,
189
- "column": "record_id",
190
- "decoder": {
191
- "fc_activation": "relu",
192
- "fc_bias_initializer": "zeros",
193
- "fc_dropout": 0.0,
194
- "fc_layers": null,
195
- "fc_norm": null,
196
- "fc_norm_params": null,
197
- "fc_output_size": 256,
198
- "fc_use_bias": true,
199
- "fc_weights_initializer": "xavier_uniform",
200
- "input_size": null,
201
- "max_new_tokens": 512,
202
- "num_fc_layers": 0,
203
- "pretrained_model_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2",
204
- "tokenizer": "hf_tokenizer",
205
- "type": "text_extractor",
206
- "vocab_file": ""
207
- },
208
- "default_validation_metric": "loss",
209
- "dependencies": [],
210
- "input_size": null,
211
- "loss": {
212
- "class_similarities": null,
213
- "class_similarities_temperature": 0,
214
- "class_weights": null,
215
- "confidence_penalty": 0,
216
- "robust_lambda": 0,
217
- "type": "next_token_softmax_cross_entropy",
218
- "unique": false,
219
- "weight": 1.0
220
- },
221
- "name": "record_id",
222
- "num_classes": null,
223
- "preprocessing": {
224
- "cache_encoder_embeddings": false,
225
- "compute_idf": false,
226
- "computed_fill_value": "<UNK>",
227
- "fill_value": "<UNK>",
228
- "lowercase": false,
229
- "max_sequence_length": null,
230
- "missing_value_strategy": "drop_row",
231
- "most_common": 20000,
232
- "ngram_size": 2,
233
- "padding": "left",
234
- "padding_symbol": "<PAD>",
235
- "pretrained_model_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2",
236
- "sequence_length": null,
237
- "tokenizer": "hf_tokenizer",
238
- "unknown_symbol": "<UNK>",
239
- "vocab_file": null
240
- },
241
- "proc_column": "record_id_D_Znvc",
242
- "reduce_dependencies": "sum",
243
- "reduce_input": "sum",
244
- "type": "text"
245
- }
246
- ],
247
- "preprocessing": {
248
- "global_max_sequence_length": 512,
249
- "oversample_minority": null,
250
- "sample_ratio": 1.0,
251
- "sample_size": null,
252
- "split": {
253
- "probabilities": [
254
- 1.0,
255
- 0.0,
256
- 0.0
257
- ],
258
- "type": "random"
259
- },
260
- "undersample_majority": null
261
- },
262
- "prompt": {
263
- "retrieval": {
264
- "index_name": null,
265
- "k": 0,
266
- "model_name": null,
267
- "type": null
268
- },
269
- "task": null,
270
- "template": "Below is an instruction that describes a task, paired with an input that provides further context with language code and country code. Write a answer that appropriately answers the question with respect to the country code and language code.if the input provided consists of combination of two questions, then provide two record id or else only provide one record id only.\n### country_code: {country_code}\n### language_code: {language_code}\n### Instruction: {question}\n### answer:"
271
- },
272
- "quantization": {
273
- "bits": 4,
274
- "bnb_4bit_compute_dtype": "float16",
275
- "bnb_4bit_quant_type": "nf4",
276
- "bnb_4bit_use_double_quant": true,
277
- "llm_int8_has_fp16_weight": false,
278
- "llm_int8_threshold": 6.0
279
- },
280
- "trainer": {
281
- "base_learning_rate": 0.0,
282
- "batch_size": 1,
283
- "bucketing_field": null,
284
- "checkpoints_per_epoch": 0,
285
- "compile": false,
286
- "early_stop": 5,
287
- "effective_batch_size": "auto",
288
- "enable_gradient_checkpointing": false,
289
- "enable_profiling": false,
290
- "epochs": 25,
291
- "eval_batch_size": 2,
292
- "eval_steps": null,
293
- "evaluate_training_set": false,
294
- "gradient_accumulation_steps": 16,
295
- "gradient_clipping": {
296
- "clipglobalnorm": 0.5,
297
- "clipnorm": null,
298
- "clipvalue": null
299
- },
300
- "increase_batch_size_eval_metric": "loss",
301
- "increase_batch_size_eval_split": "training",
302
- "increase_batch_size_on_plateau": 0,
303
- "increase_batch_size_on_plateau_patience": 5,
304
- "increase_batch_size_on_plateau_rate": 2.0,
305
- "learning_rate": 0.0004,
306
- "learning_rate_scaling": "linear",
307
- "learning_rate_scheduler": {
308
- "decay": "cosine",
309
- "decay_rate": 0.96,
310
- "decay_steps": 10000,
311
- "eta_min": 0,
312
- "reduce_eval_metric": "loss",
313
- "reduce_eval_split": "training",
314
- "reduce_on_plateau": 0,
315
- "reduce_on_plateau_patience": 10,
316
- "reduce_on_plateau_rate": 0.1,
317
- "staircase": false,
318
- "t_0": null,
319
- "t_mult": 1,
320
- "warmup_evaluations": 0,
321
- "warmup_fraction": 0.03
322
- },
323
- "max_batch_size": 1099511627776,
324
- "optimizer": {
325
- "amsgrad": false,
326
- "betas": [
327
- 0.9,
328
- 0.999
329
- ],
330
- "block_wise": true,
331
- "eps": 1e-08,
332
- "percentile_clipping": 100,
333
- "type": "paged_adam",
334
- "weight_decay": 0.0
335
- },
336
- "profiler": {
337
- "active": 3,
338
- "repeat": 5,
339
- "skip_first": 0,
340
- "wait": 1,
341
- "warmup": 1
342
- },
343
- "regularization_lambda": 0.0,
344
- "regularization_type": "l2",
345
- "should_shuffle": true,
346
- "skip_all_evaluation": false,
347
- "steps_per_checkpoint": 0,
348
- "train_steps": null,
349
- "type": "finetune",
350
- "use_mixed_precision": false,
351
- "validation_field": "record_id",
352
- "validation_metric": "loss"
353
- }
354
- },
355
- "data_format": "<class 'pandas.core.frame.DataFrame'>",
356
- "ludwig_version": "0.10.2",
357
- "random_seed": 42,
358
- "torch_version": "2.2.1+cu121"
359
- }