sushruthsam committed on
Commit 8001b0d · verified · 1 Parent(s): 64734e7

Upload 2 files

model/description.json ADDED
@@ -0,0 +1,359 @@
+ {
+ "command": "/usr/local/lib/python3.10/dist-packages/colab_kernel_launcher.py -f /root/.local/share/jupyter/runtime/kernel-3aca3dc1-4d01-4666-980c-0422f6452525.json",
+ "compute": {
+ "arch_list": [
+ "sm_50",
+ "sm_60",
+ "sm_70",
+ "sm_75",
+ "sm_80",
+ "sm_86",
+ "sm_90"
+ ],
+ "devices": {
+ "0": {
+ "device_capability": [
+ 8,
+ 0
+ ],
+ "device_properties": "_CudaDeviceProperties(name='NVIDIA A100-SXM4-40GB', major=8, minor=0, total_memory=40513MB, multi_processor_count=108)",
+ "gpu_type": "NVIDIA A100-SXM4-40GB"
+ }
+ },
+ "gencode_flags": "-gencode compute=compute_50,code=sm_50 -gencode compute=compute_60,code=sm_60 -gencode compute=compute_70,code=sm_70 -gencode compute=compute_75,code=sm_75 -gencode compute=compute_80,code=sm_80 -gencode compute=compute_86,code=sm_86 -gencode compute=compute_90,code=sm_90",
+ "gpus_per_node": 1,
+ "num_nodes": 1
+ },
+ "config": {
+ "adapter": {
+ "alpha": 16,
+ "bias_type": "none",
+ "dropout": 0.05,
+ "postprocessor": {
+ "merge_adapter_into_base_model": false,
+ "progressbar": false
+ },
+ "pretrained_adapter_weights": null,
+ "r": 8,
+ "target_modules": null,
+ "type": "lora",
+ "use_dora": false,
+ "use_rslora": false
+ },
+ "backend": null,
+ "base_model": "mistralai/Mistral-7B-Instruct-v0.2",
+ "defaults": {
+ "text": {
+ "decoder": {
+ "fc_activation": "relu",
+ "fc_bias_initializer": "zeros",
+ "fc_dropout": 0.0,
+ "fc_layers": null,
+ "fc_norm": null,
+ "fc_norm_params": null,
+ "fc_output_size": 256,
+ "fc_use_bias": true,
+ "fc_weights_initializer": "xavier_uniform",
+ "input_size": null,
+ "max_new_tokens": null,
+ "num_fc_layers": 0,
+ "pretrained_model_name_or_path": "",
+ "tokenizer": "hf_tokenizer",
+ "type": "text_extractor",
+ "vocab_file": ""
+ },
+ "encoder": {
+ "skip": false,
+ "type": "passthrough"
+ },
+ "loss": {
+ "class_similarities": null,
+ "class_similarities_temperature": 0,
+ "class_weights": null,
+ "confidence_penalty": 0,
+ "robust_lambda": 0,
+ "type": "next_token_softmax_cross_entropy",
+ "unique": false,
+ "weight": 1.0
+ },
+ "preprocessing": {
+ "cache_encoder_embeddings": false,
+ "compute_idf": false,
+ "computed_fill_value": "<UNK>",
+ "fill_value": "<UNK>",
+ "lowercase": false,
+ "max_sequence_length": 256,
+ "missing_value_strategy": "fill_with_const",
+ "most_common": 20000,
+ "ngram_size": 2,
+ "padding": "right",
+ "padding_symbol": "<PAD>",
+ "pretrained_model_name_or_path": null,
+ "prompt": {
+ "retrieval": {
+ "index_name": null,
+ "k": 0,
+ "model_name": null,
+ "type": null
+ },
+ "task": null,
+ "template": null
+ },
+ "sequence_length": null,
+ "tokenizer": "space_punct",
+ "unknown_symbol": "<UNK>",
+ "vocab_file": null
+ }
+ }
+ },
+ "generation": {
+ "bad_words_ids": null,
+ "begin_suppress_tokens": null,
+ "bos_token_id": null,
+ "diversity_penalty": 0.0,
+ "do_sample": true,
+ "early_stopping": false,
+ "encoder_repetition_penalty": 1.0,
+ "eos_token_id": null,
+ "epsilon_cutoff": 0.0,
+ "eta_cutoff": 0.0,
+ "exponential_decay_length_penalty": null,
+ "force_words_ids": null,
+ "forced_bos_token_id": null,
+ "forced_decoder_ids": null,
+ "forced_eos_token_id": null,
+ "guidance_scale": null,
+ "length_penalty": 1.0,
+ "max_length": 32,
+ "max_new_tokens": 512,
+ "max_time": null,
+ "min_length": 0,
+ "min_new_tokens": null,
+ "no_repeat_ngram_size": 0,
+ "num_beam_groups": 1,
+ "num_beams": 1,
+ "pad_token_id": null,
+ "penalty_alpha": null,
+ "prompt_lookup_num_tokens": null,
+ "remove_invalid_values": false,
+ "renormalize_logits": false,
+ "repetition_penalty": 1.0,
+ "sequence_bias": null,
+ "suppress_tokens": null,
+ "temperature": 0.1,
+ "top_k": 50,
+ "top_p": 1.0,
+ "typical_p": 1.0,
+ "use_cache": true
+ },
+ "hyperopt": null,
+ "input_features": [
+ {
+ "active": true,
+ "column": "question",
+ "encoder": {
+ "skip": false,
+ "type": "passthrough"
+ },
+ "name": "question",
+ "preprocessing": {
+ "cache_encoder_embeddings": false,
+ "compute_idf": false,
+ "computed_fill_value": "<UNK>",
+ "fill_value": "<UNK>",
+ "lowercase": false,
+ "max_sequence_length": null,
+ "missing_value_strategy": "fill_with_const",
+ "most_common": 20000,
+ "ngram_size": 2,
+ "padding": "left",
+ "padding_symbol": "<PAD>",
+ "pretrained_model_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2",
+ "sequence_length": null,
+ "tokenizer": "hf_tokenizer",
+ "unknown_symbol": "<UNK>",
+ "vocab_file": null
+ },
+ "proc_column": "question_Nlu_HO",
+ "tied": null,
+ "type": "text"
+ }
+ ],
+ "ludwig_version": "0.10.2",
+ "model_parameters": null,
+ "model_type": "llm",
+ "output_features": [
+ {
+ "active": true,
+ "class_similarities": null,
+ "column": "record_id",
+ "decoder": {
+ "fc_activation": "relu",
+ "fc_bias_initializer": "zeros",
+ "fc_dropout": 0.0,
+ "fc_layers": null,
+ "fc_norm": null,
+ "fc_norm_params": null,
+ "fc_output_size": 256,
+ "fc_use_bias": true,
+ "fc_weights_initializer": "xavier_uniform",
+ "input_size": null,
+ "max_new_tokens": 512,
+ "num_fc_layers": 0,
+ "pretrained_model_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2",
+ "tokenizer": "hf_tokenizer",
+ "type": "text_extractor",
+ "vocab_file": ""
+ },
+ "default_validation_metric": "loss",
+ "dependencies": [],
+ "input_size": null,
+ "loss": {
+ "class_similarities": null,
+ "class_similarities_temperature": 0,
+ "class_weights": null,
+ "confidence_penalty": 0,
+ "robust_lambda": 0,
+ "type": "next_token_softmax_cross_entropy",
+ "unique": false,
+ "weight": 1.0
+ },
+ "name": "record_id",
+ "num_classes": null,
+ "preprocessing": {
+ "cache_encoder_embeddings": false,
+ "compute_idf": false,
+ "computed_fill_value": "<UNK>",
+ "fill_value": "<UNK>",
+ "lowercase": false,
+ "max_sequence_length": null,
+ "missing_value_strategy": "drop_row",
+ "most_common": 20000,
+ "ngram_size": 2,
+ "padding": "left",
+ "padding_symbol": "<PAD>",
+ "pretrained_model_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2",
+ "sequence_length": null,
+ "tokenizer": "hf_tokenizer",
+ "unknown_symbol": "<UNK>",
+ "vocab_file": null
+ },
+ "proc_column": "record_id_D_Znvc",
+ "reduce_dependencies": "sum",
+ "reduce_input": "sum",
+ "type": "text"
+ }
+ ],
+ "preprocessing": {
+ "global_max_sequence_length": 512,
+ "oversample_minority": null,
+ "sample_ratio": 1.0,
+ "sample_size": null,
+ "split": {
+ "probabilities": [
+ 1.0,
+ 0.0,
+ 0.0
+ ],
+ "type": "random"
+ },
+ "undersample_majority": null
+ },
+ "prompt": {
+ "retrieval": {
+ "index_name": null,
+ "k": 0,
+ "model_name": null,
+ "type": null
+ },
+ "task": null,
+ "template": "Below is an instruction that describes a task, paired with an input that provides further context with language code and country code. Write a answer that appropriately answers the question with respect to the country code and language code.if the input provided consists of combination of two questions, then provide two record id or else only provide one record id only.\n### country_code: {country_code}\n### language_code: {language_code}\n### Instruction: {question}\n### answer:"
+ },
+ "quantization": {
+ "bits": 4,
+ "bnb_4bit_compute_dtype": "float16",
+ "bnb_4bit_quant_type": "nf4",
+ "bnb_4bit_use_double_quant": true,
+ "llm_int8_has_fp16_weight": false,
+ "llm_int8_threshold": 6.0
+ },
+ "trainer": {
+ "base_learning_rate": 0.0,
+ "batch_size": 1,
+ "bucketing_field": null,
+ "checkpoints_per_epoch": 0,
+ "compile": false,
+ "early_stop": 5,
+ "effective_batch_size": "auto",
+ "enable_gradient_checkpointing": false,
+ "enable_profiling": false,
+ "epochs": 25,
+ "eval_batch_size": 2,
+ "eval_steps": null,
+ "evaluate_training_set": false,
+ "gradient_accumulation_steps": 16,
+ "gradient_clipping": {
+ "clipglobalnorm": 0.5,
+ "clipnorm": null,
+ "clipvalue": null
+ },
+ "increase_batch_size_eval_metric": "loss",
+ "increase_batch_size_eval_split": "training",
+ "increase_batch_size_on_plateau": 0,
+ "increase_batch_size_on_plateau_patience": 5,
+ "increase_batch_size_on_plateau_rate": 2.0,
+ "learning_rate": 0.0004,
+ "learning_rate_scaling": "linear",
+ "learning_rate_scheduler": {
+ "decay": "cosine",
+ "decay_rate": 0.96,
+ "decay_steps": 10000,
+ "eta_min": 0,
+ "reduce_eval_metric": "loss",
+ "reduce_eval_split": "training",
+ "reduce_on_plateau": 0,
+ "reduce_on_plateau_patience": 10,
+ "reduce_on_plateau_rate": 0.1,
+ "staircase": false,
+ "t_0": null,
+ "t_mult": 1,
+ "warmup_evaluations": 0,
+ "warmup_fraction": 0.03
+ },
+ "max_batch_size": 1099511627776,
+ "optimizer": {
+ "amsgrad": false,
+ "betas": [
+ 0.9,
+ 0.999
+ ],
+ "block_wise": true,
+ "eps": 1e-08,
+ "percentile_clipping": 100,
+ "type": "paged_adam",
+ "weight_decay": 0.0
+ },
+ "profiler": {
+ "active": 3,
+ "repeat": 5,
+ "skip_first": 0,
+ "wait": 1,
+ "warmup": 1
+ },
+ "regularization_lambda": 0.0,
+ "regularization_type": "l2",
+ "should_shuffle": true,
+ "skip_all_evaluation": false,
+ "steps_per_checkpoint": 0,
+ "train_steps": null,
+ "type": "finetune",
+ "use_mixed_precision": false,
+ "validation_field": "record_id",
+ "validation_metric": "loss"
+ }
+ },
+ "data_format": "<class 'pandas.core.frame.DataFrame'>",
+ "ludwig_version": "0.10.2",
+ "random_seed": 42,
+ "torch_version": "2.2.1+cu121"
+ }
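
The `config` block above is a Ludwig 0.10.x LLM fine-tuning configuration: Mistral-7B-Instruct-v0.2 as the base model, a LoRA adapter (r=8, alpha=16, dropout 0.05), 4-bit NF4 quantization, paged Adam, and a cosine schedule over 25 epochs. As a rough, non-authoritative sketch of how such a config is used through Ludwig's Python API, the snippet below restates its key fields; the `train.csv` path and the DataFrame are assumptions (the column names are taken from the feature and template names in the JSON), and the full prompt template is abbreviated to its last lines.

# Minimal sketch, not the exact training script behind this commit.
# Assumes Ludwig 0.10.x and a hypothetical train.csv with question,
# record_id, country_code and language_code columns.
import pandas as pd
from ludwig.api import LudwigModel

config = {
    "model_type": "llm",
    "base_model": "mistralai/Mistral-7B-Instruct-v0.2",
    "input_features": [{"name": "question", "type": "text"}],
    "output_features": [{"name": "record_id", "type": "text"}],
    "prompt": {
        # Abbreviated; the full template is in description.json above.
        "template": (
            "### country_code: {country_code}\n"
            "### language_code: {language_code}\n"
            "### Instruction: {question}\n"
            "### answer:"
        )
    },
    "adapter": {"type": "lora", "r": 8, "alpha": 16, "dropout": 0.05},
    "quantization": {"bits": 4},
    "generation": {"temperature": 0.1, "max_new_tokens": 512},
    "trainer": {
        "type": "finetune",
        "epochs": 25,
        "batch_size": 1,
        "gradient_accumulation_steps": 16,
        "learning_rate": 0.0004,
        "optimizer": {"type": "paged_adam"},
        "learning_rate_scheduler": {"decay": "cosine", "warmup_fraction": 0.03},
    },
}

df = pd.read_csv("train.csv")      # hypothetical dataset path
model = LudwigModel(config=config)
train_stats, _, output_dir = model.train(dataset=df)

Ludwig writes the resulting training statistics into its output directory, which is where a file like the training_statistics.json below comes from.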
model/training_statistics.json ADDED
@@ -0,0 +1,176 @@
+ {
+ "evaluation_frequency": {
+ "frequency": 1,
+ "period": "epoch"
+ },
+ "test": {},
+ "training": {
+ "combined": {
+ "loss": [
+ 2.273854970932007,
+ 0.3827468156814575,
+ 0.15818676352500916,
+ 0.1293812245130539,
+ 0.11467840522527695,
+ 0.15169575810432434,
+ 0.1120581328868866,
+ 0.20288483798503876,
+ 0.14002884924411774,
+ 0.15531566739082336,
+ 0.11533121019601822,
+ 0.10372911393642426,
+ 0.08477381616830826,
+ 0.0911589041352272,
+ 0.07343953102827072,
+ 0.023161323741078377,
+ 0.07774317264556885,
+ 0.06919151544570923,
+ 0.07008837908506393,
+ 0.030844084918498993,
+ 0.025964314118027687,
+ 0.01707441359758377,
+ 0.03063521720468998,
+ 0.0024857439566403627,
+ 0.01527629978954792
+ ]
+ },
+ "record_id": {
+ "loss": [
+ 2.273854970932007,
+ 0.3827468156814575,
+ 0.15818676352500916,
+ 0.1293812245130539,
+ 0.11467840522527695,
+ 0.15169575810432434,
+ 0.1120581328868866,
+ 0.20288483798503876,
+ 0.14002884924411774,
+ 0.15531566739082336,
+ 0.11533121019601822,
+ 0.10372911393642426,
+ 0.08477381616830826,
+ 0.0911589041352272,
+ 0.07343953102827072,
+ 0.023161323741078377,
+ 0.07774317264556885,
+ 0.06919151544570923,
+ 0.07008837908506393,
+ 0.030844084918498993,
+ 0.025964314118027687,
+ 0.01707441359758377,
+ 0.03063521720468998,
+ 0.0024857439566403627,
+ 0.01527629978954792
+ ],
+ "next_token_perplexity": [
+ 19490.4921875,
+ 14166.068359375,
+ 12723.064453125,
+ 12498.83984375,
+ 12421.1279296875,
+ 12585.58203125,
+ 12413.1298828125,
+ 12835.3369140625,
+ 12517.0888671875,
+ 12556.447265625,
+ 12440.3193359375,
+ 12317.26171875,
+ 12243.7900390625,
+ 12318.326171875,
+ 12190.6103515625,
+ 11902.4248046875,
+ 12192.1630859375,
+ 12205.6474609375,
+ 12120.591796875,
+ 11974.94921875,
+ 11951.279296875,
+ 11927.4404296875,
+ 12000.904296875,
+ 11797.9345703125,
+ 11871.890625
+ ],
+ "perplexity": [
+ 31154.529296875,
+ 31088.609375,
+ 31035.79296875,
+ 31128.365234375,
+ 31052.521484375,
+ 31292.01171875,
+ 30575.08984375,
+ 30647.900390625,
+ 31290.48828125,
+ 30241.9453125,
+ 30386.8984375,
+ 30139.26953125,
+ 30028.556640625,
+ 29741.388671875,
+ 29391.01953125,
+ 30398.14453125,
+ 31056.044921875,
+ 31104.41796875,
+ 30798.970703125,
+ 30102.791015625,
+ 30365.43359375,
+ 29363.146484375,
+ 29221.5703125,
+ 30261.072265625,
+ 29413.42578125
+ ],
+ "sequence_accuracy": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "token_accuracy": [
+ 0.0018595437286421657,
+ 0.0014020088128745556,
+ 0.0012599329929798841,
+ 0.0013859043829143047,
+ 0.0018072795355692506,
+ 0.0018894081003963947,
+ 0.0021492578089237213,
+ 0.0020676767453551292,
+ 0.0011784115340560675,
+ 0.0016606699209660292,
+ 0.0020560333505272865,
+ 0.0016291090287268162,
+ 0.001910633291117847,
+ 0.0012946828501299024,
+ 0.0016550812870264053,
+ 0.0012489061336964369,
+ 0.0012834429508075118,
+ 0.001796281081624329,
+ 0.0012682911474257708,
+ 0.001162951928563416,
+ 0.0010253882501274347,
+ 0.002276905346661806,
+ 0.0018036025576293468,
+ 0.001517598400823772,
+ 0.002525100251659751
+ ]
+ }
+ },
+ "validation": {}
+ }
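
training_statistics.json is plain JSON keyed by split, feature, and metric, with one value per epoch (25 epochs in this run, training split only). It can be inspected without Ludwig; a minimal sketch, assuming the file has been downloaded to the relative path used below:

# Sketch for eyeballing the training curve in this file.
# The path is an assumption; adjust to wherever the repo is checked out.
import json
import matplotlib.pyplot as plt

with open("model/training_statistics.json") as f:
    stats = json.load(f)

loss = stats["training"]["combined"]["loss"]   # one value per epoch
epochs = range(1, len(loss) + 1)

plt.plot(epochs, loss, marker="o")
plt.xlabel("epoch")
plt.ylabel("combined training loss")
plt.title("Mistral-7B LoRA fine-tune: training loss")
plt.show()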