VERSIL91 committed about 1 month ago

Commit

2a83b24

verified ·

1 Parent(s): 7ba3bfe

End of training

Browse files

Files changed (23) hide show

.gitattributes +1 -0
README.md +131 -129
adapter_config.json +34 -0
adapter_model.bin +3 -0
adapter_model.safetensors +3 -0
config.json +26 -165
last-checkpoint/README.md +202 -0
last-checkpoint/adapter_config.json +34 -0
last-checkpoint/adapter_model.safetensors +3 -0
last-checkpoint/optimizer.pt +3 -0
last-checkpoint/rng_state.pth +3 -0
last-checkpoint/scheduler.pt +3 -0
last-checkpoint/special_tokens_map.json +30 -0
last-checkpoint/tokenizer.json +0 -0
last-checkpoint/tokenizer.model +3 -0
last-checkpoint/tokenizer_config.json +45 -0
last-checkpoint/trainer_state.json +151 -0
last-checkpoint/training_args.bin +3 -0
special_tokens_map.json +28 -5
tokenizer.json +0 -0
tokenizer.model +3 -0
tokenizer_config.json +43 -19
training_args.bin +3 -0

.gitattributes CHANGED Viewed

@@ -25,6 +25,7 @@
 *.safetensors filter=lfs diff=lfs merge=lfs -text
 saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.tar.* filter=lfs diff=lfs merge=lfs -text
 *.tflite filter=lfs diff=lfs merge=lfs -text
 *.tgz filter=lfs diff=lfs merge=lfs -text
 *.wasm filter=lfs diff=lfs merge=lfs -text

 *.safetensors filter=lfs diff=lfs merge=lfs -text
 saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
 *.tflite filter=lfs diff=lfs merge=lfs -text
 *.tgz filter=lfs diff=lfs merge=lfs -text
 *.wasm filter=lfs diff=lfs merge=lfs -text

README.md CHANGED Viewed

@@ -1,152 +1,154 @@
 ---
-pipeline_tag: image-to-text
 tags:
-- image-captioning
-languages:
-- en
-license: bsd-3-clause
 ---
-# BLIP: Bootstrapping Language-Image Pre-training for Unified Vision-Language Understanding and Generation
-Model card for image captioning pretrained on COCO dataset - base architecture (with ViT base backbone).
-| ![BLIP.gif](https://cdn-uploads.huggingface.co/production/uploads/1670928184033-62441d1d9fdefb55a0b7d12c.gif) |
-|:--:|
-| <b> Pull figure from BLIP official repo | Image source: https://github.com/salesforce/BLIP </b>|
-## TL;DR
-Authors from the [paper](https://arxiv.org/abs/2201.12086) write in the abstract:
-*Vision-Language Pre-training (VLP) has advanced the performance for many vision-language tasks. However, most existing pre-trained models only excel in either understanding-based tasks or generation-based tasks. Furthermore, performance improvement has been largely achieved by scaling up the dataset with noisy image-text pairs collected from the web, which is a suboptimal source of supervision. In this paper, we propose BLIP, a new VLP framework which transfers flexibly to both vision-language understanding and generation tasks. BLIP effectively utilizes the noisy web data by bootstrapping the captions, where a captioner generates synthetic captions and a filter removes the noisy ones. We achieve state-of-the-art results on a wide range of vision-language tasks, such as image-text retrieval (+2.7% in average recall@1), image captioning (+2.8% in CIDEr), and VQA (+1.6% in VQA score). BLIP also demonstrates strong generalization ability when directly transferred to videolanguage tasks in a zero-shot manner. Code, models, and datasets are released.*
-## Usage
-You can use this model for conditional and un-conditional image captioning
-### Using the Pytorch model
-#### Running the model on CPU
-<details>
-<summary> Click to expand </summary>
-```python
-import requests
-from PIL import Image
-from transformers import BlipProcessor, BlipForConditionalGeneration
-processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
-model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
-img_url = 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/demo.jpg'
-raw_image = Image.open(requests.get(img_url, stream=True).raw).convert('RGB')
-# conditional image captioning
-text = "a photography of"
-inputs = processor(raw_image, text, return_tensors="pt")
-out = model.generate(**inputs)
-print(processor.decode(out[0], skip_special_tokens=True))
-# >>> a photography of a woman and her dog
-# unconditional image captioning
-inputs = processor(raw_image, return_tensors="pt")
-out = model.generate(**inputs)
-print(processor.decode(out[0], skip_special_tokens=True))
->>> a woman sitting on the beach with her dog
 ```
-</details>
-#### Running the model on GPU
-##### In full precision
-<details>
-<summary> Click to expand </summary>
-```python
-import requests
-from PIL import Image
-from transformers import BlipProcessor, BlipForConditionalGeneration
-processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
-model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to("cuda")
-img_url = 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/demo.jpg'
-raw_image = Image.open(requests.get(img_url, stream=True).raw).convert('RGB')
-# conditional image captioning
-text = "a photography of"
-inputs = processor(raw_image, text, return_tensors="pt").to("cuda")
-out = model.generate(**inputs)
-print(processor.decode(out[0], skip_special_tokens=True))
-# >>> a photography of a woman and her dog
-# unconditional image captioning
-inputs = processor(raw_image, return_tensors="pt").to("cuda")
-out = model.generate(**inputs)
-print(processor.decode(out[0], skip_special_tokens=True))
->>> a woman sitting on the beach with her dog
-```
-</details>
-##### In half precision (`float16`)
-<details>
-<summary> Click to expand </summary>
-```python
-import torch
-import requests
-from PIL import Image
-from transformers import BlipProcessor, BlipForConditionalGeneration
-processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
-model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base", torch_dtype=torch.float16).to("cuda")
-img_url = 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/demo.jpg'
-raw_image = Image.open(requests.get(img_url, stream=True).raw).convert('RGB')
-# conditional image captioning
-text = "a photography of"
-inputs = processor(raw_image, text, return_tensors="pt").to("cuda", torch.float16)
-out = model.generate(**inputs)
-print(processor.decode(out[0], skip_special_tokens=True))
-# >>> a photography of a woman and her dog
-# unconditional image captioning
-inputs = processor(raw_image, return_tensors="pt").to("cuda", torch.float16)
-out = model.generate(**inputs)
-print(processor.decode(out[0], skip_special_tokens=True))
->>> a woman sitting on the beach with her dog
-```
-</details>
-## BibTex and citation info
-```
-@misc{https://doi.org/10.48550/arxiv.2201.12086,
-  doi = {10.48550/ARXIV.2201.12086},
-  url = {https://arxiv.org/abs/2201.12086},
-  author = {Li, Junnan and Li, Dongxu and Xiong, Caiming and Hoi, Steven},
-  keywords = {Computer Vision and Pattern Recognition (cs.CV), FOS: Computer and information sciences, FOS: Computer and information sciences},
-  title = {BLIP: Bootstrapping Language-Image Pre-training for Unified Vision-Language Understanding and Generation},
-  publisher = {arXiv},
-  year = {2022},
-  copyright = {Creative Commons Attribution 4.0 International}
-}
-```

 ---
+library_name: peft
+license: apache-2.0
+base_model: unsloth/mistral-7b-instruct-v0.2
 tags:
+- axolotl
+- generated_from_trainer
+model-index:
+- name: bbdecd00-d8b9-464e-9454-e600bc6d1772
+  results: []
 ---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+[<img src="https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/axolotl-ai-cloud/axolotl)
+<details><summary>See axolotl config</summary>
+axolotl version: `0.4.1`
+```yaml
+adapter: lora
+base_model: unsloth/mistral-7b-instruct-v0.2
+bf16: auto
+chat_template: llama3
+dataset_prepared_path: null
+datasets:
+- data_files:
+  - 424a669f72441b3b_train_data.json
+  ds_type: json
+  format: custom
+  path: /workspace/input_data/424a669f72441b3b_train_data.json
+  type:
+    field_input: intent
+    field_instruction: instruction
+    field_output: response
+    format: '{instruction} {input}'
+    no_input_format: '{instruction}'
+    system_format: '{system}'
+    system_prompt: ''
+debug: null
+deepspeed: null
+early_stopping_patience: null
+eval_max_new_tokens: 128
+eval_table_size: null
+evals_per_epoch: 5
+flash_attention: true
+fp16: null
+fsdp: null
+fsdp_config: null
+gradient_accumulation_steps: 4
+gradient_checkpointing: false
+group_by_length: false
+hub_model_id: duyphu/bbdecd00-d8b9-464e-9454-e600bc6d1772
+hub_repo: null
+hub_strategy: checkpoint
+hub_token: null
+learning_rate: 0.0001
+load_in_4bit: false
+load_in_8bit: false
+local_rank: null
+logging_steps: 5
+lora_alpha: 16
+lora_dropout: 0.05
+lora_fan_in_fan_out: null
+lora_model_dir: null
+lora_r: 8
+lora_target_linear: true
+lr_scheduler: cosine
+max_steps: 50
+micro_batch_size: 2
+mlflow_experiment_name: /tmp/424a669f72441b3b_train_data.json
+model_type: AutoModelForCausalLM
+num_epochs: 1
+optimizer: adamw_bnb_8bit
+output_dir: miner_id_24
+pad_to_sequence_len: true
+resume_from_checkpoint: null
+s2_attention: null
+sample_packing: false
+saves_per_epoch: 4
+sequence_len: 512
+strict: false
+tf32: false
+tokenizer_type: AutoTokenizer
+train_on_inputs: false
+trust_remote_code: true
+val_set_size: 0.05
+wandb_entity: null
+wandb_mode: online
+wandb_name: bbdecd00-d8b9-464e-9454-e600bc6d1772
+wandb_project: Gradients-On-Demand
+wandb_run: your_name
+wandb_runid: bbdecd00-d8b9-464e-9454-e600bc6d1772
+warmup_steps: 10
+weight_decay: 0.0
+xformers_attention: null
 ```
+</details><br>
+# bbdecd00-d8b9-464e-9454-e600bc6d1772
+This model is a fine-tuned version of [unsloth/mistral-7b-instruct-v0.2](https://huggingface.co/unsloth/mistral-7b-instruct-v0.2) on a custom JSON dataset (`424a669f72441b3b_train_data.json`; see the axolotl config above).
+It achieves the following results on the evaluation set:
+- Loss: 0.5076
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 0.0001
+- train_batch_size: 2
+- eval_batch_size: 2
+- seed: 42
+- gradient_accumulation_steps: 4
+- total_train_batch_size: 8
+- optimizer: adamw_bnb_8bit (OptimizerNames.ADAMW_BNB) with betas=(0.9,0.999), epsilon=1e-08, and no additional optimizer arguments
+- lr_scheduler_type: cosine
+- lr_scheduler_warmup_steps: 10
+- training_steps: 50
+### Training results
+| Training Loss | Epoch  | Step | Validation Loss |
+|:-------------:|:------:|:----:|:---------------:|
+| No log        | 0.0000 | 1    | 0.6332          |
+| 2.2524        | 0.0004 | 10   | 0.5776          |
+| 2.1579        | 0.0008 | 20   | 0.5359          |
+| 2.0424        | 0.0013 | 30   | 0.5147          |
+| 1.8087        | 0.0017 | 40   | 0.5086          |
+| 1.9481        | 0.0021 | 50   | 0.5076          |
+### Framework versions
+- PEFT 0.13.2
+- Transformers 4.46.0
+- Pytorch 2.5.0+cu124
+- Datasets 3.0.1
+- Tokenizers 0.20.1

adapter_config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "unsloth/mistral-7b-instruct-v0.2",
+  "bias": "none",
+  "fan_in_fan_out": null,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "up_proj",
+    "v_proj",
+    "q_proj",
+    "k_proj",
+    "down_proj",
+    "o_proj",
+    "gate_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

adapter_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b10395c2efc64d435af4f240a6c350d8f177e7189c5d3266f1e702c2e2034e8b
+size 84047370

adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2f24c3cfbb258d2bc717c08ad327df0af70fcc1a91839f617d146fde5842385d
+size 83945296

config.json CHANGED Viewed

@@ -1,169 +1,30 @@
 {
-  "_commit_hash": null,
   "architectures": [
-    "BlipForConditionalGeneration"
   ],
-  "image_text_hidden_size": 256,
-  "initializer_factor": 1.0,
-  "logit_scale_init_value": 2.6592,
-  "model_type": "blip",
-  "projection_dim": 512,
-  "text_config": {
-    "_name_or_path": "",
-    "add_cross_attention": false,
-    "architectures": null,
-    "attention_probs_dropout_prob": 0.0,
-    "bad_words_ids": null,
-    "begin_suppress_tokens": null,
-    "bos_token_id": 30522,
-    "chunk_size_feed_forward": 0,
-    "cross_attention_hidden_size": null,
-    "decoder_start_token_id": null,
-    "diversity_penalty": 0.0,
-    "do_sample": false,
-    "early_stopping": false,
-    "encoder_no_repeat_ngram_size": 0,
-    "eos_token_id": 2,
-    "exponential_decay_length_penalty": null,
-    "finetuning_task": null,
-    "forced_bos_token_id": null,
-    "forced_eos_token_id": null,
-    "hidden_act": "gelu",
-    "hidden_dropout_prob": 0.0,
-    "hidden_size": 768,
-    "id2label": {
-      "0": "LABEL_0",
-      "1": "LABEL_1"
-    },
-    "initializer_factor": 1.0,
-    "initializer_range": 0.02,
-    "intermediate_size": 3072,
-    "is_decoder": true,
-    "is_encoder_decoder": false,
-    "label2id": {
-      "LABEL_0": 0,
-      "LABEL_1": 1
-    },
-    "layer_norm_eps": 1e-12,
-    "length_penalty": 1.0,
-    "max_length": 20,
-    "max_position_embeddings": 512,
-    "min_length": 0,
-    "model_type": "blip_text_model",
-    "no_repeat_ngram_size": 0,
-    "num_attention_heads": 12,
-    "num_beam_groups": 1,
-    "num_beams": 1,
-    "num_hidden_layers": 12,
-    "num_return_sequences": 1,
-    "output_attentions": false,
-    "output_hidden_states": false,
-    "output_scores": false,
-    "pad_token_id": 0,
-    "prefix": null,
-    "problem_type": null,
-    "projection_dim": 768,
-    "pruned_heads": {},
-    "remove_invalid_values": false,
-    "repetition_penalty": 1.0,
-    "return_dict": true,
-    "return_dict_in_generate": false,
-    "sep_token_id": 102,
-    "suppress_tokens": null,
-    "task_specific_params": null,
-    "temperature": 1.0,
-    "tf_legacy_loss": false,
-    "tie_encoder_decoder": false,
-    "tie_word_embeddings": true,
-    "tokenizer_class": null,
-    "top_k": 50,
-    "top_p": 1.0,
-    "torch_dtype": null,
-    "torchscript": false,
-    "transformers_version": "4.26.0.dev0",
-    "typical_p": 1.0,
-    "use_bfloat16": false,
-    "use_cache": true,
-    "vocab_size": 30524
-  },
-  "torch_dtype": "float32",
-  "transformers_version": null,
-  "vision_config": {
-    "_name_or_path": "",
-    "add_cross_attention": false,
-    "architectures": null,
-    "attention_dropout": 0.0,
-    "bad_words_ids": null,
-    "begin_suppress_tokens": null,
-    "bos_token_id": null,
-    "chunk_size_feed_forward": 0,
-    "cross_attention_hidden_size": null,
-    "decoder_start_token_id": null,
-    "diversity_penalty": 0.0,
-    "do_sample": false,
-    "dropout": 0.0,
-    "early_stopping": false,
-    "encoder_no_repeat_ngram_size": 0,
-    "eos_token_id": null,
-    "exponential_decay_length_penalty": null,
-    "finetuning_task": null,
-    "forced_bos_token_id": null,
-    "forced_eos_token_id": null,
-    "hidden_act": "gelu",
-    "hidden_size": 768,
-    "id2label": {
-      "0": "LABEL_0",
-      "1": "LABEL_1"
-    },
-    "image_size": 384,
-    "initializer_factor": 1.0,
-    "initializer_range": 0.02,
-    "intermediate_size": 3072,
-    "is_decoder": false,
-    "is_encoder_decoder": false,
-    "label2id": {
-      "LABEL_0": 0,
-      "LABEL_1": 1
-    },
-    "layer_norm_eps": 1e-05,
-    "length_penalty": 1.0,
-    "max_length": 20,
-    "min_length": 0,
-    "model_type": "blip_vision_model",
-    "no_repeat_ngram_size": 0,
-    "num_attention_heads": 12,
-    "num_beam_groups": 1,
-    "num_beams": 1,
-    "num_channels": 3,
-    "num_hidden_layers": 12,
-    "num_return_sequences": 1,
-    "output_attentions": false,
-    "output_hidden_states": false,
-    "output_scores": false,
-    "pad_token_id": null,
-    "patch_size": 16,
-    "prefix": null,
-    "problem_type": null,
-    "projection_dim": 512,
-    "pruned_heads": {},
-    "remove_invalid_values": false,
-    "repetition_penalty": 1.0,
-    "return_dict": true,
-    "return_dict_in_generate": false,
-    "sep_token_id": null,
-    "suppress_tokens": null,
-    "task_specific_params": null,
-    "temperature": 1.0,
-    "tf_legacy_loss": false,
-    "tie_encoder_decoder": false,
-    "tie_word_embeddings": true,
-    "tokenizer_class": null,
-    "top_k": 50,
-    "top_p": 1.0,
-    "torch_dtype": null,
-    "torchscript": false,
-    "transformers_version": "4.26.0.dev0",
-    "typical_p": 1.0,
-    "use_bfloat16": false
-  }
 }

 {
+  "_attn_implementation_autoset": true,
+  "_name_or_path": "unsloth/mistral-7b-instruct-v0.2",
   "architectures": [
+    "MistralForCausalLM"
   ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 14336,
+  "max_position_embeddings": 32768,
+  "model_type": "mistral",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 32,
+  "num_key_value_heads": 8,
+  "pad_token_id": 0,
+  "rms_norm_eps": 1e-05,
+  "rope_theta": 1000000.0,
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.46.0",
+  "unsloth_version": "2024.9",
+  "use_cache": false,
+  "vocab_size": 32000
 }

last-checkpoint/README.md ADDED Viewed

	@@ -0,0 +1,202 @@

+---
+base_model: unsloth/mistral-7b-instruct-v0.2
+library_name: peft
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.13.2

last-checkpoint/adapter_config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "unsloth/mistral-7b-instruct-v0.2",
+  "bias": "none",
+  "fan_in_fan_out": null,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "up_proj",
+    "v_proj",
+    "q_proj",
+    "k_proj",
+    "down_proj",
+    "o_proj",
+    "gate_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

last-checkpoint/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2f24c3cfbb258d2bc717c08ad327df0af70fcc1a91839f617d146fde5842385d
+size 83945296

last-checkpoint/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:057b5f2a11de48a8e21286097ed5865bed1e9be2cce0353edae1f05693c00c95
+size 43122580

last-checkpoint/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f16b181bbe413099cfca1ccdbe07d5a9a6db31431c5d5e02c8e1ec9e64e7035a
+size 14244

last-checkpoint/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b1df0528620c07325b8faa7567e59b0c1e86a1f1ee6af1245a69c6c0463fe4e2
+size 1064

last-checkpoint/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

last-checkpoint/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

last-checkpoint/tokenizer.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
+size 493443

last-checkpoint/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,45 @@

+{
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "add_prefix_space": null,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [],
+  "bos_token": "<s>",
+  "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "legacy": false,
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "<unk>",
+  "padding_side": "left",
+  "sp_model_kwargs": {},
+  "spaces_between_special_tokens": false,
+  "tokenizer_class": "LlamaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false
+}

last-checkpoint/trainer_state.json ADDED Viewed

	@@ -0,0 +1,151 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.00210737052842316,
+  "eval_steps": 10,
+  "global_step": 50,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 4.21474105684632e-05,
+      "eval_loss": 0.6331943273544312,
+      "eval_runtime": 1127.3109,
+      "eval_samples_per_second": 8.862,
+      "eval_steps_per_second": 4.431,
+      "step": 1
+    },
+    {
+      "epoch": 0.000210737052842316,
+      "grad_norm": 4.398820400238037,
+      "learning_rate": 5e-05,
+      "loss": 2.8283,
+      "step": 5
+    },
+    {
+      "epoch": 0.000421474105684632,
+      "grad_norm": 3.8103866577148438,
+      "learning_rate": 0.0001,
+      "loss": 2.2524,
+      "step": 10
+    },
+    {
+      "epoch": 0.000421474105684632,
+      "eval_loss": 0.5776350498199463,
+      "eval_runtime": 1131.3004,
+      "eval_samples_per_second": 8.831,
+      "eval_steps_per_second": 4.415,
+      "step": 10
+    },
+    {
+      "epoch": 0.000632211158526948,
+      "grad_norm": 3.405839204788208,
+      "learning_rate": 9.619397662556435e-05,
+      "loss": 2.2289,
+      "step": 15
+    },
+    {
+      "epoch": 0.000842948211369264,
+      "grad_norm": 2.9374308586120605,
+      "learning_rate": 8.535533905932738e-05,
+      "loss": 2.1579,
+      "step": 20
+    },
+    {
+      "epoch": 0.000842948211369264,
+      "eval_loss": 0.5359046459197998,
+      "eval_runtime": 1133.2705,
+      "eval_samples_per_second": 8.815,
+      "eval_steps_per_second": 4.408,
+      "step": 20
+    },
+    {
+      "epoch": 0.00105368526421158,
+      "grad_norm": 3.117830276489258,
+      "learning_rate": 6.91341716182545e-05,
+      "loss": 1.9743,
+      "step": 25
+    },
+    {
+      "epoch": 0.001264422317053896,
+      "grad_norm": 3.5818986892700195,
+      "learning_rate": 5e-05,
+      "loss": 2.0424,
+      "step": 30
+    },
+    {
+      "epoch": 0.001264422317053896,
+      "eval_loss": 0.5147128701210022,
+      "eval_runtime": 1132.2037,
+      "eval_samples_per_second": 8.824,
+      "eval_steps_per_second": 4.412,
+      "step": 30
+    },
+    {
+      "epoch": 0.001475159369896212,
+      "grad_norm": 3.088940382003784,
+      "learning_rate": 3.086582838174551e-05,
+      "loss": 2.0867,
+      "step": 35
+    },
+    {
+      "epoch": 0.001685896422738528,
+      "grad_norm": 2.9631552696228027,
+      "learning_rate": 1.4644660940672627e-05,
+      "loss": 1.8087,
+      "step": 40
+    },
+    {
+      "epoch": 0.001685896422738528,
+      "eval_loss": 0.5086004734039307,
+      "eval_runtime": 1133.9336,
+      "eval_samples_per_second": 8.81,
+      "eval_steps_per_second": 4.405,
+      "step": 40
+    },
+    {
+      "epoch": 0.001896633475580844,
+      "grad_norm": 3.090548038482666,
+      "learning_rate": 3.8060233744356633e-06,
+      "loss": 2.2553,
+      "step": 45
+    },
+    {
+      "epoch": 0.00210737052842316,
+      "grad_norm": 3.1793596744537354,
+      "learning_rate": 0.0,
+      "loss": 1.9481,
+      "step": 50
+    },
+    {
+      "epoch": 0.00210737052842316,
+      "eval_loss": 0.5075772404670715,
+      "eval_runtime": 1131.3867,
+      "eval_samples_per_second": 8.83,
+      "eval_steps_per_second": 4.415,
+      "step": 50
+    }
+  ],
+  "logging_steps": 5,
+  "max_steps": 50,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 13,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.708853041299456e+16,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}

last-checkpoint/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e54d6da192478432de773967107b1ddd0b051ecbe5cb42ca145bc39daa07375a
+size 6776

special_tokens_map.json CHANGED Viewed

@@ -1,7 +1,30 @@
 {
-  "cls_token": "[CLS]",
-  "mask_token": "[MASK]",
-  "pad_token": "[PAD]",
-  "sep_token": "[SEP]",
-  "unk_token": "[UNK]"
 }

 {
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
 }

tokenizer.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

tokenizer.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
+size 493443

tokenizer_config.json CHANGED Viewed

@@ -1,21 +1,45 @@
 {
-  "cls_token": "[CLS]",
-  "do_basic_tokenize": true,
-  "do_lower_case": true,
-  "mask_token": "[MASK]",
-  "model_max_length": 512,
-  "name_or_path": "bert-base-uncased",
-  "never_split": null,
-  "pad_token": "[PAD]",
-  "processor_class": "BlipProcessor",
-  "sep_token": "[SEP]",
-  "special_tokens_map_file": null,
-  "strip_accents": null,
-  "tokenize_chinese_chars": true,
-  "tokenizer_class": "BertTokenizer",
-  "unk_token": "[UNK]",
-  "model_input_names": [
-    "input_ids",
-    "attention_mask"
-  ]
 }

 {
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "add_prefix_space": null,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [],
+  "bos_token": "<s>",
+  "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "legacy": false,
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "<unk>",
+  "padding_side": "left",
+  "sp_model_kwargs": {},
+  "spaces_between_special_tokens": false,
+  "tokenizer_class": "LlamaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false
 }

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e54d6da192478432de773967107b1ddd0b051ecbe5cb42ca145bc39daa07375a
+size 6776