{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Fine-tune DeepSeek with a Synthetic Reasoning Dataset\n", "\n", "This notebook demonstrates the fine-tuning process of `unsloth/DeepSeek-R1-Distill-Qwen-1.5B-unsloth-bnb-4bit` using a synthetic reasoning dataset.\n", "\n", "It provides a complete walkthrough of the fine-tuning process after generating synthetic data using the Synthetic Data Generator. For a comprehensive explanation of the methodology and additional details, refer to the blog post: [Fine-tune DeepSeek with a Synthetic Reasoning Dataset](https://huggingface.co/blog/sdiazlor/fine-tune-deepseek-with-a-synthetic-reasoning-data)." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Getting Started" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Install the Dependencies" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!pip install datasets\n", "!pip install unsloth" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Import the Required Libraries" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import torch\n", "from datasets import load_dataset\n", "from transformers import TrainingArguments\n", "from trl import SFTTrainer\n", "from unsloth import is_bfloat16_supported, FastLanguageModel" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Configure the Environment" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "MODEL = \"unsloth/DeepSeek-R1-Distill-Qwen-1.5B-unsloth-bnb-4bit\"\n", "REPO_NAME = \"sdiazlor\" # your HF username here\n", "MODEL_NAME = \"deepseek-r1-distill-qwen-1.5-unsloth-sft-python\"" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Load the Model and Tokenizer" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 316, "referenced_widgets": [ "5b30f4877b554974bb8cdfc9814bb967", "1b216f51a8714e9dab82dc844fed17bf", "a87fd32f965c41e68249ba9bf4234691", "0b7b5a1ecb964a9cbd035b559438fb52", "453b1c07c5e345fbac74856842c28784", "5718ee0ebe1f4cadaf79c14d2a8e5339", "a03e6373939e489c87c8ed6546dc03d6", "b9974277d199419699e9b2a584f6198f", "210d0c13cbec434097f0a9b6b68d456d", "31abf79b170443fabbbb924ca4220e4c", "0142b4aeab9b4d87819b86c26a908f7a", "a4362f76cffa45a89675d66f8f51d440", "178df7b3c6b74583b70b6ebfd2272336", "921c02b29a564833ae10ac54f220c1d4", "4b8d88e2c0634338affb3b4d81e77974", "67dfa13c01f64b589e04c053f89ec671", "2bdd42e1bb57415584d2a38edfe8ea6b", "398508c460594d22b7fdbf3be15d761b", "a383ed5654194fefa9312b747d5d6ed6", "03ea6c67112843c9933b3e1a768b77f6", "b7c04ccedc1c48adb34f615f24b8c424", "db31df39f56546b6a64a55f97b91a8f2", "f187dd6f0dbf4f49bca90bd394f27e23", "1a0a4ae3d6c147c3bbb72e703858aedc", "d3de80a9bafa4b4b9c13c4361d6213db", "84a304df3ea4459ebcd3706f57404eba", "6904076b530c44ae8a34b2fd424b89c0", "5e51b3b8054d477fa3af1cf1682d4304", "213c0e1e238849f7a071b2f9bcf9c433", "dc84dfd9af984f02a09770dffc8f3001", "38e3a03149754b048a0459f4e5d00e2a", "f74d4bc9f00b4cac8de83792f405ad5a", "4ddc8e1a851c4e9da6ede032d40908f0", "24dfeff577da4393bdaa5de47bf5d096", "0b722c2760634ce3856dbf2f3ac329fe", "8d652113d0964829a65fbbcab211e141", "d0eb3820302646e9a848da2f936aa9eb", "d089d26bfd1a4a8488133f0f82b120c3", "5094f58022ca4565873196fde589bec8", "d180a951e872463c8c39f28f61b3b734", "02302f414c384422920e3e310f597063", "a8001e449dca4533bd56918d8da1a69d", 
"19b6b7f26e394d9fb7c2992d71b1fc14", "007ba2f3f37c48f8a5e7e21a4a2e5a71", "65a71e24010b4ba4884dc0bb2fe37e1f", "99831f0a5b204b24923d5ca3d30c42a9", "52b547101e8848cb9e5881c3e63eca3c", "b6e67b2b00be4835962dc5939e25a289", "747b3c80a240453c986bc8db01072394", "29d52353241742bf882ade4bc4eeb402", "c2cd6a947bda4a09ad56b0c2069a9166", "02db395a497443adad709a5a5b2ee463", "28b224e830e24bbdb9abf547b3789769", "a76024ba39194881b63302a11383e84e", "40f0399f117845b08eff5c5ed84f137d" ] }, "id": "QmUBVEnvCDJv", "outputId": "977ab27e-7773-4b20-e16b-7df3f1d8994e" }, "outputs": [], "source": [ "# Load the 4bit pre quantized model of deepseek and the tokenizer\n", "\n", "model, tokenizer = FastLanguageModel.from_pretrained(\n", " model_name = MODEL,\n", " max_seq_length = 2048,\n", " dtype = None,\n", " load_in_4bit = True,\n", ")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "6bZsfBuZDeCL", "outputId": "4bdd6bfa-47c5-47cd-cd45-475107258a89" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Unsloth 2025.2.5 patched 28 layers with 28 QKV layers, 28 O layers and 28 MLP layers.\n" ] } ], "source": [ "# We add the LORA adapters to the model\n", "model = FastLanguageModel.get_peft_model(\n", " model,\n", " r=16,\n", " target_modules=[\n", " \"q_proj\",\n", " \"k_proj\",\n", " \"v_proj\",\n", " \"o_proj\",\n", " \"gate_proj\",\n", " \"up_proj\",\n", " \"down_proj\",\n", " ],\n", " lora_alpha=16,\n", " lora_dropout=0,\n", " bias=\"none\",\n", " use_gradient_checkpointing=\"unsloth\",\n", " random_state=3407,\n", " use_rslora=False,\n", " loftq_config=None,\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Pre-process the Synthetic Data" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 154, "referenced_widgets": [ "e6ffb622331241239867b1477d3da76b", "5d835002db074abea3d5b87e0ce72618", "8b79b01a42614f4e9ae6550703561236", "a9f65983f1f24d7c9cceaf65c2191c7d", "560be4516ac74cd5a29a0b8304976810", "679a9d5ae6fb4916a2261f556969f3a0", "09095674ac234d19be3f0454146201c2", "225b460bd91b4995b81cb967a7f8c42c", "04b79a83f42745449910b1733a8496c7", "36f85ae64a1e440b8b15d0c5625bd23f", "fd3643434d0f477f90a59dc8e0bf64f2" ] }, "id": "LjY75GoYUCB8", "outputId": "35602bba-6cdf-4cb5-ac87-967679f10350" }, "outputs": [ { "data": { "application/vnd.google.colaboratory.intrinsic+json": { "type": "string" }, "text/plain": [ "'Below is an instruction that describes a task, paired with a question that provides further context. \\nWrite a response that appropriately answer the question. \\nBefore answering, think carefully but concisely about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.\\n\\n### Instruction:\\nYou are an expert programmer with advanced knowledge in Python. Your task is to provide concise and easy-to-understand solutions. Please answer the following python question. \\n\\n### Question:\\nHow can I solve a Rubik\\'s Cube? \\n\\n\\n\\n### Response:\\nOkay, so I want to solve a Rubik\\'s Cube, but I\\'m not really sure where to start. I remember seeing people solve them quickly, but it looks so complicated. Let me try to break it down.\\n\\nFirst, I think I need to understand the structure of the cube. It has six faces, each with nine smaller squares of different colors. The goal is to have each face all one color. But how do the pieces move?\\n\\nI\\'ve heard about algorithms before. 
Maybe that\\'s the key. I guess algorithms are specific sequences of moves that solve certain parts of the cube. But I\\'m not sure which ones to use or in what order.\\n\\nI remember someone mentioning something called the cross. That\\'s probably the first step. So, I should start by making a cross on one face. Let\\'s pick the white face for the cross. I need to get all the white edges aligned with their corresponding center colors.\\n\\nOnce the cross is done, the next step might be to solve the corners of that face. Each corner has three colors, so I need to make sure they match the adjacent center colors. Maybe there\\'s a specific algorithm for inserting the corners correctly without messing up the cross.\\n\\nAfter the first layer is done, I think the next part is the middle layer. This involves moving the edge pieces between the first and last layers. I\\'ve heard terms like F, R, U, etc., which stand for front, right, up, etc. Maybe the algorithm for the middle layer involves moving a piece from the top to the correct position.\\n\\nThen comes the last layer, which is the trickiest part. I think this involves orienting the edges so they all face the right way, and then permuting the corners. I remember something about the \"OLL\" and \"PLL\" steps. OLL is orienting the last layer, and PLL is permuting it. There are a lot of algorithms for each, so it\\'s important to learn the common ones first.\\n\\nI\\'m a bit confused about how to recognize when to use each algorithm. Maybe I should start with the most common ones, like the cross, then F2L, OLL, and PLL. Each step builds on the previous one, so I shouldn\\'t skip ahead.\\n\\nI also wonder about the notation. F means moving the front face clockwise, F\\' is counterclockwise, and F2 is turning it twice. Understanding this notation is crucial for following the algorithms.\\n\\nI should probably practice each step separately. Start with solving the cross, then move on to the corners, and so on. It might take a while to get each step right, but with practice, it should become easier.\\n\\nWait, what if I get stuck? Maybe I should look up some tutorials or guides that break it down step by step. There are probably detailed explanations and videos that can help me visualize the moves better.\\n\\nIn summary, solving a Rubik\\'s Cube seems to involve a series of structured steps, each with specific algorithms. I need to learn each step, practice the moves, and gradually build up to solving the entire cube. It might be challenging, but breaking it down into smaller parts makes it manageable.\\n\\n\\nTo solve a Rubik\\'s Cube, follow this structured approach:\\n\\n1. **Understand the Structure**: Familiarize yourself with the cube\\'s layout, noting that each face has nine smaller squares and six faces in total.\\n\\n2. **Learn Notation**: Understand the basic move notations (F, R, U, etc.) and their directions (clockwise, counterclockwise, and 180-degree turns).\\n\\n3. **Step 1: Solve the Cross**:\\n - Begin with the white face.\\n - Align the white edges with their corresponding center colors.\\n\\n4. **Step 2: Solve the Corners**:\\n - Position the corners so they match adjacent center colors, using specific algorithms to insert them correctly without disrupting the cross.\\n\\n5. **Step 3: Solve the Middle Layer**:\\n - Move edge pieces between the first and last layers, using algorithms to place them correctly.\\n\\n6. 
**Step 4: Orient the Last Layer (OLL)**:\\n - Use algorithms to orient the last layer\\'s edges so they all face the correct way.\\n\\n7. **Step 5: Permute the Last Layer (PLL)**:\\n - Apply algorithms to permute the corners, ensuring they are in the correct positions.\\n\\n8. **Practice and Resources**:\\n - Practice each step separately, gradually building up skills.\\n - Use tutorials or guides for detailed explanations and visual aids.\\n\\nBy following these steps and practicing each algorithm, you can systematically solve the Rubik\\'s Cube.\\n<|end▁of▁sentence|>'" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Prepare the dataset\n",
 "\n",
 "prompt_style = \"\"\"Below is an instruction that describes a task, paired with a question that provides further context.\n",
 "Write a response that appropriately answers the question.\n",
 "Before answering, think carefully but concisely about the question and create a step-by-step chain of thought to ensure a logical and accurate response.\n",
 "\n",
 "### Instruction:\n",
 "You are an expert programmer with advanced knowledge in Python. Your task is to provide concise and easy-to-understand solutions. Please answer the following Python question.\n",
 "\n",
 "### Question:\n",
 "{}\n",
 "\n",
 "### Response:\n",
 "{}\n",
 "\"\"\"\n",
 "\n",
 "EOS_TOKEN = tokenizer.eos_token\n",
 "\n",
 "\n",
 "def formatting_prompts_func(examples):\n",
 "    prompts = examples[\"prompt\"]\n",
 "    completions = examples[\"completion\"]\n",
 "    texts = []\n",
 "    for prompt, completion in zip(prompts, completions):\n",
 "        # Fill the template and append the EOS token so generation stops cleanly\n",
 "        text = prompt_style.format(prompt, completion) + EOS_TOKEN\n",
 "        texts.append(text)\n",
 "    return {\n",
 "        \"text\": texts,\n",
 "    }\n",
 "\n",
 "\n",
 "dataset = load_dataset(\"sdiazlor/python-reasoning-dataset\", split=\"train\")\n",
 "dataset = dataset.map(formatting_prompts_func, batched=True)\n",
 "dataset[\"text\"][0]" ] },
 { "cell_type": "markdown", "metadata": {}, "source": [ "## Train the Model" ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
 "# Configure the trainer\n",
 "trainer = SFTTrainer(\n",
 "    model=model,\n",
 "    tokenizer=tokenizer,\n",
 "    train_dataset=dataset,\n",
 "    dataset_text_field=\"text\",\n",
 "    max_seq_length=2048,\n",
 "    dataset_num_proc=2,\n",
 "    packing=False,\n",
 "    args=TrainingArguments(\n",
 "        per_device_train_batch_size=2,\n",
 "        gradient_accumulation_steps=4,\n",
 "        warmup_steps=5,\n",
 "        num_train_epochs=3,\n",
 "        learning_rate=2e-4,\n",
 "        fp16=not is_bfloat16_supported(),\n",
 "        bf16=is_bfloat16_supported(),\n",
 "        logging_steps=1,\n",
 "        optim=\"adamw_8bit\",\n",
 "        weight_decay=0.01,\n",
 "        lr_scheduler_type=\"linear\",\n",
 "        seed=3407,\n",
 "        output_dir=\"outputs\",\n",
 "        report_to=\"none\",\n",
 "    ),\n",
 ")" ] },
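 { "cell_type": "markdown", "metadata": {}, "source": [ "As a rough cross-check of the run summary printed by the next cell, the effective batch size is `per_device_train_batch_size * gradient_accumulation_steps`, and the total number of optimizer steps follows from the dataset size and the number of epochs. The sketch below recomputes these numbers; it is illustrative only, and the exact rounding can vary across `transformers` versions." ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
 "# Illustrative cross-check (not part of the original run): recompute the\n",
 "# effective batch size and total optimizer steps reported by the trainer.\n",
 "examples, epochs = 500, 3\n",
 "per_device, grad_accum = 2, 4\n",
 "effective_batch = per_device * grad_accum  # 8 sequences per optimizer step\n",
 "total_steps = (examples // effective_batch) * epochs  # 186\n",
 "print(effective_batch, total_steps)" ] },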
"bb329be2-eced-4843-cb86-00ed8395e992" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "==((====))== Unsloth - 2x faster free finetuning | Num GPUs = 1\n", " \\\\ /| Num examples = 500 | Num Epochs = 3\n", "O^O/ \\_/ \\ Batch size per device = 2 | Gradient Accumulation steps = 4\n", "\\ / Total batch size = 8 | Total steps = 186\n", " \"-____-\" Number of trainable parameters = 18,464,768\n" ] }, { "data": { "text/html": [ "\n", "
Step | \n", "Training Loss | \n", "
---|---|
1 | \n", "0.828400 | \n", "
2 | \n", "0.784400 | \n", "
3 | \n", "0.838800 | \n", "
4 | \n", "0.849500 | \n", "
5 | \n", "0.732300 | \n", "
6 | \n", "0.670100 | \n", "
7 | \n", "0.709900 | \n", "
8 | \n", "0.688500 | \n", "
9 | \n", "0.613600 | \n", "
10 | \n", "0.626500 | \n", "
11 | \n", "0.729400 | \n", "
12 | \n", "0.682100 | \n", "
13 | \n", "0.540100 | \n", "
14 | \n", "0.591000 | \n", "
15 | \n", "0.604800 | \n", "
16 | \n", "0.611600 | \n", "
17 | \n", "0.604000 | \n", "
18 | \n", "0.617800 | \n", "
19 | \n", "0.610100 | \n", "
20 | \n", "0.651400 | \n", "
21 | \n", "0.580700 | \n", "
22 | \n", "0.620100 | \n", "
23 | \n", "0.664400 | \n", "
24 | \n", "0.675600 | \n", "
25 | \n", "0.513200 | \n", "
26 | \n", "0.498600 | \n", "
27 | \n", "0.646800 | \n", "
28 | \n", "0.501700 | \n", "
29 | \n", "0.537800 | \n", "
30 | \n", "0.592300 | \n", "
31 | \n", "0.488400 | \n", "
32 | \n", "0.533100 | \n", "
33 | \n", "0.567700 | \n", "
34 | \n", "0.565900 | \n", "
35 | \n", "0.638700 | \n", "
36 | \n", "0.564400 | \n", "
37 | \n", "0.479600 | \n", "
38 | \n", "0.644000 | \n", "
39 | \n", "0.486400 | \n", "
40 | \n", "0.598800 | \n", "
41 | \n", "0.595200 | \n", "
42 | \n", "0.508000 | \n", "
43 | \n", "0.504900 | \n", "
44 | \n", "0.613700 | \n", "
45 | \n", "0.517800 | \n", "
46 | \n", "0.571700 | \n", "
47 | \n", "0.568900 | \n", "
48 | \n", "0.507400 | \n", "
49 | \n", "0.536600 | \n", "
50 | \n", "0.681900 | \n", "
51 | \n", "0.469500 | \n", "
52 | \n", "0.530200 | \n", "
53 | \n", "0.601400 | \n", "
54 | \n", "0.531000 | \n", "
55 | \n", "0.470400 | \n", "
56 | \n", "0.535800 | \n", "
57 | \n", "0.615800 | \n", "
58 | \n", "0.557500 | \n", "
59 | \n", "0.620600 | \n", "
60 | \n", "0.497700 | \n", "
61 | \n", "0.556100 | \n", "
62 | \n", "0.561300 | \n", "
63 | \n", "0.607200 | \n", "
64 | \n", "0.556200 | \n", "
65 | \n", "0.538400 | \n", "
66 | \n", "0.529800 | \n", "
67 | \n", "0.580100 | \n", "
68 | \n", "0.573100 | \n", "
69 | \n", "0.466100 | \n", "
70 | \n", "0.498400 | \n", "
71 | \n", "0.590800 | \n", "
72 | \n", "0.632500 | \n", "
73 | \n", "0.472400 | \n", "
74 | \n", "0.523400 | \n", "
75 | \n", "0.562500 | \n", "
76 | \n", "0.552200 | \n", "
77 | \n", "0.548400 | \n", "
78 | \n", "0.523300 | \n", "
79 | \n", "0.593300 | \n", "
80 | \n", "0.483600 | \n", "
81 | \n", "0.585400 | \n", "
82 | \n", "0.554700 | \n", "
83 | \n", "0.413900 | \n", "
84 | \n", "0.589400 | \n", "
85 | \n", "0.484100 | \n", "
86 | \n", "0.461000 | \n", "
87 | \n", "0.570700 | \n", "
88 | \n", "0.545900 | \n", "
89 | \n", "0.542300 | \n", "
90 | \n", "0.502100 | \n", "
91 | \n", "0.554100 | \n", "
92 | \n", "0.554000 | \n", "
93 | \n", "0.468700 | \n", "
94 | \n", "0.535800 | \n", "
95 | \n", "0.539100 | \n", "
96 | \n", "0.479600 | \n", "
97 | \n", "0.499100 | \n", "
98 | \n", "0.518300 | \n", "
99 | \n", "0.593800 | \n", "
100 | \n", "0.573200 | \n", "
101 | \n", "0.546400 | \n", "
102 | \n", "0.599600 | \n", "
103 | \n", "0.465200 | \n", "
104 | \n", "0.472400 | \n", "
105 | \n", "0.556100 | \n", "
106 | \n", "0.498800 | \n", "
107 | \n", "0.486900 | \n", "
108 | \n", "0.529000 | \n", "
109 | \n", "0.480100 | \n", "
110 | \n", "0.525900 | \n", "
111 | \n", "0.489700 | \n", "
112 | \n", "0.510600 | \n", "
113 | \n", "0.628300 | \n", "
114 | \n", "0.413200 | \n", "
115 | \n", "0.577800 | \n", "
116 | \n", "0.515000 | \n", "
117 | \n", "0.539300 | \n", "
118 | \n", "0.459200 | \n", "
119 | \n", "0.533700 | \n", "
120 | \n", "0.501700 | \n", "
121 | \n", "0.528400 | \n", "
122 | \n", "0.475900 | \n", "
123 | \n", "0.437600 | \n", "
124 | \n", "0.551700 | \n", "
125 | \n", "0.464600 | \n", "
126 | \n", "0.442300 | \n", "
127 | \n", "0.611100 | \n", "
128 | \n", "0.425300 | \n", "
129 | \n", "0.516900 | \n", "
130 | \n", "0.469100 | \n", "
131 | \n", "0.486200 | \n", "
132 | \n", "0.492100 | \n", "
133 | \n", "0.511100 | \n", "
134 | \n", "0.559500 | \n", "
135 | \n", "0.537600 | \n", "
136 | \n", "0.426800 | \n", "
137 | \n", "0.474200 | \n", "
138 | \n", "0.543500 | \n", "
139 | \n", "0.539800 | \n", "
140 | \n", "0.481500 | \n", "
141 | \n", "0.481400 | \n", "
142 | \n", "0.562000 | \n", "
143 | \n", "0.409100 | \n", "
144 | \n", "0.440900 | \n", "
145 | \n", "0.437700 | \n", "
146 | \n", "0.427300 | \n", "
147 | \n", "0.393100 | \n", "
148 | \n", "0.480300 | \n", "
149 | \n", "0.509300 | \n", "
150 | \n", "0.450200 | \n", "
151 | \n", "0.530500 | \n", "
152 | \n", "0.475300 | \n", "
153 | \n", "0.521300 | \n", "
154 | \n", "0.519500 | \n", "
155 | \n", "0.539400 | \n", "
156 | \n", "0.433300 | \n", "
157 | \n", "0.495400 | \n", "
158 | \n", "0.415200 | \n", "
159 | \n", "0.608800 | \n", "
160 | \n", "0.524700 | \n", "
161 | \n", "0.438700 | \n", "
162 | \n", "0.504800 | \n", "
163 | \n", "0.455700 | \n", "
164 | \n", "0.455100 | \n", "
165 | \n", "0.592300 | \n", "
166 | \n", "0.565700 | \n", "
167 | \n", "0.480800 | \n", "
168 | \n", "0.546100 | \n", "
169 | \n", "0.463100 | \n", "
170 | \n", "0.573400 | \n", "
171 | \n", "0.500700 | \n", "
172 | \n", "0.516700 | \n", "
173 | \n", "0.572000 | \n", "
174 | \n", "0.411700 | \n", "
175 | \n", "0.452700 | \n", "
176 | \n", "0.424900 | \n", "
177 | \n", "0.489200 | \n", "
178 | \n", "0.574300 | \n", "
179 | \n", "0.479700 | \n", "
180 | \n", "0.487800 | \n", "
181 | \n", "0.513700 | \n", "
182 | \n", "0.492800 | \n", "
183 | \n", "0.535100 | \n", "
184 | \n", "0.501100 | \n", "
185 | \n", "0.450400 | \n", "
186 | \n", "0.484500 | \n", "
"
],
"text/plain": [
"