objects76 committed on
Commit
61aa42f
•
1 Parent(s): 8c37946

Upload phi-3.5-mini-fc.ipynb with huggingface_hub

Files changed (1)
  1. phi-3.5-mini-fc.ipynb +837 -0
phi-3.5-mini-fc.ipynb ADDED
@@ -0,0 +1,837 @@
+ {
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/home/ubuntu/miniforge3/envs/unsloth_env/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+ " from .autonotebook import tqdm as notebook_tqdm\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Token is valid (permission: write).\n",
+ "Your token has been saved in your configured git credential helpers (store).\n",
+ "Your token has been saved to /home/ubuntu/.cache/huggingface/token\n",
+ "Login successful\n"
+ ]
+ }
+ ],
+ "source": [
+ "%reload_ext autoreload\n",
+ "%autoreload 2\n",
+ "if '__file__' not in globals():\n",
+ " __file__, __name__ = globals()['__vsc_ipynb_file__'], '__ipynb__'\n",
+ " import types, sys; sys.modules['__ipynb__'] = types.ModuleType('__ipynb__')\n",
+ " from IPython.core.magic import register_cell_magic\n",
+ " @register_cell_magic\n",
+ " def skip_if(flag, cell): exec(cell, globals()) if flag and not eval(flag) else print('Cell skipped...')\n",
+ "\n",
+ "import sys, os\n",
+ "if os.path.abspath('.') not in sys.path: sys.path.append(os.path.abspath('.'))\n",
+ "\n",
+ "import os, huggingface_hub # !pip install huggingface_hub[hf_transfer]\n",
+ "huggingface_hub.login(token = os.environ.get('HF_TOKEN'), add_to_git_credential=True)\n",
+ "\n",
+ "import inspect\n",
+ "from pathlib import Path\n",
+ "from tqdm import tqdm\n",
+ "from glob import glob\n",
+ "import numpy as np; np.set_printoptions(precision=8, suppress=True); np.random.seed(42)\n",
+ "\n",
+ "class whitechar:\n",
+ " def __ror__(self, x): return x.replace('\\n', '\\\\n\\n').replace('\\t', '\\\\t\\t').replace(' ', '⎡')\n",
+ "wc = whitechar()\n",
+ "\n",
+ "class text_color:\n",
+ " black,red,green,yellow,blue,magenta,cyan,white,gray = [*range(30,38), 90] # fg colors; [*range(90,98), ''] for light fg colors\n",
+ " bold, italic, underline, strike = 1, 3, 4, 9 # attrs supported in vscode notebooks.\n",
+ " def __init__(self, fg, bg=0, attr=0):\n",
+ " attr = f'{attr};' if attr > 0 else ''\n",
+ " bg = f'{bg+10};' if bg > 0 else ''\n",
+ " self.clr = f'\\33[{attr}{bg}{fg}m'\n",
+ "\n",
+ " def __ror__(self, obj): return self.clr + str(obj) + '\\33[0m'\n",
+ " @staticmethod\n",
+ " def all(): return (text_color(clr) for clr in [*range(30,38), 90])\n",
+ "\n",
+ "black,red,green,yellow,blue,magenta,cyan,white,gray = text_color.all()\n",
+ "\n",
+ "class cout:\n",
+ " def __ror__(self, obj): print(f'[{inspect.stack()[1].lineno}] {str(obj)}')\n",
+ " def __call__(self, *args, **kwds): print(f'[{inspect.stack()[1].lineno+1}]', *args, **kwds)\n",
+ "out = cout()\n",
+ "\n",
+ "\n",
+ "os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True' # can help a little with VRAM requirements."
+ ]
+ },
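+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Illustration only (not part of the original run): how the pipe helpers\n",
+ "# above compose. `| wc` makes whitespace visible, `| green` colors the text,\n",
+ "# and `| out` prints it prefixed with the caller's line number.\n",
+ "'hello\\tworld' | wc | out\n",
+ "'ok' | green | out"
+ ]
+ },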
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import unsloth\n",
+ "import torch\n",
+ "\n",
+ "import wandb\n",
+ "wandb.init(project=\"phi-3.5-mini\", name='run-phi-3.5-mini')\n",
+ "os.environ[\"WANDB_NOTEBOOK_NAME\"] = __file__\n",
+ "\n",
+ "max_seq_length = 4096\n",
+ "use_4bit = False\n",
+ "\n",
+ "model, tokenizer = unsloth.FastLanguageModel.from_pretrained(\n",
+ " model_name=\"microsoft/Phi-3.5-mini-instruct\",\n",
+ " max_seq_length=max_seq_length,\n",
+ " dtype=None, # auto detect\n",
+ " load_in_4bit=use_4bit,\n",
+ ")\n",
+ "\n",
+ "model = unsloth.FastLanguageModel.get_peft_model(\n",
+ " model,\n",
+ " r=16,\n",
+ " target_modules=[\n",
+ " \"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\",\n",
+ " \"gate_proj\", \"up_proj\", \"down_proj\"],\n",
+ " lora_alpha=16,\n",
+ " lora_dropout=0,\n",
+ " bias=\"none\",\n",
+ " use_gradient_checkpointing=\"unsloth\",\n",
+ " random_state=3407,\n",
+ " use_rslora=False, # True\n",
+ " loftq_config=None,\n",
+ ")\n"
+ ]
+ },
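+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Sanity-check sketch (not in the original run): report how many parameters\n",
+ "# the LoRA adapters make trainable. `print_trainable_parameters` is the\n",
+ "# standard peft.PeftModel helper; with r=16 over the 7 projection modules\n",
+ "# above, the trainable share should be well under 1% of the base model.\n",
+ "model.print_trainable_parameters()"
+ ]
+ },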
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "tokenizer.padding_side = 'left' # right -> left\n",
+ "# tokenizer.add_bos_token = False\n",
+ "# tokenizer.truncation_side # right\n",
+ "tokenizer.special_tokens_map_extended\n",
+ "tokenizer.special_tokens_map\n",
+ "tokenizer.added_tokens_decoder\n",
+ "\n",
+ "tokenizer | out"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%skip_if\n",
+ "tokenizer.apply_chat_template(\n",
+ " [\n",
+ " {\"role\": \"user\", \"content\": \"hello\"},\n",
+ " {\"role\": \"assistant\", \"content\": \"hi\"},\n",
+ " {\"role\": \"user\", \"content\": \"how are you?\"},\n",
+ " ],\n",
+ " tokenize=False,\n",
+ " add_generation_prompt=True,\n",
+ ") | wc | out\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from datasets import load_dataset\n",
+ "\n",
+ "data_collator = None\n",
+ "ds_xlam_fc = load_dataset('json', data_files={\n",
+ " 'train': 'xlam-dataset-60k-qwen2-train.jsonl',\n",
+ "})\n",
+ "\n",
+ "# sample 3,000 examples from ds_xlam_fc\n",
+ "ds_xlam_fc3k = ds_xlam_fc['train'].shuffle(seed=42).select(range(3000))\n",
+ "ds_xlam_fc3k[0]\n"
+ ]
+ },
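+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Illustration only: the formatting step below assumes each record carries a\n",
+ "# 'messages' column (a list of role/content dicts) plus 'type' and 'source'\n",
+ "# columns, all of which are dropped after templating. Quick schema check:\n",
+ "ds_xlam_fc3k.column_names | out\n",
+ "[m['role'] for m in ds_xlam_fc3k[0]['messages']] | out"
+ ]
+ },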
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def formatting_prompts_func(examples):\n",
+ " print('formatting_prompts_func:', len(examples['messages'])) # batch size, not column count\n",
+ " convos = examples[\"messages\"]\n",
+ " texts = [tokenizer.apply_chat_template(convo, tokenize=False, add_generation_prompt=False) for convo in convos]\n",
+ " return {\"text\": texts}\n",
+ "\n",
+ "dataset_formatted = ds_xlam_fc3k.map(\n",
+ " formatting_prompts_func, batched=True,\n",
+ " remove_columns=[\"messages\", \"type\", \"source\"])\n",
+ "\n",
+ "dataset_formatted[199] | out"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import trl\n",
+ "\n",
+ "def print_tokens_with_ids(txt):\n",
+ " tokens = tokenizer.tokenize(txt, add_special_tokens=False)\n",
+ " token_ids = tokenizer.encode(txt, add_special_tokens=False)\n",
+ " return list(zip(tokens, token_ids))\n",
+ "\n",
+ "input_text = tokenizer.apply_chat_template(\n",
+ " [dict(role=\"user\", content=\"\\n111 222\"),\n",
+ " dict(role=\"assistant\", content=\"\\nxxx yyy\\n\"),\n",
+ " dict(role=\"user\", content=\"444 555\\n\"),],\n",
+ " tokenize=False, add_generation_prompt=True)\n",
+ "print_tokens_with_ids(input_text) | out\n",
+ "print_tokens_with_ids(\"\\n<|assistant|>\\n\") | green | out\n",
+ "\n",
+ "\n",
+ "data_collator = trl.DataCollatorForCompletionOnlyLM([32001], tokenizer=tokenizer) # 32001 = <|assistant|>\n",
+ "\n"
+ ]
+ },
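+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Sketch, not in the original run: DataCollatorForCompletionOnlyLM masks\n",
+ "# every label up to the response template (the single id 32001,\n",
+ "# <|assistant|>) with -100, so loss is computed on the assistant completion\n",
+ "# only. Check the masked fraction on one formatted example:\n",
+ "batch = data_collator([tokenizer(dataset_formatted[0]['text'])])\n",
+ "(batch['labels'] == -100).sum().item(), batch['labels'].numel()"
+ ]
+ },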
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "\n",
+ "import transformers\n",
+ "import unsloth\n",
+ "import trl\n",
+ "\n",
+ "train_args = trl.SFTConfig(\n",
+ " per_device_train_batch_size=8,\n",
+ " gradient_accumulation_steps=1,\n",
+ "\n",
+ " warmup_steps=5,\n",
+ " # max_steps=60,\n",
+ " num_train_epochs=1,\n",
+ "\n",
+ " # learning_rate=2e-4,\n",
+ " learning_rate=5e-5,\n",
+ " bf16=unsloth.is_bfloat16_supported(),\n",
+ " optim=\"adamw_torch\", # \"adamw_8bit\",\n",
+ "\n",
+ " weight_decay=0.01,\n",
+ " lr_scheduler_type=\"linear\",\n",
+ " seed=3407,\n",
+ "\n",
+ " gradient_checkpointing=True,\n",
+ " gradient_checkpointing_kwargs={\"use_reentrant\": True},\n",
+ "\n",
+ " output_dir=\"outputs_unslot\",\n",
+ " run_name=\"phi35-inst\",\n",
+ " logging_steps=1,\n",
+ " report_to='wandb',\n",
+ ")\n",
+ "\n",
+ "trainer = trl.SFTTrainer(\n",
+ " model=model,\n",
+ " tokenizer=tokenizer,\n",
+ "\n",
+ " train_dataset=dataset_formatted,\n",
+ " dataset_text_field=\"text\",\n",
+ " data_collator=data_collator,\n",
+ " packing=False,\n",
+ "\n",
+ " max_seq_length=max_seq_length,\n",
+ " dataset_num_proc=2,\n",
+ "\n",
+ " args=train_args,\n",
+ ")\n"
+ ]
+ },
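+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Back-of-envelope sketch (assumes a single GPU): the effective batch size is\n",
+ "# per_device_train_batch_size * gradient_accumulation_steps = 8 * 1 = 8, so\n",
+ "# one epoch over the 3,000 sampled rows is roughly 375 optimizer steps.\n",
+ "len(trainer.get_train_dataloader()) | out"
+ ]
+ },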
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "\n",
+ "gpu_stats = torch.cuda.get_device_properties(0)\n",
+ "start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n",
+ "max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)\n",
+ "print(f\"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.\")\n",
+ "print(f\"{start_gpu_memory} GB of memory reserved.\")\n",
+ "\n",
+ "trainer_stats = trainer.train()\n",
+ "\n",
+ "used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n",
+ "used_memory_for_lora = round(used_memory - start_gpu_memory, 3)\n",
+ "used_percentage = round(used_memory / max_memory * 100, 3)\n",
+ "lora_percentage = round(used_memory_for_lora / max_memory * 100, 3)\n",
+ "print(f\"{trainer_stats.metrics['train_runtime']} seconds used for training.\")\n",
+ "print(f\"{round(trainer_stats.metrics['train_runtime'] / 60, 2)} minutes used for training.\")\n",
+ "print(f\"Peak reserved memory = {used_memory} GB.\")\n",
+ "print(f\"Peak reserved memory for training = {used_memory_for_lora} GB.\")\n",
+ "print(f\"Peak reserved memory % of max memory = {used_percentage} %.\")\n",
+ "print(f\"Peak reserved memory for training % of max memory = {lora_percentage} %.\")\n",
+ "\n",
+ "model.save_pretrained_merged('outputs_unslot/model', tokenizer, save_method=\"merged_16bit\") # for best quality\n",
+ "\n",
+ "import unsloth\n",
+ "unsloth.FastLanguageModel.for_inference(model) # Enable native 2x faster inference"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# model.save_pretrained_merged('outputs_unslot/model', tokenizer, save_method=\"merged_16bit\") # for best quality\n",
+ "model.save_pretrained_merged('outputs_unslot/model/lora', tokenizer, save_method=\"lora\")\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# inference"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### load weight from saved"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import transformers, peft, torch, unsloth\n",
+ "\n",
+ "try:\n",
+ " del model\n",
+ " del tokenizer\n",
+ " torch.cuda.empty_cache()\n",
+ "except:\n",
+ " pass\n",
+ "\n",
+ "if 1: # load from the hf repo or the local merged dir (repo_name below picks which)\n",
+ " repo_name = \"objects76/phi-3.5-fc\" # phi-3.5-mini\n",
+ " repo_name = \"outputs_unslot/merged-model\"\n",
+ " model = transformers.AutoModelForCausalLM.from_pretrained(\n",
+ " repo_name, revision=\"main\",\n",
+ " torch_dtype=torch.bfloat16,\n",
+ " device_map=\"auto\",\n",
+ " trust_remote_code=True,\n",
+ " # attn_implementation=\"flash_attention_2\", # turn off if not supported by the model or your GPU\n",
+ " )\n",
+ " model.config.use_cache = True\n",
+ " model.eval()\n",
+ "\n",
+ " tokenizer = transformers.AutoTokenizer.from_pretrained(repo_name, trust_remote_code=True)\n",
+ "\n",
+ "elif 1:\n",
+ " max_seq_length = 4096\n",
+ " dtype = None\n",
+ " load_in_4bit = True\n",
+ "\n",
+ " # model, tokenizer = unsloth.FastLanguageModel.from_pretrained(\n",
+ " # model_name = \"outputs_unslot/lora_model\", # the model you used for training\n",
+ " # max_seq_length = max_seq_length,\n",
+ " # dtype = dtype,\n",
+ " # load_in_4bit = load_in_4bit,\n",
+ " # )\n",
+ " # unsloth.FastLanguageModel.for_inference(model)\n",
+ "\n",
+ " # Not recommended: prefer Unsloth if possible\n",
+ " base_model = \"microsoft/Phi-3.5-mini-instruct\"\n",
+ "\n",
+ " model = peft.AutoPeftModelForCausalLM.from_pretrained(\n",
+ " \"outputs_unslot/lora_model\", # the model you used for training\n",
+ " load_in_4bit = False,\n",
+ " )\n",
+ " tokenizer = transformers.AutoTokenizer.from_pretrained(\"outputs_unslot/lora_model\")\n",
+ " model.config.use_cache = True\n",
+ " model.eval()\n",
+ " print(model.config)\n",
+ "\n",
+ "tokenizer | out\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import datasets\n",
+ "\n",
+ "ds_test = datasets.load_dataset(\"json\", data_files=\"xlam-dataset-60k-qwen2-test.jsonl\")['train']\n",
+ "ds_test"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import re, json\n",
+ "\n",
+ "def infer(M, T, messages):\n",
+ " input_ids = T.apply_chat_template(\n",
+ " messages,\n",
+ " tokenize=True,\n",
+ " add_generation_prompt=True,\n",
+ " max_length=T.model_max_length,\n",
+ " padding=False,\n",
+ " truncation=True,\n",
+ " return_tensors='pt',\n",
+ " ).to(M.device)\n",
+ "\n",
+ " text_streamer = None # transformers.TextStreamer(tokenizer, skip_prompt = True)\n",
+ " outputs = M.generate(\n",
+ " input_ids = input_ids, # attention_mask=attention_mask,\n",
+ " streamer = text_streamer,\n",
+ " max_new_tokens=1024,\n",
+ " eos_token_id=tokenizer.eos_token_id,\n",
+ " pad_token_id=tokenizer.pad_token_id,\n",
+ " do_sample=True, temperature=0.01, top_p=0.01,\n",
+ " use_cache=True)\n",
+ "\n",
+ " # gen = T.batch_decode(outputs, skip_special_tokens=True)[0]\n",
+ " gen = T.decode(outputs[0, input_ids.shape[-1]:], skip_special_tokens=True)\n",
+ "\n",
+ " return input_ids, outputs, gen\n",
+ "\n",
+ "\n",
+ "\n",
+ "for i, sample in enumerate(ds_test):\n",
+ " message = sample[\"messages\"]\n",
+ " user_content = message[0][\"content\"]\n",
+ " ans = message[1][\"content\"]\n",
+ " _, _, gen = infer(model, tokenizer, message[:-1])\n",
+ " gen = gen.replace('```json', '').replace('```', '')\n",
+ "\n",
+ " # normalize = lambda s: re.sub(r\"\"\"\\s+\"\"\", \"\", s, flags=re.MULTILINE|re.DOTALL)\n",
+ " # gen = normalize(gen.replace('```json', '').replace('```', ''))\n",
+ " # ans = normalize(ans)\n",
+ " true, false = True, False # aliases so eval() can parse JSON true/false literals\n",
+ " gen = json.dumps(eval(gen), indent=3)\n",
+ " ans = json.dumps(eval(ans), indent=3)\n",
+ " if gen != ans:\n",
+ " # print(user_content | gray)\n",
+ " print(f\"{i} ----------------\" | gray)\n",
+ " print('gen:', gen | green)\n",
+ " print('ans:', ans)"
+ ]
+ },
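+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Sketch only (not in the original notebook): an aggregate exact-match\n",
+ "# metric, assuming the loop above is refactored to append (gen, ans) pairs\n",
+ "# to a hypothetical `results` list instead of just printing mismatches.\n",
+ "# matches = sum(int(g == a) for g, a in results)\n",
+ "# print(f'exact match: {matches}/{len(results)} = {matches/len(results):.2%}')"
+ ]
+ },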
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### no fc (plain prompts, no function calling)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def generate(input_text, system_prompt, max_length=0): # max_length is currently unused\n",
+ " messages = [\n",
+ " {\"role\": \"system\", \"content\": system_prompt},\n",
+ " {\"role\": \"user\", \"content\": input_text}\n",
+ " ]\n",
+ " _, _, prediction = infer(model, tokenizer, messages)\n",
+ " print(input_text | gray)\n",
+ " print(prediction | green)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "prompts = [\n",
+ "(\"Roger has 5 tennis balls. He buys 2 more cans of tennis balls. Each can has 3 tennis balls. How many tennis balls does he have now?\", 11),\n",
+ "(\"Yes or no: Would a pear sink in water?\", None),\n",
+ "(\"How would you bring me something that isn’t a fruit?\", None),\n",
+ "(\"How many keystrokes are needed to type the numbers from 1 to 500?\", 1392),\n",
+ "(\"The concert was scheduled to be on 06/01/1943, but was delayed by one day to today. What is the date 10 days ago in MM/DD/YYYY?\", \"05/23/1943\"),\n",
+ "(\"Take the last letters of the words in 'Lady Gaga' and concatenate them.\", 'ya'),\n",
+ "(\"Sammy wanted to go to where the people were. Where might he go?\", None),\n",
+ "(\"Is the following sentence plausible? 'Joao Moutinho caught the screen pass in the NFC championship.'\", 'not plausible'),\n",
+ "(\"A coin is heads up. Maybelle flips the coin. Shalonda does not flip the coin. Is the coin still heads up?\", \"No\"),\n",
+ "('Answer the following question by reasoning step by step. The cafeteria had 23 apples. If they used 20 for lunch, and bought 6 more, how many apples do they have?', 9)\n",
+ "]\n",
+ "\n",
+ "line = 2 # selects prompts[line-2]\n",
+ "generate(prompts[line-2][0],\n",
+ " system_prompt=\"Write out your reasoning step-by-step to be sure you get the right answers!\",\n",
+ " max_length=512)\n",
+ "\n",
+ "if prompts[line-2][1]:\n",
+ " print('answer:', prompts[line-2][1])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Saving weights"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### lora"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "model.save_pretrained(\"outputs_unslot/lora_model\") # Local saving\n",
+ "tokenizer.save_pretrained(\"outputs_unslot/lora_model\")\n",
+ "# model.push_to_hub(\"your_name/lora_model\", token = \"...\") # Online saving\n",
+ "# tokenizer.push_to_hub(\"your_name/lora_model\", token = \"...\") # Online saving\n",
+ "\n",
+ "# loading\n",
+ "# from unsloth import FastLanguageModel\n",
+ "# model, tokenizer = FastLanguageModel.from_pretrained(\n",
+ "# model_name = \"outputs_unslot/lora_model\", # the model you used for training\n",
+ "# max_seq_length = max_seq_length,\n",
+ "# dtype = dtype,\n",
+ "# load_in_4bit = load_in_4bit,\n",
+ "# )\n",
+ "# FastLanguageModel.for_inference(model)\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### hf-model"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# model.save_pretrained_merged(\"outputs_unslot/hf-model\", tokenizer, save_method=\"merged_16bit\")\n",
+ "# merge with lora model\n",
+ "# model.save_pretrained(\"outputs_unslot/hf-model\") # safe_serialization = None\n",
+ "# tokenizer.save_pretrained(\"outputs_unslot/hf-model\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### gguf\n",
+ "- this also writes the HF weights (safetensors) alongside the GGUF files"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "\n",
+ "model.save_pretrained_gguf(\"outputs_unslot/model\", tokenizer, quantization_method=\"q8_0\")\n",
+ "model.save_pretrained_gguf(\"outputs_unslot/model\", tokenizer, quantization_method=\"q4_k_m\")\n",
+ "model.save_pretrained_gguf(\"outputs_unslot/model\", tokenizer, quantization_method=\"q5_k_m\")\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "\n",
+ "def create_modelfile(gguf_path, template, output_modelfile):\n",
+ " strip_lines = lambda x: '\\n'.join(line.strip() for line in x.splitlines())\n",
+ " assert Path(gguf_path).exists()\n",
+ " output_modelfile = Path(gguf_path).parent / Path(output_modelfile).name\n",
+ " gguf_path = Path(gguf_path).name\n",
+ "\n",
+ " with open(output_modelfile, \"w\") as f:\n",
+ " f.write(f\"FROM {gguf_path}\\n\\n\")\n",
+ " f.write(f\"TEMPLATE \\\"\\\"\\\"{strip_lines(template)}\\\"\\\"\\\"\\n\")\n",
+ " # f.write(strip_lines(\"\"\"\n",
+ " # SYSTEM \"You are a helpful assistant.\"\n",
+ "\n",
+ " # PARAMETER temperature 0.01\n",
+ " # PARAMETER top_p 0.01\n",
+ " # PARAMETER stop \"<|im_end|>\" \"\"\")+'\\n')\n",
+ "\n",
+ "phi_3_5_template = \"\"\"\\\n",
+ "{{ if .System }}<|system|>\n",
+ "{{ .System }}<|end|>\n",
+ "{{ end }}{{ if .Prompt }}<|user|>\n",
+ "{{ .Prompt }}<|end|>\n",
+ "{{ end }}<|assistant|>\n",
+ "{{ .Response }}<|end|>\"\"\"\n",
+ "\n",
+ "create_modelfile(\"outputs_unslot/model/unsloth.Q8_0.gguf\", phi_3_5_template, \"phi3.5-fc-Q8_0.mf\")\n",
+ "\n",
+ "!ollama create jjkim76/phi3.5-fc:Q8_0 -f outputs_unslot/model/phi3.5-fc-Q8_0.mf\n",
+ "!ollama push jjkim76/phi3.5-fc:Q8_0\n"
+ ]
+ },
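+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Smoke test (illustrative; assumes the create/push above succeeded): send\n",
+ "# one prompt through the ollama CLI to confirm the tag serves responses.\n",
+ "# !ollama run jjkim76/phi3.5-fc:Q8_0 'Say hello in one word.'"
+ ]
+ },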
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### merge with lora"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import transformers, peft\n",
+ "\n",
+ "pretrained_path = 'microsoft/Phi-3.5-mini-instruct'\n",
+ "# model_max_length = 4096\n",
+ "\n",
+ "tokenizer = transformers.AutoTokenizer.from_pretrained(\n",
+ " pretrained_path,\n",
+ " # model_max_length=model_max_length,\n",
+ " trust_remote_code=True,\n",
+ " )\n",
+ "\n",
+ "config = transformers.AutoConfig.from_pretrained(\n",
+ " pretrained_path\n",
+ ")\n",
+ "\n",
+ "model = transformers.AutoModelForCausalLM.from_pretrained(\n",
+ " pretrained_path,\n",
+ " # config=config,\n",
+ " device_map=\"auto\",\n",
+ " trust_remote_code=True,\n",
+ " torch_dtype=torch.bfloat16,\n",
+ " # use_flash_attention_2=True,\n",
+ ")\n",
+ "\n",
+ "lora_path = 'outputs_unslot/model/lora'\n",
+ "lora_model = peft.PeftModel.from_pretrained(model, lora_path, torch_dtype=torch.bfloat16) # match the base model dtype"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "merged_model = lora_model.merge_and_unload()\n",
+ "merged_model"
+ ]
+ },
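+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Note (sketch, not in the original run): merge_and_unload() folds the LoRA\n",
+ "# deltas into the base weights and returns a plain transformers model with\n",
+ "# the adapter modules removed, so the parameter count should match the base:\n",
+ "sum(p.numel() for p in merged_model.parameters()) | out"
+ ]
+ },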
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# merged_model.save_pretrained('outputs_unslot/merged-model')\n",
+ "tokenizer.save_pretrained('outputs_unslot/merged-model')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### upload to hf"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Loading checkpoint shards: 100%|██████████| 2/2 [00:02<00:00, 1.18s/it]\n"
+ ]
+ }
+ ],
+ "source": [
+ "import torch, peft, transformers\n",
+ "\n",
+ "model_local = \"outputs_unslot/merged-model\"\n",
+ "\n",
+ "model = transformers.AutoModelForCausalLM.from_pretrained(\n",
+ " model_local,\n",
+ " device_map=\"auto\",\n",
+ " torch_dtype=torch.bfloat16,\n",
+ " trust_remote_code=True,\n",
+ " low_cpu_mem_usage=True,\n",
+ " attn_implementation=\"flash_attention_2\",\n",
+ ")\n",
+ "\n",
+ "model.config.use_cache = True\n",
+ "model.eval()\n",
+ "\n",
+ "tokenizer = transformers.AutoTokenizer.from_pretrained(model_local)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "model_id, revision=\"objects76/Phi-3.5-mini-instruct-fc\", \"main\"\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "tokenizer.model: 100%|██████████| 500k/500k [00:01<00:00, 463kB/s]\n",
+ "100%|██████████| 1/1 [00:01<00:00, 1.30s/it]\n",
+ "100%|██████████| 2/2 [00:15<00:00, 7.94s/it]\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "CommitInfo(commit_url='https://huggingface.co/objects76/Phi-3.5-mini-instruct-fc/commit/8c379468173a1f6b05d39488cb7c61a51eeed72e', commit_message='instruction following added. without system message.', commit_description='', oid='8c379468173a1f6b05d39488cb7c61a51eeed72e', pr_url=None, pr_revision=None, pr_num=None)"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from huggingface_hub import HfApi, HfFolder\n",
+ "from datetime import datetime\n",
+ "\n",
+ "tag = 'main' # datetime.now().strftime(\"%m%d\")\n",
+ "repo_name = \"objects76/Phi-3.5-mini-instruct-fc\"\n",
+ "print(f'model_id, revision=\"{repo_name}\", \"{tag}\"')\n",
+ "\n",
+ "\n",
+ "# Push tokenizer and model weights to the Hub\n",
+ "tokenizer.push_to_hub(repo_name, revision=tag)\n",
+ "model.push_to_hub(repo_name, revision=tag,\n",
+ " max_shard_size=\"5GB\",\n",
+ " # safe_serialization=True, private=True,\n",
+ " commit_message='instruction following added. without system message.')\n",
+ "\n",
+ "#\n",
+ "# upload additional files\n",
+ "#\n",
+ "# srcfiles = 'output_qwen/2024-08-09/source.tar.gz'\n",
+ "# !tar -czvf {srcfiles} alpaca*.jsonl xlam*.jsonl qwen2-xlam-5.py\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import json\n",
+ "def build_readme(readme_path, outpath):\n",
+ " # get a sample prompt\n",
+ " test_samples = []\n",
+ " with open(\"xlam-dataset-60k-qwen2-test.jsonl\") as fp:\n",
+ " test_samples = [json.loads(line) for line in fp]\n",
+ " messages = test_samples[5]['messages']\n",
+ "\n",
+ " with open(readme_path) as fp:\n",
+ " txt = fp.read()\n",
+ " txt = txt.replace('USERMSG_PLACE_HOLDER', messages[0]['content'].replace('```', '[TRIPLE_BACKTICK]'))\n",
+ " # txt = txt.replace('MESSAGE_PLACE_HOLDER', str(messages[:-1]))\n",
+ " txt = txt.replace('RESPONSE_PLACE_HOLDER', str(messages[-1]['content']))\n",
+ " with open(outpath, 'w') as fp:\n",
+ " fp.write(txt)\n",
+ "\n",
+ "\n",
+ " # tokenizer.apply_chat_template(messages, tokenize=False) | out\n",
+ " # evals = []\n",
+ " # for i, sample in enumerate(test_samples):\n",
+ " # print(f'-- sample {i} --' | magenta)\n",
+ " # evals.append( get_answer(sample['messages'], model, tokenizer) )\n",
+ "\n",
+ "build_readme('outputs_unslot/README_TEMPLATE.md', model_local + '/README.md')\n",
+ "\n",
+ "if 1: # also upload this notebook file\n",
+ " local_files = [\n",
+ " # \"output_qwen/README.md\",\n",
+ " __file__\n",
+ " ]\n",
+ " api = HfApi()\n",
+ " for file_path in local_files:\n",
+ " # target_path = file_path.replace('output_qwen/2024-08-08/', '')\n",
+ " target_path = Path(file_path).name\n",
+ " api.upload_file(\n",
+ " path_or_fileobj=file_path,\n",
+ " path_in_repo=target_path,\n",
+ " repo_id=repo_name, revision=tag,\n",
+ " repo_type=\"model\",\n",
+ " # commit_message=\"Add README.md file\"\n",
+ " )"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "fcv3-2",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.14"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+ }