Upload phi-3.5-mini-fc.ipynb with huggingface_hub
Browse files- phi-3.5-mini-fc.ipynb +837 -0
@@ -0,0 +1,837 @@
1 |
2 |
"cells": [
3 |
4 |
"cell_type": "code",
5 |
"execution_count": 1,
6 |
"metadata": {},
7 |
"outputs": [
8 |
9 |
"name": "stderr",
10 |
"output_type": "stream",
11 |
"text": [
12 |
"/home/ubuntu/miniforge3/envs/unsloth_env/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
13 |
" from .autonotebook import tqdm as notebook_tqdm\n"
14 |
15 |
16 |
17 |
"name": "stdout",
18 |
"output_type": "stream",
19 |
"text": [
20 |
"Token is valid (permission: write).\n",
21 |
"Your token has been saved in your configured git credential helpers (store).\n",
22 |
"Your token has been saved to /home/ubuntu/.cache/huggingface/token\n",
23 |
"Login successful\n"
24 |
25 |
26 |
27 |
"source": [
28 |
"%reload_ext autoreload\n",
29 |
"%autoreload 2\n",
30 |
"if '__file__' not in globals():\n",
31 |
" __file__, __name__ = globals()['__vsc_ipynb_file__'], '__ipynb__'\n",
32 |
" import types, sys; sys.modules['__ipynb__'] = types.ModuleType('__ipynb__')\n",
33 |
" from IPython.core.magic import register_cell_magic\n",
34 |
" @register_cell_magic\n",
35 |
" def skip_if(flag, cell): exec(cell, globals())if flag and not eval(flag) else print('Cell skipped...')\n",
36 |
37 |
"import sys, os\n",
38 |
"if os.path.abspath('.') not in sys.path: sys.path.append(os.path.abspath('.'))\n",
39 |
40 |
"import os, huggingface_hub # !pip install huggingface_hub[hf_transfer]\n",
41 |
"huggingface_hub.login(token = os.environ.get('HF_TOKEN'), add_to_git_credential=True)\n",
42 |
43 |
"import inspect\n",
44 |
"from pathlib import Path\n",
45 |
"from tqdm import tqdm\n",
46 |
"from glob import glob\n",
47 |
"import numpy as np; np.set_printoptions(precision=8, suppress=True); np.random.seed(42)\n",
48 |
49 |
"class whitechar:\n",
50 |
" def __ror__(self, x): return x.replace('\\n', '\\\\n\\n').replace('\\t', '\\\\t\\t').replace(' ', 'β΅')\n",
51 |
"wc = whitechar()\n",
52 |
53 |
"class text_color:\n",
54 |
" black,red,green,yellow,blue,magenta,cyan,white,gray = [*range(30,38), 90] # fgclr, [*range(90,98), ''] # light-fgclr\n",
55 |
" bold, italic, underline, strike = 1, 3, 4, 9 # attrs supported on vscode notebook.\n",
56 |
" def __init__(self, fg,bg=0,attr=0):\n",
57 |
" attr = f'{attr};' if attr > 0 else ''\n",
58 |
" bg = f'{bg+10};' if bg > 0 else ''\n",
59 |
" self.clr = f'\\33[{attr}{bg}{fg}m'\n",
60 |
61 |
" def __ror__(self, obj): return self.clr + str(obj) + '\\33[0m'\n",
62 |
" @staticmethod\n",
63 |
" def all(): return (text_color(clr) for clr in [*range(30,38), 90])\n",
64 |
65 |
"black,red,green,yellow,blue,magenta,cyan,white,gray = text_color.all()\n",
66 |
67 |
"class cout:\n",
68 |
" def __ror__(self, obj): print(f'[{inspect.stack()[1].lineno}] {str(obj)}')\n",
69 |
" def __call__(self, *args, **kwds): print(f'[{inspect.stack()[1].lineno+1}]', *args, **kwds)\n",
70 |
"out = cout()\n",
71 |
72 |
73 |
"os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True' #can help a little with VRAM reqs."
74 |
75 |
76 |
77 |
"cell_type": "code",
78 |
"execution_count": null,
79 |
"metadata": {},
80 |
"outputs": [],
81 |
"source": [
82 |
"import unsloth\n",
83 |
"import torch\n",
84 |
85 |
"import wandb\n",
86 |
"wandb.init(project=\"phi-3.5-mini\", name='run-phi-3.5-mini')\n",
87 |
"os.environ[\"WANDB_NOTEBOOK_NAME\"] =__file__\n",
88 |
89 |
"max_seq_length = 4096\n",
90 |
"use_4bit = False\n",
91 |
92 |
"model, tokenizer = unsloth.FastLanguageModel.from_pretrained(\n",
93 |
" model_name=\"microsoft/Phi-3.5-mini-instruct\",\n",
94 |
" max_seq_length=max_seq_length,\n",
95 |
" dtype=None, # auto detect\n",
96 |
" load_in_4bit=use_4bit,\n",
97 |
98 |
99 |
"model = unsloth.FastLanguageModel.get_peft_model(\n",
100 |
" model,\n",
101 |
" r=16,\n",
102 |
" target_modules=[\n",
103 |
" \"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\",\n",
104 |
" \"gate_proj\", \"up_proj\", \"down_proj\"],\n",
105 |
" lora_alpha=16,\n",
106 |
" lora_dropout=0,\n",
107 |
" bias=\"none\",\n",
108 |
" use_gradient_checkpointing=\"unsloth\",\n",
109 |
" random_state=3407,\n",
110 |
" use_rslora=False, # True\n",
111 |
" loftq_config=None,\n",
112 |
113 |
114 |
115 |
116 |
"cell_type": "code",
117 |
"execution_count": null,
118 |
"metadata": {},
119 |
"outputs": [],
120 |
"source": [
121 |
"tokenizer.padding_side = 'left' # right -> left\n",
122 |
"# tokenizer.add_bos_token = False\n",
123 |
"# tokenizer.truncation_side # right\n",
124 |
125 |
126 |
127 |
128 |
"tokenizer | out"
129 |
130 |
131 |
132 |
"cell_type": "code",
133 |
"execution_count": null,
134 |
"metadata": {},
135 |
"outputs": [],
136 |
"source": [
137 |
138 |
139 |
" [\n",
140 |
" {\"role\": \"user\", \"content\": \"hello\"},\n",
141 |
" {\"role\": \"assistant\", \"content\": \"hi\"},\n",
142 |
" {\"role\": \"user\", \"content\": \"how are you?\"},\n",
143 |
" ],\n",
144 |
" tokenize=False,\n",
145 |
" add_generation_prompt=True,\n",
146 |
")|wc | out\n"
147 |
148 |
149 |
150 |
"cell_type": "code",
151 |
"execution_count": null,
152 |
"metadata": {},
153 |
"outputs": [],
154 |
"source": [
155 |
"from datasets import load_dataset\n",
156 |
157 |
"data_collator = None\n",
158 |
"ds_xlam_fc = load_dataset('json', data_files={\n",
159 |
" 'train': 'xlam-dataset-60k-qwen2-train.jsonl',\n",
160 |
161 |
162 |
"# sample 3000 datas from ds_xlam_fc\n",
163 |
"ds_xlam_fc3k = ds_xlam_fc['train'].shuffle(seed=42).select(range(3000))\n",
164 |
165 |
166 |
167 |
168 |
"cell_type": "code",
169 |
"execution_count": null,
170 |
"metadata": {},
171 |
"outputs": [],
172 |
"source": [
173 |
"def formatting_prompts_func(examples):\n",
174 |
" print( 'formatting_prompts_func:', len(examples) )\n",
175 |
" convos = examples[\"messages\"]\n",
176 |
" texts = [tokenizer.apply_chat_template(convo, tokenize=False, add_generation_prompt=False) for convo in convos]\n",
177 |
" return {\"text\": texts}\n",
178 |
179 |
"dataset_formatted = ds_xlam_fc3k.map(\n",
180 |
" formatting_prompts_func, batched=True,\n",
181 |
" remove_columns=[\"messages\", \"type\", \"source\"])\n",
182 |
183 |
"dataset_formatted[199] | out"
184 |
185 |
186 |
187 |
"cell_type": "code",
188 |
"execution_count": null,
189 |
"metadata": {},
190 |
"outputs": [],
191 |
"source": [
192 |
"import trl\n",
193 |
194 |
"def print_tokens_with_ids(txt):\n",
195 |
" tokens = tokenizer.tokenize(txt, add_special_tokens=False)\n",
196 |
" token_ids = tokenizer.encode(txt, add_special_tokens=False)\n",
197 |
" return list(zip(tokens, token_ids))\n",
198 |
199 |
"input_text = tokenizer.apply_chat_template(\n",
200 |
" [dict(role=\"user\", content=\"\\n111 222\"),\n",
201 |
" dict(role=\"assistant\", content=\"\\nxxx yyy\\n\"),\n",
202 |
" dict(role=\"user\", content=\"444 555\\n\"),],\n",
203 |
" tokenize=False, add_generation_prompt=True)\n",
204 |
"print_tokens_with_ids(input_text) | out\n",
205 |
"print_tokens_with_ids(\"\\n<|assistant|>\\n\") | green | out\n",
206 |
207 |
208 |
"data_collator = trl.DataCollatorForCompletionOnlyLM([32001], tokenizer=tokenizer)\n",
209 |
210 |
211 |
212 |
213 |
"cell_type": "code",
214 |
"execution_count": null,
215 |
"metadata": {},
216 |
"outputs": [],
217 |
"source": [
218 |
219 |
"import transformers\n",
220 |
"import unsloth\n",
221 |
"import trl\n",
222 |
223 |
"train_args = trl.SFTConfig(\n",
224 |
" per_device_train_batch_size=8,\n",
225 |
" gradient_accumulation_steps=1,\n",
226 |
227 |
" warmup_steps=5,\n",
228 |
" # max_steps=60,\n",
229 |
" num_train_epochs = 1,\n",
230 |
231 |
" # learning_rate=2e-4,\n",
232 |
" learning_rate = 5e-5,\n",
233 |
" bf16= unsloth.is_bfloat16_supported(),\n",
234 |
" optim= \"adamw_torch\", # \"adamw_8bit\",\n",
235 |
236 |
" weight_decay=0.01,\n",
237 |
" lr_scheduler_type=\"linear\",\n",
238 |
" seed=3407,\n",
239 |
240 |
" gradient_checkpointing = True,\n",
241 |
" gradient_checkpointing_kwargs = {\"use_reentrant\": True},\n",
242 |
243 |
" output_dir = \"outputs_unslot\",\n",
244 |
" run_name = \"phi35-inst\",\n",
245 |
" logging_steps=1,\n",
246 |
" report_to= 'wandb',\n",
247 |
248 |
249 |
"trainer = trl.SFTTrainer(\n",
250 |
" model=model,\n",
251 |
" tokenizer=tokenizer,\n",
252 |
253 |
" train_dataset=dataset_formatted,\n",
254 |
" dataset_text_field=\"text\",\n",
255 |
" data_collator=data_collator,\n",
256 |
" packing=False,\n",
257 |
258 |
" max_seq_length=max_seq_length,\n",
259 |
" dataset_num_proc=2,\n",
260 |
261 |
" args = train_args,\n",
262 |
263 |
264 |
265 |
266 |
"cell_type": "code",
267 |
"execution_count": null,
268 |
"metadata": {},
269 |
"outputs": [],
270 |
"source": [
271 |
272 |
"gpu_stats = torch.cuda.get_device_properties(0)\n",
273 |
"start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n",
274 |
"max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)\n",
275 |
"print(f\"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.\")\n",
276 |
"print(f\"{start_gpu_memory} GB of memory reserved.\")\n",
277 |
278 |
"trainer_stats = trainer.train()\n",
279 |
280 |
"used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n",
281 |
"used_memory_for_lora = round(used_memory - start_gpu_memory, 3)\n",
282 |
"used_percentage = round(used_memory / max_memory * 100, 3)\n",
283 |
"lora_percentage = round(used_memory_for_lora / max_memory * 100, 3)\n",
284 |
"print(f\"{trainer_stats.metrics['train_runtime']} seconds used for training.\")\n",
285 |
"print(f\"{round(trainer_stats.metrics['train_runtime'] / 60, 2)} minutes used for training.\")\n",
286 |
"print(f\"Peak reserved memory = {used_memory} GB.\")\n",
287 |
"print(f\"Peak reserved memory for training = {used_memory_for_lora} GB.\")\n",
288 |
"print(f\"Peak reserved memory % of max memory = {used_percentage} %.\")\n",
289 |
"print(f\"Peak reserved memory for training % of max memory = {lora_percentage} %.\")\n",
290 |
291 |
"model.save_pretrained_merged('outputs_unslot/model', tokenizer, save_method = \"merged_16bit\",) # for best quality\n",
292 |
293 |
"import unsloth\n",
294 |
"unsloth.FastLanguageModel.for_inference(model) # Enable native 2x faster inference"
295 |
296 |
297 |
298 |
"cell_type": "code",
299 |
"execution_count": null,
300 |
"metadata": {},
301 |
"outputs": [],
302 |
"source": [
303 |
"# model.save_pretrained_merged('outputs_unslot/model', tokenizer, save_method = \"merged_16bit\",) # for best quality\n",
304 |
"model.save_pretrained_merged('outputs_unslot/model/lora', tokenizer, save_method = \"lora\",)\n"
305 |
306 |
307 |
308 |
"cell_type": "markdown",
309 |
"metadata": {},
310 |
"source": [
311 |
"# inference"
312 |
313 |
314 |
315 |
"cell_type": "markdown",
316 |
"metadata": {},
317 |
"source": [
318 |
"### Load weights from the saved model"
319 |
320 |
321 |
322 |
"cell_type": "code",
323 |
"execution_count": null,
324 |
"metadata": {},
325 |
"outputs": [],
326 |
"source": [
327 |
"import transformers, peft, torch, unsloth\n",
328 |
329 |
330 |
" del model\n",
331 |
" del tokenizer\n",
332 |
" torch.cuda.empty_cache()\n",
333 |
334 |
" pass\n",
335 |
336 |
"if 1: # loading from hf\n",
337 |
" repo_name = \"objects76/phi-3.5-fc\" # phi-3.5-mini\n",
338 |
" repo_name = \"outputs_unslot/merged-model\"\n",
339 |
" model = transformers.AutoModelForCausalLM.from_pretrained(\n",
340 |
" repo_name, revision=\"main\",\n",
341 |
" torch_dtype=torch.bfloat16,\n",
342 |
" device_map=\"auto\",\n",
343 |
" trust_remote_code=True,\n",
344 |
" # attn_implementation=\"flash_attention_2\", #turn off if not supported by model or your GPU\n",
345 |
" )\n",
346 |
" model.config.use_cache = True\n",
347 |
" model.eval()\n",
348 |
349 |
" tokenizer = transformers.AutoTokenizer.from_pretrained(repo_name, trust_remote_code=True)\n",
350 |
351 |
"elif 1:\n",
352 |
" max_seq_length = 4096\n",
353 |
" dtype = None\n",
354 |
" load_in_4bit = True\n",
355 |
356 |
" # model, tokenizer = unsloth.FastLanguageModel.from_pretrained(\n",
357 |
" # model_name = \"outputs_unslot/lora_model\", # YOUR MODEL YOU USED FOR TRAINING\n",
358 |
" # max_seq_length = max_seq_length,\n",
359 |
" # dtype = dtype,\n",
360 |
" # load_in_4bit = load_in_4bit,\n",
361 |
" # )\n",
362 |
" # unsloth.FastLanguageModel.for_inference(model)\n",
363 |
364 |
" # I highly do NOT suggest - use Unsloth if possible\n",
365 |
" base_model = \"microsoft/Phi-3.5-mini-instruct\"\n",
366 |
367 |
" model = peft.AutoPeftModelForCausalLM.from_pretrained(\n",
368 |
" \"outputs_unslot/lora_model\", # YOUR MODEL YOU USED FOR TRAINING\n",
369 |
" load_in_4bit = False,\n",
370 |
" )\n",
371 |
" tokenizer = transformers.AutoTokenizer.from_pretrained(\"outputs_unslot/lora_model\")\n",
372 |
" model.config.use_cache = True\n",
373 |
" model.eval()\n",
374 |
" print(model.config)\n",
375 |
376 |
"tokenizer | out\n"
377 |
378 |
379 |
380 |
"cell_type": "code",
381 |
"execution_count": null,
382 |
"metadata": {},
383 |
"outputs": [],
384 |
"source": [
385 |
"import datasets\n",
386 |
387 |
"ds_test = datasets.load_dataset(\"json\", data_files=\"xlam-dataset-60k-qwen2-test.jsonl\")['train']\n",
388 |
389 |
390 |
391 |
392 |
"cell_type": "code",
393 |
"execution_count": null,
394 |
"metadata": {},
395 |
"outputs": [],
396 |
"source": [
397 |
"import re,json\n",
398 |
399 |
"def infer(M,T, messages):\n",
400 |
" input_ids = T.apply_chat_template(\n",
401 |
" messages,\n",
402 |
" tokenize=True,\n",
403 |
" add_generation_prompt=True,\n",
404 |
" max_length=T.model_max_length,\n",
405 |
" padding=False,\n",
406 |
" truncation=True,\n",
407 |
" return_tensors='pt',\n",
408 |
" ).to(M.device)\n",
409 |
410 |
" text_streamer = None # transformers.TextStreamer(tokenizer, skip_prompt = True)\n",
411 |
" outputs = M.generate(\n",
412 |
" input_ids = input_ids, # attention_mask=attention_mask,\n",
413 |
" streamer = text_streamer,\n",
414 |
" max_new_tokens=1024,\n",
415 |
" eos_token_id=tokenizer.eos_token_id,\n",
416 |
" pad_token_id=tokenizer.pad_token_id,\n",
417 |
" do_sample=True, temperature=0.01, top_p= 0.01,\n",
418 |
" use_cache=True)\n",
419 |
420 |
" # gen = T.batch_decode(outputs, skip_special_tokens=True)[0]\n",
421 |
" gen = T.decode(outputs[0, input_ids.shape[-1]:], skip_special_tokens=True)\n",
422 |
423 |
" return input_ids, outputs, gen\n",
424 |
425 |
426 |
427 |
"for i, sample in enumerate(ds_test):\n",
428 |
" message = sample[\"messages\"]\n",
429 |
" user_content = message[0][\"content\"]\n",
430 |
" ans = message[1][\"content\"]\n",
431 |
" _, _, gen = infer(model, tokenizer, message[:-1])\n",
432 |
" gen = gen.replace('```json', '').replace('```', '')\n",
433 |
434 |
" # normalize = lambda s: re.sub(r\"\"\"\\s+\"\"\", \"\", s, flags=re.MULTILINE|re.DOTALL)\n",
435 |
" # gen = normalize(gen.replace('```json', '').replace('```', ''))\n",
436 |
" # ans = normalize(ans)\n",
437 |
" true,false = True,False\n",
438 |
" gen = json.dumps(eval(gen), indent=3)\n",
439 |
" ans = json.dumps(eval(ans), indent=3)\n",
440 |
" if gen != ans:\n",
441 |
" # print(user_content|gray)\n",
442 |
" print(f\"{i} ----------------\"|gray)\n",
443 |
" print('gen:', gen|green)\n",
444 |
" print('ans:', ans)"
445 |
446 |
447 |
448 |
"cell_type": "markdown",
449 |
"metadata": {},
450 |
"source": [
451 |
"### no fc"
452 |
453 |
454 |
455 |
"cell_type": "code",
456 |
"execution_count": null,
457 |
"metadata": {},
458 |
"outputs": [],
459 |
"source": [
460 |
"def generate(input_text, system_prompt, max_length=0):\n",
461 |
" messages = [\n",
462 |
" {\"role\": \"system\", \"content\": system_prompt},\n",
463 |
" {\"role\": \"user\", \"content\": input_text}\n",
464 |
" ]\n",
465 |
" _, _, prediction = infer(model, tokenizer, messages)\n",
466 |
" print(input_text|gray)\n",
467 |
" print(prediction|green)\n"
468 |
469 |
470 |
471 |
"cell_type": "code",
472 |
"execution_count": null,
473 |
"metadata": {},
474 |
"outputs": [],
475 |
"source": [
476 |
"prompts = [\n",
477 |
"(\"Roger has 5 tennis balls. He buys 2 more cans of tennis balls. Each can has 3 tennis balls. How many tennis balls does he have now?\", 11),\n",
478 |
"(\"Yes or no: Would a pear sink in water?\",None),\n",
479 |
"(\"How would you bring me something that isn’t a fruit?\",None),\n",
480 |
"(\"How many keystrokes are needed to type the numbers from 1 to 500?\", 1392),\n",
481 |
"(\"The concert was scheduled to be on 06/01/1943, but was delayed by one day to today. What is the date 10 days ago in MM/DD/YYYY?\", \"05/23/1943\"),\n",
482 |
"(\"Take the last letters of the words in 'Lady Gaga' and concatenate them.\", 'ya'),\n",
483 |
"(\"Sammy wanted to go to where the people were. Where might he go?\",None),\n",
484 |
"(\"Is the following sentence plausible? 'Joao Moutinho caught the screen pass in the NFC championship.'\", 'not plausible'),\n",
485 |
"(\"A coin is heads up. Maybelle flips the coin. Shalonda does not flip the coin. Is the coin still heads up?\", \"No\"),\n",
486 |
"('Answer the following question by reasoning step by step. The cafeteria had 23 apples. If they used 20 for lunch, and bought 6 more, how many apple do they have?', 9)\n",
487 |
488 |
489 |
"line = 2\n",
490 |
491 |
" system_prompt=\"Write out your reasoning step-by-step to be sure you get the right answers!\",\n",
492 |
" max_length=512)\n",
493 |
494 |
"if prompts[line-2][1]:\n",
495 |
" print('answer:', prompts[line-2][1])"
496 |
497 |
498 |
499 |
"cell_type": "markdown",
500 |
"metadata": {},
501 |
"source": [
502 |
"# Saving weights"
503 |
504 |
505 |
506 |
"cell_type": "markdown",
507 |
"metadata": {},
508 |
"source": [
509 |
"### lora"
510 |
511 |
512 |
513 |
"cell_type": "code",
514 |
"execution_count": null,
515 |
"metadata": {},
516 |
"outputs": [],
517 |
"source": [
518 |
"model.save_pretrained(\"outputs_unslot/lora_model\") # Local saving\n",
519 |
520 |
"# model.push_to_hub(\"your_name/lora_model\", token = \"...\") # Online saving\n",
521 |
"# tokenizer.push_to_hub(\"your_name/lora_model\", token = \"...\") # Online saving\n",
522 |
523 |
"# loading\n",
524 |
"# from unsloth import FastLanguageModel\n",
525 |
"# model, tokenizer = FastLanguageModel.from_pretrained(\n",
526 |
"# model_name = \"outputs_unslot/lora_model\", # YOUR MODEL YOU USED FOR TRAINING\n",
527 |
"# max_seq_length = max_seq_length,\n",
528 |
"# dtype = dtype,\n",
529 |
"# load_in_4bit = load_in_4bit,\n",
530 |
"# )\n",
531 |
"# FastLanguageModel.for_inference(model)\n"
532 |
533 |
534 |
535 |
"cell_type": "markdown",
536 |
"metadata": {},
537 |
"source": [
538 |
"### hf-model"
539 |
540 |
541 |
542 |
"cell_type": "code",
543 |
"execution_count": null,
544 |
"metadata": {},
545 |
"outputs": [],
546 |
"source": [
547 |
"# model.save_pretrained_merged(\"outputs_unslot/hf-model\", tokenizer, save_method = \"merged_16bit\",)\n",
548 |
"# merge with lora model\n",
549 |
"# model.save_pretrained(\"outputs_unslot/hf-model\") # safe_serialization = None\n",
550 |
"# tokenizer.save_pretrained(\"outputs_unslot/hf-model\")"
551 |
552 |
553 |
554 |
"cell_type": "markdown",
555 |
"metadata": {},
556 |
"source": [
557 |
"### gguf\n",
558 |
"- This step also writes the Hugging Face weights (safetensors)."
559 |
560 |
561 |
562 |
"cell_type": "code",
563 |
"execution_count": null,
564 |
"metadata": {},
565 |
"outputs": [],
566 |
"source": [
567 |
568 |
"model.save_pretrained_gguf(\"outputs_unslot/model\", tokenizer, quantization_method=\"q8_0\")\n",
569 |
"model.save_pretrained_gguf(\"outputs_unslot/model\", tokenizer, quantization_method=\"q4_k_m\")\n",
570 |
"model.save_pretrained_gguf(\"outputs_unslot/model\", tokenizer, quantization_method=\"q5_k_m\")\n"
571 |
572 |
573 |
574 |
"cell_type": "code",
575 |
"execution_count": null,
576 |
"metadata": {},
577 |
"outputs": [],
578 |
"source": [
579 |
580 |
"def create_modelfile(gguf_path, template, output_modelfile):\n",
581 |
" strip_lines = lambda x : '\\n'.join(line.strip() for line in x.splitlines())\n",
582 |
" assert Path(gguf_path).exists()\n",
583 |
" output_modelfile = Path(gguf_path).parent / Path(output_modelfile).name\n",
584 |
" gguf_path = Path(gguf_path).name\n",
585 |
586 |
" with open(output_modelfile, \"w\") as f:\n",
587 |
" f.write(f\"FROM {gguf_path}\\n\\n\")\n",
588 |
" f.write(f\"TEMPLATE \\\"\\\"\\\"{strip_lines(template)}\\\"\\\"\\\"\\n\")\n",
589 |
" # f.write(strip_lines(\"\"\"\n",
590 |
" # SYSTEM \"You are a helpful assistant.\"\n",
591 |
592 |
" # PARAMETER temperature 0.01\n",
593 |
" # PARAMETER top_p 0.01\n",
594 |
" # PARAMETER stop \"<|im_end|>\" \"\"\")+'\\n')\n",
595 |
596 |
"phi_3_5_template = \"\"\"\\\n",
597 |
"{{ if .System }}<|system|>\n",
598 |
"{{ .System }}<|end|>\n",
599 |
"{{ end }}{{ if .Prompt }}<|user|>\n",
600 |
"{{ .Prompt }}<|end|>\n",
601 |
"{{ end }}<|assistant|>\n",
602 |
"{{ .Response }}<|end|>\"\"\"\n",
603 |
604 |
"create_modelfile(\"outputs_unslot/model/unsloth.Q8_0.gguf\", phi_3_5_template, \"phi3.5-fc-Q8_0.mf\")\n",
605 |
606 |
"!ollama create jjkim76/phi3.5-fc:Q8_0 -f outputs_unslot/model/phi3.5-fc-Q8_0.mf\n",
607 |
"!ollama push jjkim76/phi3.5-fc:Q8_0\n"
608 |
609 |
610 |
611 |
"cell_type": "markdown",
612 |
"metadata": {},
613 |
"source": [
614 |
"### merge with lora"
615 |
616 |
617 |
618 |
"cell_type": "code",
619 |
"execution_count": null,
620 |
"metadata": {},
621 |
"outputs": [],
622 |
"source": [
623 |
"import transformers, peft\n",
624 |
625 |
"pretrained_path = 'microsoft/Phi-3.5-mini-instruct'\n",
626 |
"# model_max_length = 4096\n",
627 |
628 |
"tokenizer = transformers.AutoTokenizer.from_pretrained(\n",
629 |
" pretrained_path,\n",
630 |
" # model_max_length=model_max_length,\n",
631 |
" trust_remote_code=True,\n",
632 |
" )\n",
633 |
634 |
"config = transformers.AutoConfig.from_pretrained(\n",
635 |
" pretrained_path\n",
636 |
637 |
638 |
"model = transformers.AutoModelForCausalLM.from_pretrained(\n",
639 |
" pretrained_path,\n",
640 |
" # config=config,\n",
641 |
" device_map=\"auto\",\n",
642 |
" trust_remote_code=True,\n",
643 |
" torch_dtype=torch.bfloat16,\n",
644 |
" # use_flash_attention_2=True,\n",
645 |
646 |
647 |
"lora_path = 'outputs_unslot/model/lora'\n",
648 |
"lora_model = peft.PeftModel.from_pretrained(model, lora_path, torch_dtype=torch.float16)"
649 |
650 |
651 |
652 |
"cell_type": "code",
653 |
"execution_count": null,
654 |
"metadata": {},
655 |
"outputs": [],
656 |
"source": [
657 |
"merged_model = lora_model.merge_and_unload()\n",
658 |
659 |
660 |
661 |
662 |
"cell_type": "code",
663 |
"execution_count": null,
664 |
"metadata": {},
665 |
"outputs": [],
666 |
"source": [
667 |
"# merged_model.save_pretrained('outputs_unslot/merged-model')\n",
668 |
669 |
670 |
671 |
672 |
"cell_type": "markdown",
673 |
"metadata": {},
674 |
"source": [
675 |
"### upload to hf"
676 |
677 |
678 |
679 |
"cell_type": "code",
680 |
"execution_count": 2,
681 |
"metadata": {},
682 |
"outputs": [
683 |
684 |
"name": "stderr",
685 |
"output_type": "stream",
686 |
"text": [
687 |
"Loading checkpoint shards: 100%|██████████| 2/2 [00:02<00:00, 1.18s/it]\n"
688 |
689 |
690 |
691 |
"source": [
692 |
"import torch, peft, transformers\n",
693 |
694 |
"model_local = \"outputs_unslot/merged-model\"\n",
695 |
696 |
"model = transformers.AutoModelForCausalLM.from_pretrained(\n",
697 |
" model_local,\n",
698 |
" device_map=\"auto\",\n",
699 |
" torch_dtype=torch.bfloat16,\n",
700 |
" trust_remote_code=True,\n",
701 |
" low_cpu_mem_usage=True,\n",
702 |
" attn_implementation=\"flash_attention_2\",\n",
703 |
704 |
705 |
"model.config.use_cache = True\n",
706 |
707 |
708 |
"tokenizer = transformers.AutoTokenizer.from_pretrained(model_local)"
709 |
710 |
711 |
712 |
"cell_type": "code",
713 |
"execution_count": 3,
714 |
"metadata": {},
715 |
"outputs": [
716 |
717 |
"name": "stdout",
718 |
"output_type": "stream",
719 |
"text": [
720 |
"model_id, revision=\"objects76/Phi-3.5-mini-instruct-fc\", \"main\"\n"
721 |
722 |
723 |
724 |
"name": "stderr",
725 |
"output_type": "stream",
726 |
"text": [
727 |
"tokenizer.model: 100%|██████████| 500k/500k [00:01<00:00, 463kB/s]\n",
728 |
"100%|██████████| 1/1 [00:01<00:00, 1.30s/it]\n",
729 |
"100%|██████████| 2/2 [00:15<00:00, 7.94s/it]\n"
730 |
731 |
732 |
733 |
"data": {
734 |
"text/plain": [
735 |
"CommitInfo(commit_url='https://huggingface.co/objects76/Phi-3.5-mini-instruct-fc/commit/8c379468173a1f6b05d39488cb7c61a51eeed72e', commit_message='instruction following added. without system message.', commit_description='', oid='8c379468173a1f6b05d39488cb7c61a51eeed72e', pr_url=None, pr_revision=None, pr_num=None)"
736 |
737 |
738 |
"execution_count": 3,
739 |
"metadata": {},
740 |
"output_type": "execute_result"
741 |
742 |
743 |
"source": [
744 |
"from huggingface_hub import HfApi, HfFolder\n",
745 |
"from datetime import datetime\n",
746 |
747 |
"tag = 'main' # datetime.now().strftime(\"%m%d\")\n",
748 |
"repo_name = f\"objects76/Phi-3.5-mini-instruct-fc\"\n",
749 |
"print(f'model_id, revision=\"{repo_name}\", \"{tag}\"')\n",
750 |
751 |
752 |
"# Instantiate HfApi to interact with Hugging Face Hub\n",
753 |
"tokenizer.push_to_hub(repo_name, revision=tag)\n",
754 |
"model.push_to_hub(repo_name, revision=tag,\n",
755 |
" max_shard_size=\"5GB\",\n",
756 |
" # safe_serialization=True, private=True,\n",
757 |
" commit_message='instruction following added. without system message.')\n",
758 |
759 |
760 |
"# upload additional files\n",
761 |
762 |
"# srcfiles = 'output_qwen/2024-08-09/source.tar.gz'\n",
763 |
"# !tar -czvf {srcfiles} alpaca*.jsonl xlam*.jsonl qwen2-xlam-5.py\n"
764 |
765 |
766 |
767 |
"cell_type": "code",
768 |
"execution_count": 5,
769 |
"metadata": {},
770 |
"outputs": [],
771 |
"source": [
772 |
"import json\n",
773 |
"def build_readme(readme_path, outpath):\n",
774 |
" # get prompt sample\n",
775 |
" test_samples = []\n",
776 |
" with open(\"xlam-dataset-60k-qwen2-test.jsonl\") as fp:\n",
777 |
" test_samples = [json.loads(line) for line in fp]\n",
778 |
" messages = test_samples[5]['messages']\n",
779 |
780 |
" with open(readme_path) as fp:\n",
781 |
" txt = fp.read()\n",
782 |
" txt = txt.replace('USERMSG_PLACE_HOLDER', messages[0]['content'].replace('```', '[TRIPLE_BACKTICK]'))\n",
783 |
" # txt = txt.replace('MESSAGE_PLACE_HOLDER', str(messages[:-1]))\n",
784 |
" txt = txt.replace('RESPONSE_PLACE_HOLDER', str(messages[-1]['content']))\n",
785 |
" with open(outpath, 'w') as fp:\n",
786 |
" fp.write(txt)\n",
787 |
788 |
789 |
" # tokenizer.apply_chat_template(messages, tokenize=False) | out\n",
790 |
" # evals = []\n",
791 |
" # for i, sample in enumerate(test_samples):\n",
792 |
" # print(f'-- sample {i} --'| magenta)\n",
793 |
" # evals.append( get_answer(sample['messages'], model, tokenizer) )\n",
794 |
795 |
"build_readme('outputs_unslot/README_TEMPLATE.md', model_local + '/README.md')\n",
796 |
797 |
"if 1: # update file\n",
798 |
" local_files = [\n",
799 |
" # \"output_qwen/README.md\",\n",
800 |
" __file__\n",
801 |
" ]\n",
802 |
" api = HfApi()\n",
803 |
" for file_path in local_files:\n",
804 |
" # target_path = file_path.replace('output_qwen/2024-08-08/', '')\n",
805 |
" target_path = Path(file_path).name\n",
806 |
" api.upload_file(\n",
807 |
" path_or_fileobj= file_path,\n",
808 |
" path_in_repo= target_path,\n",
809 |
" repo_id=repo_name, revision=tag,\n",
810 |
" repo_type=\"model\",\n",
811 |
" # commit_message=\"Add README.md file\"\n",
812 |
" )"
813 |
814 |
815 |
816 |
"metadata": {
817 |
"kernelspec": {
818 |
"display_name": "fcv3-2",
819 |
"language": "python",
820 |
"name": "python3"
821 |
822 |
"language_info": {
823 |
"codemirror_mode": {
824 |
"name": "ipython",
825 |
"version": 3
826 |
827 |
"file_extension": ".py",
828 |
"mimetype": "text/x-python",
829 |
"name": "python",
830 |
"nbconvert_exporter": "python",
831 |
"pygments_lexer": "ipython3",
832 |
"version": "3.10.14"
833 |
834 |
835 |
"nbformat": 4,
836 |
"nbformat_minor": 2
837 |