Spaces:
Runtime error
Runtime error
alishafique
committed on
Commit
•
f3fdfde
1
Parent(s):
04ef9ac
Upload run.ipynb
Browse files
run.ipynb
CHANGED
@@ -1 +1,190 @@
|
|
1 |
-
{
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"source": [
|
6 |
+
"# %%capture\n",
|
7 |
+
"# # Installs Unsloth, Xformers (Flash Attention) and all other packages!\n",
|
8 |
+
"!pip install \"unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git\" --quiet\n",
|
9 |
+
"\n",
|
10 |
+
"# We have to check which Torch version for Xformers (2.3 -> 0.0.27)\n",
|
11 |
+
"from torch import __version__; from packaging.version import Version as V\n",
|
12 |
+
"xformers = \"xformers==0.0.27\" if V(__version__) < V(\"2.4.0\") else \"xformers\"\n",
|
13 |
+
"!pip install --no-deps {xformers} \"trl<0.9.0\" peft accelerate bitsandbytes triton --quiet\n",
|
14 |
+
"\n",
|
15 |
+
"!pip install peft --quiet\n",
|
16 |
+
"!pip install --upgrade --no-cache-dir \"transformers<4.45.0\" --quiet # Reason: https://github.com/unslothai/unsloth/issues/1061\n",
|
17 |
+
"\n",
|
18 |
+
"!pip install -q gradio"
|
19 |
+
],
|
20 |
+
"metadata": {
|
21 |
+
"id": "g0gl_TBTXRYC",
|
22 |
+
"outputId": "67222684-6f4f-4027-d8a5-32788590081c",
|
23 |
+
"colab": {
|
24 |
+
"base_uri": "https://localhost:8080/"
|
25 |
+
}
|
26 |
+
},
|
27 |
+
"execution_count": 1,
|
28 |
+
"outputs": [
|
29 |
+
{
|
30 |
+
"output_type": "stream",
|
31 |
+
"name": "stdout",
|
32 |
+
"text": [
|
33 |
+
" Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
|
34 |
+
" Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
|
35 |
+
" Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n"
|
36 |
+
]
|
37 |
+
}
|
38 |
+
]
|
39 |
+
},
|
40 |
+
{
|
41 |
+
"cell_type": "code",
|
42 |
+
"source": [
|
43 |
+
"import gradio as gr\n",
|
44 |
+
"import random\n",
|
45 |
+
"import time\n",
|
46 |
+
"import os\n",
|
47 |
+
"from unsloth import FastLanguageModel\n",
|
48 |
+
"import torch\n",
|
49 |
+
"max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!\n",
|
50 |
+
"dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+\n",
|
51 |
+
"load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.\n",
|
52 |
+
"\n",
|
53 |
+
"huggingface_token = \"\"\n",
|
54 |
+
"\n",
|
55 |
+
"if True:\n",
|
56 |
+
" from unsloth import FastLanguageModel\n",
|
57 |
+
" model, tokenizer = FastLanguageModel.from_pretrained(\n",
|
58 |
+
" model_name = \"traversaal-llm-regional-languages/Urdu_Llama3_2_4bit_PF25_adapter\", # YOUR MODEL YOU USED FOR TRAINING\n",
|
59 |
+
" max_seq_length = max_seq_length,\n",
|
60 |
+
" dtype = dtype,\n",
|
61 |
+
" load_in_4bit = load_in_4bit,\n",
|
62 |
+
" token = huggingface_token,\n",
|
63 |
+
" )\n",
|
64 |
+
" FastLanguageModel.for_inference(model) # Enable native 2x faster inference\n",
|
65 |
+
"\n",
|
66 |
+
"\n",
|
67 |
+
"alpaca_prompt = \"\"\"{0}\\nInput: {1}\\nOutput: \"\"\"\n",
|
68 |
+
"\n",
|
69 |
+
"def generate_text(prompt):\n",
|
70 |
+
" # Format the prompt with instruction and input, and leave output prompt blank\n",
|
71 |
+
" formatted_prompt = alpaca_prompt.format(\n",
|
72 |
+
" \"دیئے گئے موضوع کے بارے میں ایک مختصر پیراگراف لکھیں۔\", # instruction\n",
|
73 |
+
" prompt # user input\n",
|
74 |
+
" )\n",
|
75 |
+
"\n",
|
76 |
+
" # Tokenize the prompt and move tensors to GPU\n",
|
77 |
+
" inputs = tokenizer([formatted_prompt], return_tensors=\"pt\").to(\"cuda\")\n",
|
78 |
+
"\n",
|
79 |
+
" # Generate output from the model\n",
|
80 |
+
" outputs = model.generate(**inputs, max_new_tokens=200, use_cache=True)\n",
|
81 |
+
"\n",
|
82 |
+
" # Decode the output and remove the instruction + input part\n",
|
83 |
+
" generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)\n",
|
84 |
+
"\n",
|
85 |
+
" # Remove the prompt part by splitting on \"Output:\" and returning only generated part\n",
|
86 |
+
" result = generated_text.split(\"Output:\")[-1].strip()\n",
|
87 |
+
"\n",
|
88 |
+
" return result\n",
|
89 |
+
"\n",
|
90 |
+
"iface = gr.Interface(\n",
|
91 |
+
" fn=generate_text,\n",
|
92 |
+
" inputs=gr.Textbox(lines=2, placeholder=\"Enter your prompt here...\"),\n",
|
93 |
+
" examples=['میں کراچی جانا چاہتا ہوں، وہاں کے کچھ بہترین مقامات کون سے ہیں؟',\n",
|
94 |
+
" 'amazing food locations in Singapore',\n",
|
95 |
+
" 'best activities in London'],\n",
|
96 |
+
" outputs=\"text\",\n",
|
97 |
+
" title=\"Urdu Chatbot - Powered by traversaal-urdu-llama-3.2-1b\",\n",
|
98 |
+
" description=\"Ask me anything in Urdu!\",\n",
|
99 |
+
")\n",
|
100 |
+
"\n",
|
101 |
+
"iface.launch()\n"
|
102 |
+
],
|
103 |
+
"metadata": {
|
104 |
+
"id": "SM6OLuM5gve7",
|
105 |
+
"outputId": "a3512ee6-8f5f-40c5-d792-1c7d34bbe2e2",
|
106 |
+
"colab": {
|
107 |
+
"base_uri": "https://localhost:8080/",
|
108 |
+
"height": 796
|
109 |
+
}
|
110 |
+
},
|
111 |
+
"execution_count": 2,
|
112 |
+
"outputs": [
|
113 |
+
{
|
114 |
+
"output_type": "stream",
|
115 |
+
"name": "stdout",
|
116 |
+
"text": [
|
117 |
+
"🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n",
|
118 |
+
"==((====))== Unsloth 2024.9.post4: Fast Llama patching. Transformers = 4.44.2.\n",
|
119 |
+
" \\\\ /| GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.\n",
|
120 |
+
"O^O/ \\_/ \\ Pytorch: 2.4.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.\n",
|
121 |
+
"\\ / Bfloat16 = FALSE. FA [Xformers = 0.0.28.post1. FA2 = False]\n",
|
122 |
+
" \"-____-\" Free Apache license: http://github.com/unslothai/unsloth\n",
|
123 |
+
"Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!\n"
|
124 |
+
]
|
125 |
+
},
|
126 |
+
{
|
127 |
+
"output_type": "stream",
|
128 |
+
"name": "stderr",
|
129 |
+
"text": [
|
130 |
+
"Unsloth 2024.9.post4 patched 16 layers with 16 QKV layers, 16 O layers and 16 MLP layers.\n"
|
131 |
+
]
|
132 |
+
},
|
133 |
+
{
|
134 |
+
"output_type": "stream",
|
135 |
+
"name": "stdout",
|
136 |
+
"text": [
|
137 |
+
"Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).\n",
|
138 |
+
"\n",
|
139 |
+
"Colab notebook detected. To show errors in colab notebook, set debug=True in launch()\n",
|
140 |
+
"* Running on public URL: https://8d8a38dbca08b1f69c.gradio.live\n",
|
141 |
+
"\n",
|
142 |
+
"This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)\n"
|
143 |
+
]
|
144 |
+
},
|
145 |
+
{
|
146 |
+
"output_type": "display_data",
|
147 |
+
"data": {
|
148 |
+
"text/plain": [
|
149 |
+
"<IPython.core.display.HTML object>"
|
150 |
+
],
|
151 |
+
"text/html": [
|
152 |
+
"<div><iframe src=\"https://8d8a38dbca08b1f69c.gradio.live\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
|
153 |
+
]
|
154 |
+
},
|
155 |
+
"metadata": {}
|
156 |
+
},
|
157 |
+
{
|
158 |
+
"output_type": "execute_result",
|
159 |
+
"data": {
|
160 |
+
"text/plain": []
|
161 |
+
},
|
162 |
+
"metadata": {},
|
163 |
+
"execution_count": 2
|
164 |
+
}
|
165 |
+
]
|
166 |
+
},
|
167 |
+
{
|
168 |
+
"cell_type": "code",
|
169 |
+
"source": [],
|
170 |
+
"metadata": {
|
171 |
+
"id": "t1Zk6rKQoeYc"
|
172 |
+
},
|
173 |
+
"execution_count": null,
|
174 |
+
"outputs": []
|
175 |
+
}
|
176 |
+
],
|
177 |
+
"metadata": {
|
178 |
+
"colab": {
|
179 |
+
"provenance": [],
|
180 |
+
"gpuType": "T4"
|
181 |
+
},
|
182 |
+
"kernelspec": {
|
183 |
+
"display_name": "Python 3",
|
184 |
+
"name": "python3"
|
185 |
+
},
|
186 |
+
"accelerator": "GPU"
|
187 |
+
},
|
188 |
+
"nbformat": 4,
|
189 |
+
"nbformat_minor": 0
|
190 |
+
}
|