alishafique committed on
Commit f3fdfde
1 parent: 04ef9ac

Upload run.ipynb

Files changed (1)
  1. run.ipynb +190 -1
run.ipynb CHANGED
@@ -1 +1,190 @@
- {"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: chatbot_streaming"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio "]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["# Downloading files from the demo repo\n", "import os\n", "!wget -q https://github.com/gradio-app/gradio/raw/main/demo/chatbot_streaming/testcase_messages.py"]}, {"cell_type": "code", "execution_count": null, "id": "44380577570523278879349135829904343037", "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "import random\n", "import time\n", "\n", "with gr.Blocks() as demo:\n", " chatbot = gr.Chatbot()\n", " msg = gr.Textbox()\n", " clear = gr.Button(\"Clear\")\n", "\n", " def user(user_message, history):\n", " return \"\", history + [[user_message, None]]\n", "\n", " def bot(history):\n", " bot_message = random.choice([\"How are you?\", \"I love you\", \"I'm very hungry\"])\n", " history[-1][1] = \"\"\n", " for character in bot_message:\n", " history[-1][1] += character\n", " time.sleep(0.05)\n", " yield history\n", "\n", " msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(\n", " bot, chatbot, chatbot\n", " )\n", " clear.click(lambda: None, None, chatbot, queue=False)\n", "\n", "if __name__ == \"__main__\":\n", " demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}
+ {
+ "cells": [
+ {
+ "cell_type": "code",
+ "source": [
+ "# %%capture\n",
+ "# # Installs Unsloth, Xformers (Flash Attention) and all other packages!\n",
+ "!pip install \"unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git\" --quiet\n",
+ "\n",
+ "# We have to check which Torch version for Xformers (2.3 -> 0.0.27)\n",
+ "from torch import __version__; from packaging.version import Version as V\n",
+ "xformers = \"xformers==0.0.27\" if V(__version__) < V(\"2.4.0\") else \"xformers\"\n",
+ "!pip install --no-deps {xformers} \"trl<0.9.0\" peft accelerate bitsandbytes triton --quiet\n",
+ "\n",
+ "!pip install peft --quiet\n",
+ "!pip install --upgrade --no-cache-dir \"transformers<4.45.0\" --quiet # Reason: https://github.com/unslothai/unsloth/issues/1061\n",
+ "\n",
+ "!pip install -q gradio"
+ ],
+ "metadata": {
+ "id": "g0gl_TBTXRYC",
+ "outputId": "67222684-6f4f-4027-d8a5-32788590081c",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ }
+ },
+ "execution_count": 1,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
+ " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
+ " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "import gradio as gr\n",
+ "import random\n",
+ "import time\n",
+ "import os\n",
+ "from unsloth import FastLanguageModel\n",
+ "import torch\n",
+ "max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!\n",
+ "dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+\n",
+ "load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.\n",
+ "\n",
+ "huggingface_token = \"\"\n",
+ "\n",
+ "if True:\n",
+ " from unsloth import FastLanguageModel\n",
+ " model, tokenizer = FastLanguageModel.from_pretrained(\n",
+ " model_name = \"traversaal-llm-regional-languages/Urdu_Llama3_2_4bit_PF25_adapter\", # YOUR MODEL YOU USED FOR TRAINING\n",
+ " max_seq_length = max_seq_length,\n",
+ " dtype = dtype,\n",
+ " load_in_4bit = load_in_4bit,\n",
+ " token = huggingface_token,\n",
+ " )\n",
+ " FastLanguageModel.for_inference(model) # Enable native 2x faster inference\n",
+ "\n",
+ "\n",
+ "alpaca_prompt = \"\"\"{0}\\nInput: {1}\\nOutput: \"\"\"\n",
+ "\n",
+ "def generate_text(prompt):\n",
+ " # Format the prompt with instruction and input, and leave output prompt blank\n",
+ " formatted_prompt = alpaca_prompt.format(\n",
+ " \"دیئے گئے موضوع کے بارے میں ایک مختصر پیراگراف لکھیں۔\", # instruction\n",
+ " prompt # user input\n",
+ " )\n",
+ "\n",
+ " # Tokenize the prompt and move tensors to GPU\n",
+ " inputs = tokenizer([formatted_prompt], return_tensors=\"pt\").to(\"cuda\")\n",
+ "\n",
+ " # Generate output from the model\n",
+ " outputs = model.generate(**inputs, max_new_tokens=200, use_cache=True)\n",
+ "\n",
+ " # Decode the output and remove the instruction + input part\n",
+ " generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)\n",
+ "\n",
+ " # Remove the prompt part by splitting on \"Output:\" and returning only generated part\n",
+ " result = generated_text.split(\"Output:\")[-1].strip()\n",
+ "\n",
+ " return result\n",
+ "\n",
+ "iface = gr.Interface(\n",
+ " fn=generate_text,\n",
+ " inputs=gr.Textbox(lines=2, placeholder=\"Enter your prompt here...\"),\n",
+ " examples=['میں کراچی جانا چاہتا ہوں، وہاں کے کچھ بہترین مقامات کون سے ہیں؟',\n",
+ " 'amazing food locations in Singapore',\n",
+ " 'best activities in London'],\n",
+ " outputs=\"text\",\n",
+ " title=\"Urdu Chatbot - Powered by traversaal-urdu-llama-3.2-1b\",\n",
+ " description=\"Ask me anything in Urdu!\",\n",
+ ")\n",
+ "\n",
+ "iface.launch()\n"
+ ],
+ "metadata": {
+ "id": "SM6OLuM5gve7",
+ "outputId": "a3512ee6-8f5f-40c5-d792-1c7d34bbe2e2",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 796
+ }
+ },
+ "execution_count": 2,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n",
+ "==((====))== Unsloth 2024.9.post4: Fast Llama patching. Transformers = 4.44.2.\n",
+ " \\\\ /| GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.\n",
+ "O^O/ \\_/ \\ Pytorch: 2.4.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.\n",
+ "\\ / Bfloat16 = FALSE. FA [Xformers = 0.0.28.post1. FA2 = False]\n",
+ " \"-____-\" Free Apache license: http://github.com/unslothai/unsloth\n",
+ "Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "Unsloth 2024.9.post4 patched 16 layers with 16 QKV layers, 16 O layers and 16 MLP layers.\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).\n",
+ "\n",
+ "Colab notebook detected. To show errors in colab notebook, set debug=True in launch()\n",
+ "* Running on public URL: https://8d8a38dbca08b1f69c.gradio.live\n",
+ "\n",
+ "This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)\n"
+ ]
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ "<IPython.core.display.HTML object>"
+ ],
+ "text/html": [
+ "<div><iframe src=\"https://8d8a38dbca08b1f69c.gradio.live\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
+ ]
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": []
+ },
+ "metadata": {},
+ "execution_count": 2
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [],
+ "metadata": {
+ "id": "t1Zk6rKQoeYc"
+ },
+ "execution_count": null,
+ "outputs": []
+ }
+ ],
+ "metadata": {
+ "colab": {
+ "provenance": [],
+ "gpuType": "T4"
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "name": "python3"
+ },
+ "accelerator": "GPU"
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+ }
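
For reference, a minimal standalone sketch of the inference path the new notebook wires into Gradio, assembled from the cells above. The model name, prompt template, and generation settings come straight from the diff; the CUDA runtime is assumed, and the Hugging Face token is left empty as in the notebook (a real token is only needed if the adapter repo is gated):

from unsloth import FastLanguageModel

# Load the 4-bit adapter as the notebook does (dtype=None auto-detects:
# float16 on Tesla T4/V100, bfloat16 on Ampere+).
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="traversaal-llm-regional-languages/Urdu_Llama3_2_4bit_PF25_adapter",
    max_seq_length=2048,
    dtype=None,
    load_in_4bit=True,
    token="",  # kept empty as in the notebook; supply a token for gated repos
)
FastLanguageModel.for_inference(model)  # enable Unsloth's faster inference mode

# Same Alpaca-style template: instruction, user input, empty output slot.
alpaca_prompt = """{0}\nInput: {1}\nOutput: """
formatted = alpaca_prompt.format(
    "دیئے گئے موضوع کے بارے میں ایک مختصر پیراگراف لکھیں۔",  # "Write a short paragraph about the given topic."
    "amazing food locations in Singapore",
)

inputs = tokenizer([formatted], return_tensors="pt").to("cuda")
outputs = model.generate(**inputs, max_new_tokens=200, use_cache=True)
text = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(text.split("Output:")[-1].strip())  # drop the echoed prompt, keep the completion

The final split on "Output:" works because the decoded text echoes the full prompt; everything after the last "Output:" is the model's completion.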