urdu-llama-3.2-1b

Runtime error

App Files Files Community

alishafique committed on Oct 10

Commit

f3fdfde

•

1 Parent(s): 04ef9ac

Upload run.ipynb

Browse files

Files changed (1) hide show

run.ipynb +190 -1

run.ipynb CHANGED Viewed

	@@ -1 +1,190 @@
1	- {"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: chatbot_streaming"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio "]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["# Downloading files from the demo repo\n", "import os\n", "!wget -q https://github.com/gradio-app/gradio/raw/main/demo/chatbot_streaming/testcase_messages.py"]}, {"cell_type": "code", "execution_count": null, "id": "44380577570523278879349135829904343037", "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "import random\n", "import time\n", "\n", "with gr.Blocks() as demo:\n", " chatbot = gr.Chatbot()\n", " msg = gr.Textbox()\n", " clear = gr.Button(\"Clear\")\n", "\n", " def user(user_message, history):\n", " return \"\", history + [[user_message, None]]\n", "\n", " def bot(history):\n", " bot_message = random.choice([\"How are you?\", \"I love you\", \"I'm very hungry\"])\n", " history[-1][1] = \"\"\n", " for character in bot_message:\n", " history[-1][1] += character\n", " time.sleep(0.05)\n", " yield history\n", "\n", " msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(\n", " bot, chatbot, chatbot\n", " )\n", " clear.click(lambda: None, None, chatbot, queue=False)\n", "\n", "if __name__ == \"__main__\":\n", " demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}

+{
+  "cells": [
+    {
+      "cell_type": "code",
+      "source": [
+        "# %%capture\n",
+        "# # Installs Unsloth, Xformers (Flash Attention) and all other packages!\n",
+        "!pip install \"unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git\" --quiet\n",
+        "\n",
+        "# We have to check which Torch version for Xformers (2.3 -> 0.0.27)\n",
+        "from torch import __version__; from packaging.version import Version as V\n",
+        "xformers = \"xformers==0.0.27\" if V(__version__) < V(\"2.4.0\") else \"xformers\"\n",
+        "!pip install --no-deps {xformers} \"trl<0.9.0\" peft accelerate bitsandbytes triton --quiet\n",
+        "\n",
+        "!pip install peft --quiet\n",
+        "!pip install --upgrade --no-cache-dir \"transformers<4.45.0\"  --quiet # Reason: https://github.com/unslothai/unsloth/issues/1061\n",
+        "\n",
+        "!pip install -q gradio"
+      ],
+      "metadata": {
+        "id": "g0gl_TBTXRYC",
+        "outputId": "67222684-6f4f-4027-d8a5-32788590081c",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        }
+      },
+      "execution_count": 1,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "  Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
+            "  Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
+            "  Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import gradio as gr\n",
+        "import random\n",
+        "import time\n",
+        "import os\n",
+        "from unsloth import FastLanguageModel\n",
+        "import torch\n",
+        "# --- Model configuration -------------------------------------------------\n",
+        "max_seq_length = 2048 # Maximum sequence length handed to the loader\n",
+        "dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+\n",
+        "load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.\n",
+        "\n",
+        "# Never hard-code an access token in a shared notebook: read it from the\n",
+        "# HF_TOKEN environment variable instead. An unset or empty variable becomes\n",
+        "# None, which is the loader's own default for `token`.\n",
+        "huggingface_token = os.environ.get(\"HF_TOKEN\") or None\n",
+        "\n",
+        "# --- Load the fine-tuned adapter and switch it to inference mode ----------\n",
+        "model, tokenizer = FastLanguageModel.from_pretrained(\n",
+        "    model_name = \"traversaal-llm-regional-languages/Urdu_Llama3_2_4bit_PF25_adapter\", # YOUR MODEL YOU USED FOR TRAINING\n",
+        "    max_seq_length = max_seq_length,\n",
+        "    dtype = dtype,\n",
+        "    load_in_4bit = load_in_4bit,\n",
+        "    token = huggingface_token,\n",
+        ")\n",
+        "FastLanguageModel.for_inference(model) # Enable native 2x faster inference\n",
+        "\n",
+        "\n",
+        "# Prompt template: {0} is the instruction, {1} is the user input; the model\n",
+        "# continues the text after \"Output: \".\n",
+        "alpaca_prompt = \"\"\"{0}\\nInput: {1}\\nOutput: \"\"\"\n",
+        "\n",
+        "def generate_text(prompt):\n",
+        "    \"\"\"Generate the model's completion for a user-supplied topic.\n",
+        "\n",
+        "    The topic is inserted into `alpaca_prompt` together with a fixed Urdu\n",
+        "    instruction, and at most 200 new tokens are generated.\n",
+        "\n",
+        "    Args:\n",
+        "        prompt: Text entered by the user.\n",
+        "\n",
+        "    Returns:\n",
+        "        Only the newly generated text, with surrounding whitespace stripped.\n",
+        "    \"\"\"\n",
+        "    # Fill the template: fixed instruction first, then the user's text.\n",
+        "    formatted_prompt = alpaca_prompt.format(\n",
+        "        \"دیئے گئے موضوع کے بارے میں ایک مختصر پیراگراف لکھیں۔\",  # instruction\n",
+        "        prompt,  # user input\n",
+        "    )\n",
+        "\n",
+        "    # Tokenize the prompt and move tensors to GPU\n",
+        "    inputs = tokenizer([formatted_prompt], return_tensors=\"pt\").to(\"cuda\")\n",
+        "\n",
+        "    # Generate output from the model\n",
+        "    outputs = model.generate(**inputs, max_new_tokens=200, use_cache=True)\n",
+        "\n",
+        "    # The returned sequence starts with the prompt tokens. Drop them by length\n",
+        "    # instead of splitting the decoded text on \"Output:\", which would discard\n",
+        "    # real output whenever the model itself produces the string \"Output:\".\n",
+        "    prompt_length = inputs[\"input_ids\"].shape[1]\n",
+        "    new_tokens = outputs[0][prompt_length:]\n",
+        "\n",
+        "    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()\n",
+        "\n",
+        "# Sample prompts passed to the interface as `examples`.\n",
+        "example_prompts = [\n",
+        "    'میں کراچی جانا چاہتا ہوں، وہاں کے کچھ بہترین مقامات کون سے ہیں؟',\n",
+        "    'amazing food locations in Singapore',\n",
+        "    'best activities in London',\n",
+        "]\n",
+        "\n",
+        "# Two-line textbox used as the single input component.\n",
+        "prompt_box = gr.Textbox(lines=2, placeholder=\"Enter your prompt here...\")\n",
+        "\n",
+        "# Text-in / text-out interface wired to generate_text.\n",
+        "iface = gr.Interface(\n",
+        "    fn=generate_text,\n",
+        "    inputs=prompt_box,\n",
+        "    outputs=\"text\",\n",
+        "    examples=example_prompts,\n",
+        "    title=\"Urdu Chatbot - Powered by traversaal-urdu-llama-3.2-1b\",\n",
+        "    description=\"Ask me anything in Urdu!\",\n",
+        ")\n",
+        "\n",
+        "iface.launch()\n"
+      ],
+      "metadata": {
+        "id": "SM6OLuM5gve7",
+        "outputId": "a3512ee6-8f5f-40c5-d792-1c7d34bbe2e2",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 796
+        }
+      },
+      "execution_count": 2,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n",
+            "==((====))==  Unsloth 2024.9.post4: Fast Llama patching. Transformers = 4.44.2.\n",
+            "   \\\\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.\n",
+            "O^O/ \\_/ \\    Pytorch: 2.4.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.\n",
+            "\\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post1. FA2 = False]\n",
+            " \"-____-\"     Free Apache license: http://github.com/unslothai/unsloth\n",
+            "Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Unsloth 2024.9.post4 patched 16 layers with 16 QKV layers, 16 O layers and 16 MLP layers.\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).\n",
+            "\n",
+            "Colab notebook detected. To show errors in colab notebook, set debug=True in launch()\n",
+            "* Running on public URL: https://8d8a38dbca08b1f69c.gradio.live\n",
+            "\n",
+            "This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)\n"
+          ]
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
+            ],
+            "text/html": [
+              "<div><iframe src=\"https://8d8a38dbca08b1f69c.gradio.live\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
+            ]
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": []
+          },
+          "metadata": {},
+          "execution_count": 2
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [],
+      "metadata": {
+        "id": "t1Zk6rKQoeYc"
+      },
+      "execution_count": null,
+      "outputs": []
+    }
+  ],
+  "metadata": {
+    "colab": {
+      "provenance": [],
+      "gpuType": "T4"
+    },
+    "kernelspec": {
+      "display_name": "Python 3",
+      "name": "python3"
+    },
+    "accelerator": "GPU"
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}