{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Example of using the Triton Server Wrapper with the RAPIDS/CuPy library in a Jupyter Notebook" ] }, { "cell_type": "markdown", "source": [ "### Pure Python/CuPy and Triton Wrapper equivalent of The RAPIDS-Triton Linear Example:\n", " https://github.com/rapidsai/rapids-triton-linear-example#the-rapids-triton-linear-example\n", " (Remark: the example above is focused on latency minimization, while our equivalent is focused on ease of use)" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%% md\n" } } }, { "cell_type": "markdown", "source": [ "## Triton server setup with a custom linear model" ], "metadata": { "collapsed": false } }, { "cell_type": "markdown", "source": [ "Install dependencies:" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "import sys\n", "# PyTriton and CuPy are assumed to be preinstalled (e.g. in a RAPIDS container); only NumPy is added here\n", "!{sys.executable} -m pip install numpy" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } }, { "cell_type": "markdown", "metadata": {}, "source": [ "Required imports:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "import numpy as np\n", "import cupy as cp\n", "\n", "from pytriton.model_config import ModelConfig, Tensor\n", "from pytriton.triton import Triton\n", "from pytriton.decorators import batch" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Define the linear model (for simplicity, sample model parameters are defined in the class initializer):" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "VECTOR_SIZE = 10\n", "\n", "class LinearModel:\n", "    def __init__(self):\n", "        self.alpha = 2\n", "        self.beta = cp.arange(VECTOR_SIZE)\n", "\n", "    @batch\n", "    def linear(self, **inputs):\n", "        # inputs is a dict of named NumPy batches, ordered as defined in bind()\n", "        u_batch, v_batch = inputs.values()\n", "        u_batch_cp, v_batch_cp = cp.asarray(u_batch), cp.asarray(v_batch)\n", "        # compute lin = alpha * u + v + beta on the GPU and copy the result back to host memory\n", "        lin = u_batch_cp * self.alpha + v_batch_cp + self.beta\n", "        return {\"lin\": cp.asnumpy(lin)}" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Instantiate the Triton wrapper class and bind the model with the defined inference callable:" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "triton = Triton()\n", "lin_model = LinearModel()\n", "triton.bind(\n", "    model_name=\"Linear\",\n", "    infer_func=lin_model.linear,\n", "    inputs=[\n", "        Tensor(dtype=np.float64, shape=(VECTOR_SIZE,)),\n", "        Tensor(dtype=np.float64, shape=(VECTOR_SIZE,)),\n", "    ],\n", "    outputs=[\n", "        Tensor(name=\"lin\", dtype=np.float64, shape=(-1,)),\n", "    ],\n", "    config=ModelConfig(max_batch_size=128),\n", "    strict=True,\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Run the Triton server with the defined model inference callable:" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "triton.run()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Example inference performed with ModelClient calling the Triton server" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from pytriton.client import ModelClient\n", "\n", "VECTOR_SIZE = 10\n", "BATCH_SIZE = 2\n", "\n", "# prepare two sample input batches of shape (BATCH_SIZE, VECTOR_SIZE)\n", "u_batch = np.ones((BATCH_SIZE, VECTOR_SIZE), dtype=np.float64)\n", "v_batch = np.ones((BATCH_SIZE, VECTOR_SIZE), dtype=np.float64)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "with ModelClient(\"localhost\", \"Linear\") as client:\n",
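"    # infer_batch sends the already batched arrays in a single request; positional arguments\n", "    # are matched to the model inputs in the order they were defined in bind().\n", "    # For the all-ones inputs above, each output row equals 2*1 + 1 + arange(10) = [3, 4, ..., 12].\n",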
" result_batch = client.infer_batch(u_batch, v_batch)\n", "\n", "for output_name, data_batch in result_batch.items():\n", " print(f\"{output_name}: {data_batch.tolist()}\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Stop triton server at the end" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "triton.stop()" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.10" } }, "nbformat": 4, "nbformat_minor": 1 }