LivePortrait2

Sleeping

File size: 5,241 Bytes

e3af00f

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Example of using Triton Server Wrapper with RAPIDS/CuPy library in Jupyter Notebook"
   ]
  },
  {
   "cell_type": "markdown",
   "source": [
    "### Pure Python/CuPy and Triton Wrapper equivalent of The RAPIDS-Triton Linear Example:\n",
    " <a href=\"https://github.com/rapidsai/rapids-triton-linear-example#the-rapids-triton-linear-example\">https://github.com/rapidsai/rapids-triton-linear-example#the-rapids-triton-linear-example</a>\n",
    " (Remark: Above example is focused on latency minimization - our equivalent is focused on easy of use)"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%% md\n"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "## Triton server setup with custom linear model"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "Install dependencies"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import sys\n",
    "!{sys.executable} -m pip install numpy"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Required imports:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import cupy as cp\n",
    "\n",
    "from pytriton.model_config import ModelConfig, Tensor\n",
    "from pytriton.triton import Triton\n",
    "from pytriton.decorators import batch"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Define linear model (for simplicity, sample model parameters are defined in class initializer):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "VECTOR_SIZE = 10\n",
    "\n",
    "class LinearModel:\n",
    "    def __init__(self):\n",
    "        self.alpha = 2\n",
    "        self.beta = cp.arange(VECTOR_SIZE)\n",
    "\n",
    "    @batch\n",
    "    def linear(self, **inputs):\n",
    "        u_batch, v_batch = inputs.values()\n",
    "        u_batch_cp, v_batch_cp = cp.asarray(u_batch), cp.asarray(v_batch)\n",
    "        lin = u_batch_cp * self.alpha + v_batch_cp + self.beta\n",
    "        return {\"lin\": cp.asnumpy(lin)}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Instantiate titon wrapper class and load model with defined callable:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "triton = Triton()\n",
    "lin_model = LinearModel()\n",
    "triton.bind(\n",
    "    model_name=\"Linear\",\n",
    "    infer_func=lin_model.linear,\n",
    "    inputs=[\n",
    "        Tensor(dtype=np.float64, shape=(VECTOR_SIZE,)),\n",
    "        Tensor(dtype=np.float64, shape=(VECTOR_SIZE,)),\n",
    "    ],\n",
    "    outputs=[\n",
    "        Tensor(name=\"lin\", dtype=np.float64, shape=(-1,)),\n",
    "    ],\n",
    "    config=ModelConfig(max_batch_size=128),\n",
    "    strict=True,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Run triton server with defined model inference callable"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "triton.run()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Example inference performed with ModelClient calling triton server"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pytriton.client import ModelClient\n",
    "\n",
    "VECTOR_SIZE = 10\n",
    "BATCH_SIZE = 2\n",
    "\n",
    "u_batch = np.ones((BATCH_SIZE, VECTOR_SIZE), dtype=np.float64)\n",
    "v_batch = np.ones((BATCH_SIZE, VECTOR_SIZE), dtype=np.float64)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "with ModelClient(\"localhost\", \"Linear\") as client:\n",
    "    result_batch = client.infer_batch(u_batch, v_batch)\n",
    "\n",
    "for output_name, data_batch in result_batch.items():\n",
    "    print(f\"{output_name}: {data_batch.tolist()}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Stop triton server at the end"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "triton.stop()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}