{
  "parameters": {
    "max_total_tokens": 4096,
    "max_input_length": 2048,
    "max_batch_total_tokens": 16384,
    "max_concurrent_requests": 2,
    "max_batch_size": 2,
    "waiting_served_ratio": 0.8
  },
  "hardware": {
    "task_type": "text-generation",
    "accelerator": "gpu",
    "num_gpus": 1,
    "gpu_memory_gb": 24,
    "distributed_setup": false
  },
  "framework_type": "pytorch",
  "torch_compile": true,
  "trust_remote_code": true,
  "disable_custom_kernels": false
}