{ "parameters": { "max_total_tokens": 2048, "max_input_length": 1024, "max_batch_total_tokens": 8192, "max_concurrent_requests": 1, "max_best_of": 1, "max_stop_sequences": 4, "max_batch_size": 1, "waiting_served_ratio": 1.2 }, "hardware": { "task_type": "text-generation", "accelerator": "gpu", "num_gpus": 1, "gpu_memory_gb": 24, "distributed_setup": false }, "framework_type": "pytorch", "torch_compile": false, "trust_remote_code": true, "disable_custom_kernels": true }