# Model configuration for Phi 3 (phi3:mini) on the cortex.llamacpp engine.
name: Phi 3
model: phi3:mini
version: 1

# Results Preferences
top_p: 0.95
temperature: 0.7
frequency_penalty: 0
presence_penalty: 0
max_tokens: 128000  # Infer from base config.json -> max_position_embeddings
stream: true  # true | false
# Stop sequence; same end-of-turn marker that appears in prompt_template below.
stop: ["<|end|>"]

# Engine / Model Settings
engine: cortex.llamacpp
# Double-quoted so each \n escape is read as a newline; {prompt} is the
# placeholder written between the user and assistant markers.
prompt_template: "<|user|>\n{prompt}<|end|>\n<|assistant|>\n"