# Model identification
name: 'Llama 3'
model: 'llama3:8B'
version: 1

# Results Preferences
# Tokens listed under `stop` are quoted so they are always read as plain strings.
stop:
  - '<|end_of_text|>'
  - '<|eot_id|>'
top_p: 0.95
temperature: 0.7
frequency_penalty: 0
presence_penalty: 0
# Inferred from base config.json -> max_position_embeddings
max_tokens: 8192
# Accepts true or false
stream: true

# Engine / Model Settings
# NOTE(review): in llama.cpp, ngl (n_gpu_layers) counts layers offloaded to the
# GPU, so it presumably tracks config.json -> num_hidden_layers (+1 for the
# output layer) rather than num_attention_heads; both are 32 for Llama 3 8B,
# which would explain 33 — confirm before reusing this rule for other models.
ngl: 33 # Infer from base config.json -> num_attention_heads
ctx_len: 8192 # Infer from base config.json -> max_position_embeddings
engine: cortex.llamacpp
# Prompt template: can only be retrieved from the instruct model.
# - Source: https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct/blob/main/tokenizer_config.json#L2053
# - Requires a Jinja format parser.
# The {system_message} and {prompt} placeholders are presumably filled in by
# the engine at request time — verify against the consumer. Keep the value in
# double quotes: the "\n" escapes are only parsed as newlines in that style.
prompt_template: "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system_message}<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n{prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"