# llama3.1 / model.yml
# Duplicated from cortexso/llama3 (commit deb2cba, verified) — uploaded by van-qa
---
name: Llama 3
# NOTE: value contains a colon — quoted so no YAML parser mis-reads it
model: "llama3:8B"
version: 1
# Results Preferences
stop:
  # Llama 3 end-of-sequence markers; quoted because they start with '<'
  - "<|end_of_text|>"
  - "<|eot_id|>"
top_p: 0.95
temperature: 0.7
frequency_penalty: 0
presence_penalty: 0
max_tokens: 8192 # Infer from base config.json -> max_position_embeddings
stream: true # true | false
# Engine / Model Settings
# ngl = layers offloaded to GPU: num_hidden_layers (32) + 1 for the output layer.
# Infer from base config.json -> num_hidden_layers (NOT num_attention_heads).
ngl: 33
ctx_len: 8192 # Infer from base config.json -> max_position_embeddings
engine: cortex.llamacpp
prompt_template: "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system_message}<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n{prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
# Prompt template: Can only be retrieved from instruct model
# - https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct/blob/main/tokenizer_config.json#L2053
# - Requires jinja format parser