GGUF
Inference Endpoints
conversational
jan-hq committed on
Commit
6df67d9
1 Parent(s): 764847e

Create model.yml

Browse files
Files changed (1) hide show
  1. model.yml +16 -0
model.yml ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Phi 3
2
+ model: phi3:mini
3
+ version: 1
4
+
5
+ # Results Preferences
6
+ top_p: 0.95
7
+ temperature: 0.7
8
+ frequency_penalty: 0
9
+ presence_penalty: 0
10
+ max_tokens: 128000 # Infer from base config.json -> max_position_embeddings
11
+ stream: true # true | false
12
+ stop: ["<|end|>"]
13
+
14
+ # Engine / Model Settings
15
+ engine: cortex.llamacpp
16
+ prompt_template: "<|user|>\n{prompt}<|end|>\n<|assistant|>\n"