# Desired replica count (presumably wired to the Deployment's replicas field —
# confirm in the chart templates).
replicaCount: 1
# Settings for the LocalAI workload.
deployment:
  # Container image. Available tags are listed at
  # https://quay.io/repository/go-skynet/local-ai?tab=tags
  image: quay.io/go-skynet/local-ai:v1.40.0
  # Key/value settings for the container (presumably rendered as environment
  # variables — confirm in the chart templates). Every value is quoted so it
  # reaches the templates as a string, matching the already-quoted `debug`.
  env:
    threads: "4"
    debug: "true"
    context_size: "512"
    # JSON-encoded list of model galleries.
    galleries: '[{"name":"model-gallery", "url":"github:go-skynet/model-gallery/index.yaml"}, {"url": "github:go-skynet/model-gallery/huggingface.yaml","name":"huggingface"}]'
    # JSON-encoded list of gallery models to preload; `name` is the alias the
    # model is exposed under.
    preload_models: '[{ "id": "huggingface@thebloke__open-llama-13b-open-instruct-ggml__open-llama-13b-open-instruct.ggmlv3.q3_k_m.bin", "name": "gpt-3.5-turbo", "overrides": { "f16": true, "mmap": true }}]'
  # NOTE(review): placed under `deployment` (not `env`) because of its
  # camelCase name; the flattened source cannot confirm this — verify.
  modelsPath: "/models"
resources: {}
  # We usually recommend not to specify default resources and to leave this as a conscious
  # choice for the user. This also increases chances charts run on environments with little
  # resources, such as Minikube. If you do want to specify resources, uncomment the following
  # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
  # limits:
  #   cpu: 100m
  #   memory: 128Mi
  # requests:
  #   cpu: 100m
  #   memory: 128Mi
# Prompt templates to include.
# Note: the keys of this map will be the names of the prompt template files.
# To add one, remove the curly braces after 'promptTemplates:' and uncomment
# the example below.
promptTemplates: {}
  # ggml-gpt4all-j.tmpl: |
  #   The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response.
  #   ### Prompt:
  #   {{.Input}}
  #   ### Response:
# Models to download at runtime.
models:
  # Whether to force download models even if they already exist.
  forceDownload: false

  # The list of URLs to download models from.
  # Note: the name of the file will be the name of the loaded model.
  # Written as an explicit empty list rather than a bare (null) key; replace
  # the `[]` with entries shaped like the commented example to add models.
  list: []
    # - url: "https://gpt4all.io/models/ggml-gpt4all-j.bin"
    #   basicAuth: base64EncodedCredentials

  # Persistent storage for models and prompt templates.
  # PVC and HostPath are mutually exclusive. If both are enabled,
  # PVC configuration takes precedence. If neither is enabled, ephemeral
  # storage is used.
  # NOTE(review): the source had lost its indentation; `persistence` is nested
  # under `models` here on the assumption that the templates read
  # `.Values.models.persistence` — confirm against the chart templates.
  persistence:
    pvc:
      enabled: false
      size: 6Gi
      accessModes:
        - ReadWriteOnce

      annotations: {}

      # Optional; null leaves the storage class unset.
      storageClass: null

    hostPath:
      enabled: false
      path: "/models"
# Service settings: a ClusterIP service on port 8080 by default.
service:
  type: ClusterIP
  port: 8080
  annotations: {}
  # If using an AWS load balancer, you'll need to override the default 60s load balancer idle timeout
  # service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: "1200"
# Ingress settings; disabled by default.
ingress:
  enabled: false
  className: ""
  annotations: {}
    # kubernetes.io/ingress.class: nginx
    # kubernetes.io/tls-acme: "true"
  hosts:
    - host: chart-example.local
      paths:
        - path: /
          pathType: ImplementationSpecific
  # No TLS entries by default; to add one, replace `[]` with entries shaped
  # like the commented example.
  tls: []
  #  - secretName: chart-example-tls
  #    hosts:
  #      - chart-example.local
# Pod scheduling constraints; all empty by default.
nodeSelector: {}

tolerations: []

affinity: {}