---
# Namespace named "local-ai"; the other resources in this file set
# `namespace: local-ai`, so it has to exist before they are applied.
apiVersion: v1
kind: Namespace
metadata:
  name: local-ai
---
# Claim for 50Gi of storage in the local-ai namespace.
# No storageClassName is set, so the cluster's default StorageClass (if any)
# decides how the volume is provisioned.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: models-pvc
  namespace: local-ai
spec:
  accessModes:
    # ReadWriteOnce: the volume can be mounted read-write by a single node.
    - ReadWriteOnce
  resources:
    requests:
      storage: 50Gi
---
# Single-replica Deployment of the local-ai container with one NVIDIA GPU and
# the models-pvc claim mounted at /build/models.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: local-ai
  namespace: local-ai
  labels:
    app: local-ai
spec:
  # The selector must match the pod template labels below.
  selector:
    matchLabels:
      app: local-ai
  replicas: 1
  template:
    metadata:
      labels:
        app: local-ai
      name: local-ai
    spec:
      # Requires a RuntimeClass named "nvidia" to be defined in the cluster.
      runtimeClassName: "nvidia"
      containers:
        - name: local-ai
          image: quay.io/go-skynet/local-ai:master-cublas-cuda12
          # NOTE(review): "master-..." looks like a moving tag; with
          # IfNotPresent a node that already cached the image will not
          # re-pull it. Confirm this is intended.
          imagePullPolicy: IfNotPresent
          # Passed to the image entrypoint as its argument list.
          # Presumably the name of the model to load - TODO confirm.
          args:
            - phi-2
          env:
            # Quoted so the value stays the string "true" (env values must
            # be strings, not YAML booleans).
            - name: DEBUG
              value: "true"
          resources:
            limits:
              # One GPU via the nvidia.com/gpu extended resource.
              nvidia.com/gpu: 1
          volumeMounts:
            - name: models-volume
              mountPath: /build/models
      volumes:
        # Backed by the models-pvc PersistentVolumeClaim in this namespace.
        - name: models-volume
          persistentVolumeClaim:
            claimName: models-pvc
---
# NodePort Service that forwards TCP port 8080 to port 8080 on pods labelled
# app=local-ai. No nodePort is pinned, so Kubernetes allocates one from the
# cluster's node-port range.
apiVersion: v1
kind: Service
metadata:
  name: local-ai
  namespace: local-ai
spec:
  selector:
    app: local-ai
  type: NodePort
  ports:
    - protocol: TCP
      targetPort: 8080
      port: 8080