File size: 1,289 Bytes
7def60a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
apiVersion: v1
kind: Namespace
metadata:
  name: local-ai
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: models-pvc
  namespace: local-ai
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 50Gi
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: local-ai
  namespace: local-ai
  labels:
    app: local-ai
spec:
  selector:
    matchLabels:
      app: local-ai
  replicas: 1
  template:
    metadata:
      labels:
        app: local-ai
      name: local-ai
    spec:
      runtimeClassName: "nvidia"
      containers:
        - args:
          - phi-2
          env:
          - name: DEBUG
            value: "true"
          name: local-ai
          image: quay.io/go-skynet/local-ai:master-cublas-cuda12
          imagePullPolicy: IfNotPresent
          resources:
            limits:
              nvidia.com/gpu: 1
          volumeMounts:
            - name: models-volume
              mountPath: /build/models
      volumes:
        - name: models-volume
          persistentVolumeClaim:
            claimName: models-pvc
---
apiVersion: v1
kind: Service
metadata:
  name: local-ai
  namespace: local-ai
spec:
  selector:
    app: local-ai
  type: NodePort
  ports:
    - protocol: TCP
      targetPort: 8080
      port: 8080