package model

import (
	"context"
	"fmt"
	"os"
	"path/filepath"
	"strings"
	"sync"

	"github.com/mudler/LocalAI/pkg/functions"
	"github.com/mudler/LocalAI/pkg/grpc"
	"github.com/mudler/LocalAI/pkg/templates"
	"github.com/mudler/LocalAI/pkg/utils"

	process "github.com/mudler/go-processmanager"
	"github.com/rs/zerolog/log"
)
|
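// PromptTemplateData holds the values exposed to whole-prompt templates
// (completion, edit, and functions-style prompts).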
type PromptTemplateData struct {
	SystemPrompt         string
	SuppressSystemPrompt bool
	Input                string
	Instruction          string
	Functions            []functions.Function
	MessageIndex         int
}
|
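// ChatMessageTemplateData holds the per-message values exposed to chat
// message templates, one instance per message in the conversation.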
type ChatMessageTemplateData struct {
	SystemPrompt string
	Role         string
	RoleName     string
	FunctionName string
	Content      string
	MessageIndex int
	Function     bool
	FunctionCall interface{}
	LastMessage  bool
}
|
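// ModelLoader tracks the models loaded in memory, the gRPC processes that
// back them, and the clients used to talk to those processes. Access to the
// internal maps is serialized through a mutex.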
type ModelLoader struct {
	ModelPath string
	mu        sync.Mutex

	grpcClients   map[string]grpc.Backend
	models        map[string]ModelAddress
	grpcProcesses map[string]*process.Process
	templates     *templates.TemplateCache
	wd            *WatchDog
}
|
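// ModelAddress is the network address of a running gRPC backend serving a model.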
type ModelAddress string
|
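// GRPC returns a client for the backend at this address. The watchdog is
// enabled on the client only when one is provided.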
func (m ModelAddress) GRPC(parallel bool, wd *WatchDog) grpc.Backend {
	// Enable the watchdog on the client only when one was configured.
	enableWD := wd != nil
	return grpc.NewClient(string(m), parallel, wd, enableWD)
}
|
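// NewModelLoader returns a ModelLoader rooted at modelPath, with empty
// model/client/process registries and a template cache for that path.
//
// Illustrative usage (startBackend below is a hypothetical loader callback;
// real callers pass a backend-specific starter to LoadModel):
//
//	ml := NewModelLoader("/models")
//	addr, err := ml.LoadModel("ggml-model.bin", startBackend)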
func NewModelLoader(modelPath string) *ModelLoader {
	nml := &ModelLoader{
		ModelPath:     modelPath,
		grpcClients:   make(map[string]grpc.Backend),
		models:        make(map[string]ModelAddress),
		templates:     templates.NewTemplateCache(modelPath),
		grpcProcesses: make(map[string]*process.Process),
	}

	return nml
}
|
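// SetWatchDog sets the watchdog handed to gRPC clients created by this
// loader (see ModelAddress.GRPC).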
func (ml *ModelLoader) SetWatchDog(wd *WatchDog) {
	ml.wd = wd
}
|
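// ExistsInModelPath reports whether s exists inside the loader's model path.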
func (ml *ModelLoader) ExistsInModelPath(s string) bool {
	return utils.ExistsInPath(ml.ModelPath, s)
}
|
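// Files that are never treated as models, matched case-insensitively by name.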
var knownFilesToSkip []string = []string{
	"MODEL_CARD",
	"README",
	"README.md",
}
|
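// Name suffixes that mark non-model files (configs, templates, partial
// downloads, archives, and similar).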
var knownModelsNameSuffixToSkip []string = []string{
	".tmpl",
	".keep",
	".yaml",
	".yml",
	".json",
	".txt",
	".md",
	".MD",
	".DS_Store",
	".",
	".partial",
	".tar.gz",
}
|
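// ListFilesInModelPath returns the names of regular files in the model path
// that look like models, skipping known documentation files, known non-model
// suffixes, and directories.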
func (ml *ModelLoader) ListFilesInModelPath() ([]string, error) {
	files, err := os.ReadDir(ml.ModelPath)
	if err != nil {
		return []string{}, err
	}

	models := []string{}
FILE:
	for _, file := range files {
		// Skip known non-model files by exact (case-insensitive) name.
		for _, skip := range knownFilesToSkip {
			if strings.EqualFold(file.Name(), skip) {
				continue FILE
			}
		}

		// Skip known non-model suffixes.
		for _, skip := range knownModelsNameSuffixToSkip {
			if strings.HasSuffix(file.Name(), skip) {
				continue FILE
			}
		}

		// Skip directories.
		if file.IsDir() {
			continue
		}

		models = append(models, file.Name())
	}

	return models, nil
}
|
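// LoadModel returns the address of modelName if it is already loaded;
// otherwise it calls loader to start a backend and caches the returned
// address. loader receives the model name and the full path to the model
// file, e.g. (startBackend is a hypothetical helper):
//
//	addr, err := ml.LoadModel("ggml-model.bin", func(name, file string) (ModelAddress, error) {
//		// start a backend for file and return its gRPC address
//		return startBackend(name, file)
//	})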
func (ml *ModelLoader) LoadModel(modelName string, loader func(string, string) (ModelAddress, error)) (ModelAddress, error) {
	ml.mu.Lock()
	defer ml.mu.Unlock()

	// Check if we already have a loaded model.
	if model := ml.CheckIsLoaded(modelName); model != "" {
		return model, nil
	}

	// Load the model and keep it in memory for later use.
	modelFile := filepath.Join(ml.ModelPath, modelName)
	log.Debug().Msgf("Loading model in memory from file: %s", modelFile)

	model, err := loader(modelName, modelFile)
	if err != nil {
		return "", err
	}

	ml.models[modelName] = model
	return model, nil
}
|
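// ShutdownModel stops the gRPC process backing modelName, if one is loaded.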
func (ml *ModelLoader) ShutdownModel(modelName string) error {
	ml.mu.Lock()
	defer ml.mu.Unlock()

	return ml.stopModel(modelName)
}
|
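// stopModel tears down the process for modelName. Callers must hold ml.mu.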
func (ml *ModelLoader) stopModel(modelName string) error {
	// The deferred delete runs even when the model is unknown, so a stale
	// process entry cannot outlive its model entry.
	defer ml.deleteProcess(modelName)
	if _, ok := ml.models[modelName]; !ok {
		return fmt.Errorf("model %s not found", modelName)
	}
	return nil
}
|
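// CheckIsLoaded returns the address of model s when it is loaded and its
// backend answers a health check. When the backend is unresponsive and its
// process has died, the process is deleted so the caller can recreate it,
// and an empty address is returned.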
func (ml *ModelLoader) CheckIsLoaded(s string) ModelAddress {
	var client grpc.Backend
	if m, ok := ml.models[s]; ok {
		log.Debug().Msgf("Model already loaded in memory: %s", s)
		if c, ok := ml.grpcClients[s]; ok {
			client = c
		} else {
			client = m.GRPC(false, ml.wd)
		}

		alive, err := client.HealthCheck(context.Background())
		if !alive {
			log.Warn().Err(err).Msgf("GRPC Model not responding: %s", s)
			log.Warn().Msgf("Deleting the process in order to recreate it")
			// Guard the map lookup: a missing process entry would otherwise
			// cause a nil-pointer dereference on IsAlive.
			proc, ok := ml.grpcProcesses[s]
			if ok && !proc.IsAlive() {
				log.Debug().Msgf("GRPC Process is not responding: %s", s)
				err := ml.deleteProcess(s)
				if err != nil {
					log.Error().Err(err).Str("process", s).Msg("error stopping process")
				}
				return ""
			}
		}

		return m
	}

	return ""
}
|
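// Template types handled by the loader. ChatMessageTemplate is evaluated
// per message; the others render a whole prompt.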
const (
	ChatPromptTemplate templates.TemplateType = iota
	ChatMessageTemplate
	CompletionPromptTemplate
	EditPromptTemplate
	FunctionsPromptTemplate
)
|
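// EvaluateTemplateForPrompt renders a whole-prompt template of the given
// type. ChatMessageTemplate is rejected here; use
// EvaluateTemplateForChatMessage for per-message templates.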
func (ml *ModelLoader) EvaluateTemplateForPrompt(templateType templates.TemplateType, templateName string, in PromptTemplateData) (string, error) {
	if templateType == ChatMessageTemplate {
		return "", fmt.Errorf("invalid templateType: ChatMessage")
	}
	return ml.templates.EvaluateTemplate(templateType, templateName, in)
}
|
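// EvaluateTemplateForChatMessage renders the per-message chat template for a
// single message.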
func (ml *ModelLoader) EvaluateTemplateForChatMessage(templateName string, messageData ChatMessageTemplateData) (string, error) {
	return ml.templates.EvaluateTemplate(ChatMessageTemplate, templateName, messageData)
}