-
Notifications
You must be signed in to change notification settings - Fork 18
/
Copy pathservices.go
124 lines (103 loc) · 3.01 KB
/
services.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
package main
import (
	"fmt"
	"os"
	"os/exec"
	"path/filepath"
	"strconv"
	"sync"
)
// LlamaService tracks one locally spawned llama.cpp server process.
type LlamaService struct {
	Name       string    // service identifier, e.g. "embeddings" or "reranker"
	Process    *exec.Cmd // handle to the launched llama.cpp process
	ConfigPath string    // NOTE(review): never assigned in this file — presumably reserved for future use; confirm
	Port       int       // local TCP port the service listens on (127.0.0.1)
}
// Package-level registry of locally started llama.cpp services.
// All access to services must hold servicesMutex.
var (
	servicesMutex sync.Mutex                       // guards services
	services      = make(map[string]*LlamaService) // keyed by service name ("embeddings", "reranker")
)
// StartEmbeddingsService starts the local embeddings service if no remote host is configured.
//
// When config.Embeddings.Host is empty, it launches a llama.cpp server on
// 127.0.0.1:32184 serving the bundled nomic-embed-text model and rewrites
// config.Embeddings.Host to point at the local /v1/embeddings endpoint.
// The call is idempotent: if an "embeddings" service is already registered,
// it returns nil without spawning a second process.
//
// Returns an error if the model file is missing or the process fails to start.
func StartEmbeddingsService(config *Config) error {
	if config.Embeddings.Host != "" {
		return nil // Remote host is configured, don't start local service
	}
	servicesMutex.Lock()
	defer servicesMutex.Unlock()
	// Guard against double-start: overwriting the registry entry would
	// leak the previously launched process.
	if _, running := services["embeddings"]; running {
		return nil
	}
	// Default port for embeddings service
	const embeddingsPort = 32184
	modelPath := filepath.Join(config.DataPath, "models", "embeddings", "nomic-embed-text-v1.5.Q8_0.gguf")
	if _, err := os.Stat(modelPath); os.IsNotExist(err) {
		return fmt.Errorf("embeddings model not found at %s", modelPath)
	}
	llamaBinary := filepath.Join(config.DataPath, "llama-cpp", "main")
	cmd := exec.Command(llamaBinary,
		"-m", modelPath,
		"-c", "65536",
		"-np", "8",
		"-b", "8192",
		"-ub", "8192",
		"-fa",
		"--host", "127.0.0.1",
		"--port", strconv.Itoa(embeddingsPort),
		"-lv", "1",
		"--embedding")
	if err := cmd.Start(); err != nil {
		return fmt.Errorf("failed to start embeddings service: %w", err)
	}
	services["embeddings"] = &LlamaService{
		Name:    "embeddings",
		Process: cmd,
		Port:    embeddingsPort,
	}
	// Update config to use local service
	config.Embeddings.Host = fmt.Sprintf("http://127.0.0.1:%d/v1/embeddings", embeddingsPort)
	return nil
}
// StartRerankerService starts the local reranker service if no remote host is configured.
//
// When config.Reranker.Host is empty, it launches a llama.cpp server on
// 127.0.0.1:32185 serving the bundled bge-reranker model (rank pooling) and
// rewrites config.Reranker.Host to point at the local /v1/rerank endpoint.
// The call is idempotent: if a "reranker" service is already registered,
// it returns nil without spawning a second process.
//
// Returns an error if the model file is missing or the process fails to start.
func StartRerankerService(config *Config) error {
	if config.Reranker.Host != "" {
		return nil // Remote host is configured, don't start local service
	}
	servicesMutex.Lock()
	defer servicesMutex.Unlock()
	// Guard against double-start: overwriting the registry entry would
	// leak the previously launched process.
	if _, running := services["reranker"]; running {
		return nil
	}
	// Default port for reranker service
	const rerankerPort = 32185
	modelPath := filepath.Join(config.DataPath, "models", "rerankers", "slide-bge-reranker-v2-m3.Q4_K_M.gguf")
	if _, err := os.Stat(modelPath); os.IsNotExist(err) {
		return fmt.Errorf("reranker model not found at %s", modelPath)
	}
	llamaBinary := filepath.Join(config.DataPath, "llama-cpp", "main")
	cmd := exec.Command(llamaBinary,
		"-m", modelPath,
		"-c", "65536",
		"-np", "8",
		"-b", "8192",
		"-ub", "8192",
		"-fa",
		"--host", "127.0.0.1",
		"--port", strconv.Itoa(rerankerPort),
		"-lv", "1",
		"--reranking",
		"--pooling", "rank")
	if err := cmd.Start(); err != nil {
		return fmt.Errorf("failed to start reranker service: %w", err)
	}
	services["reranker"] = &LlamaService{
		Name:    "reranker",
		Process: cmd,
		Port:    rerankerPort,
	}
	// Update config to use local service
	config.Reranker.Host = fmt.Sprintf("http://127.0.0.1:%d/v1/rerank", rerankerPort)
	return nil
}
// StopAllServices stops all running llama.cpp services.
//
// Each tracked process is killed and then reaped with Wait so no zombie
// child processes are left behind. Entries are removed from the registry
// so services can be started again afterwards.
func StopAllServices() {
	servicesMutex.Lock()
	defer servicesMutex.Unlock()
	for name, service := range services {
		if service.Process != nil && service.Process.Process != nil {
			// Best-effort shutdown: the process may already have exited.
			_ = service.Process.Process.Kill()
			// Reap the child so it doesn't linger as a zombie.
			_ = service.Process.Wait()
		}
		// Deleting during range is safe in Go; clear the registry so a
		// later Start* call can relaunch the service.
		delete(services, name)
	}
}