Skip to content

Commit 34b9db5

Browse files
committed
Request and model concurrency
This change adds support for multiple concurrent requests, as well as loading multiple models by spawning multiple runners. The default settings are currently set at 1 concurrent request per model and only 1 loaded model at a time, but these can be adjusted by setting OLLAMA_NUM_PARALLEL and OLLAMA_MAX_LOADED_MODELS.
1 parent ee448de commit 34b9db5

30 files changed

+2548
-1363
lines changed

api/client.go

+7
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,13 @@ func ClientFromEnvironment() (*Client, error) {
9191
}, nil
9292
}
9393

94+
// NewClient returns a pointer to a new Client whose base and http fields are
// set to the supplied values. The arguments are stored as given; no
// validation or defaulting is performed here.
func NewClient(base *url.URL, http *http.Client) *Client {
95+
return &Client{
96+
base: base,
97+
http: http,
98+
}
99+
}
100+
94101
func (c *Client) do(ctx context.Context, method, path string, reqData, respData any) error {
95102
var reqBody io.Reader
96103
var data []byte

format/bytes.go

+1
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ const (
1515

1616
KibiByte = Byte * 1024
1717
MebiByte = KibiByte * 1024
18+
GibiByte = MebiByte * 1024
1819
)
1920

2021
func HumanBytes(b int64) string {

gpu/amd_common.go

+56-14
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ import (
77
"log/slog"
88
"os"
99
"path/filepath"
10-
"strconv"
10+
"runtime"
1111
"strings"
1212
)
1313

@@ -35,22 +35,64 @@ func GetSupportedGFX(libDir string) ([]string, error) {
3535
return ret, nil
3636
}
3737

38-
func amdSetVisibleDevices(ids []int, skip map[int]interface{}) {
39-
// Set the visible devices if not already set
40-
// TODO - does sort order matter?
41-
devices := []string{}
42-
for i := range ids {
43-
if _, skipped := skip[i]; skipped {
38+
// rocmGetVisibleDevicesEnv returns an environment variable name and value
// for the given GPUs: the name is always "HIP_VISIBLE_DEVICES" and the value
// is the comma-separated list of info.ID for every entry whose Library is
// "rocm". Entries for any other library are skipped with a debug log. Unlike
// the removed amdSetVisibleDevices, this function does not modify the
// process environment itself.
func rocmGetVisibleDevicesEnv(gpuInfo []GpuInfo) (string, string) {
39+
ids := []string{}
40+
for _, info := range gpuInfo {
41+
if info.Library != "rocm" {
42+
// TODO shouldn't happen if things are wired correctly...
43+
slog.Debug("rocmGetVisibleDevicesEnv skipping over non-rocm device", "library", info.Library)
4444
continue
4545
}
46-
devices = append(devices, strconv.Itoa(i))
46+
ids = append(ids, info.ID)
4747
}
48+
// An empty gpuInfo (or one with no rocm entries) yields an empty value string.
return "HIP_VISIBLE_DEVICES", strings.Join(ids, ",")
49+
}
4850

49-
val := strings.Join(devices, ",")
50-
err := os.Setenv("HIP_VISIBLE_DEVICES", val)
51-
if err != nil {
52-
slog.Warn(fmt.Sprintf("failed to set env: %s", err))
53-
} else {
54-
slog.Info("Setting HIP_VISIBLE_DEVICES=" + val)
51+
// commonAMDValidateLibDir returns the first directory for which rocmLibUsable
// returns true, trying candidates in this order:
//  1. the "bin" directory under $HIP_PATH, when HIP_PATH is set
//  2. each entry of LD_LIBRARY_PATH (PATH on Windows), made absolute
//  3. RocmStandardLocation
//  4. a "rocm" directory next to the running executable
//
// If no candidate is usable it returns an empty string and an error.
func commonAMDValidateLibDir() (string, error) {
52+
// We try to favor system paths first, so that we can wire up the subprocess to use
53+
// the system version. Only use our bundled version if the system version doesn't work.
54+
// This gives users more recovery options if versions have subtle problems at runtime.
55+
56+
// Prefer explicit HIP env var
57+
hipPath := os.Getenv("HIP_PATH")
58+
if hipPath != "" {
59+
hipLibDir := filepath.Join(hipPath, "bin")
60+
if rocmLibUsable(hipLibDir) {
61+
slog.Debug("detected ROCM via HIP_PATH=" + hipPath)
62+
return hipLibDir, nil
63+
}
64+
}
65+
66+
// Scan the LD_LIBRARY_PATH or PATH
67+
pathEnv := "LD_LIBRARY_PATH"
68+
if runtime.GOOS == "windows" {
69+
pathEnv = "PATH"
70+
}
71+
72+
paths := os.Getenv(pathEnv)
73+
for _, path := range filepath.SplitList(paths) {
74+
d, err := filepath.Abs(path)
75+
if err != nil {
76+
// Entries that cannot be made absolute are skipped rather than treated as fatal.
continue
77+
}
78+
if rocmLibUsable(d) {
79+
return d, nil
80+
}
81+
}
82+
83+
// Well known location(s)
84+
if rocmLibUsable(RocmStandardLocation) {
85+
return RocmStandardLocation, nil
86+
}
87+
88+
// Installer payload location if we're running the installed binary
89+
exe, err := os.Executable()
90+
// A failure to resolve the executable path is not reported; the function
// simply falls through to the final error below.
if err == nil {
91+
rocmTargetDir := filepath.Join(filepath.Dir(exe), "rocm")
92+
if rocmLibUsable(rocmTargetDir) {
93+
slog.Debug("detected ROCM next to ollama executable " + rocmTargetDir)
94+
return rocmTargetDir, nil
95+
}
5596
}
97+
return "", fmt.Errorf("no suitable rocm found, falling back to CPU")
5698
}

gpu/amd_hip_windows.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ func NewHipLib() (*HipLib, error) {
6969
// Release calls windows.FreeLibrary on hl.dll and logs a warning if that
// call returns an error. hl.dll is reset to 0 whether or not the call
// succeeded.
func (hl *HipLib) Release() {
7070
err := windows.FreeLibrary(hl.dll)
7171
if err != nil {
72-
slog.Warn(fmt.Sprintf("failed to unload amdhip64.dll: %s", err))
72+
slog.Warn("failed to unload amdhip64.dll", "error", err)
7373
}
7474
hl.dll = 0
7575
}
@@ -98,7 +98,7 @@ func (hl *HipLib) HipGetDeviceCount() int {
9898
return 0
9999
}
100100
if status != hipSuccess {
101-
slog.Warn(fmt.Sprintf("failed call to hipGetDeviceCount: %d %s", status, err))
101+
slog.Warn("failed call to hipGetDeviceCount", "status", status, "error", err)
102102
}
103103
return count
104104
}

0 commit comments

Comments
 (0)