-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathconfig.example.yaml
More file actions
32 lines (24 loc) · 1.05 KB
/
config.example.yaml
File metadata and controls
32 lines (24 loc) · 1.05 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
---
# Model Manager configuration
#
# Copy to config.local.yaml and edit to override defaults.
# Values shown here are the defaults from config.py.

# Ollama connection
OLLAMA_BASE_URL: "http://localhost:11434"

# VRAM management
VRAM_SAFETY_MARGIN_MB: 1024  # Reserve 1 GB of VRAM headroom
VRAM_ESTIMATION_MULTIPLIER: 1.3  # Multiply model size by this for initial VRAM estimate

# Model management
MODEL_KEEP_ALIVE: 300  # Seconds to keep models loaded after last use
MAX_CONCURRENT_PER_MODEL: 20  # Max concurrent requests per loaded model

# Scheduler
SCHEDULER_STRATEGY: "demand_based"  # Options: greedy, priority_first, demand_based, balanced
SCHEDULER_LOOP_INTERVAL: 0.1  # Seconds between scheduler ticks

# Resource monitoring
MONITOR_POLL_INTERVAL: 2  # Seconds between GPU stat polls

# Queue
QUEUE_MAX_SIZE: 1000  # Maximum number of queued jobs

# HTTP API
http_port: 5000  # Port for the Flask HTTP API

# Storage (auto-detected for your platform, uncomment to override)
# data_dir:
# cache_dir: