# Source: docker_mlx_cpp/models/presets.yaml at main · RobotFlow-Labs/docker_mlx_cpp · GitHub
# docker_mlx_cpp — Model Presets
# Curated models for different workloads. Use preset names instead of full model IDs.
# Usage: mlx-cpp run chat-small "hello"

# Top-level mapping of preset name -> settings. Fields used by the entries below:
#   model          repository-style model ID (e.g. "mlx-community/...")
#   context_size   integer; set only on the chat, code and vision presets
#                  (presumably the context window in tokens — confirm with loader)
#   description    short human-readable summary of the preset
#   min_memory_gb  integer amount in GB (presumably a minimum host-memory
#                  requirement — confirm how/if the loader enforces it)
presets:
  # ── Chat / General Purpose ─────────────────────────────────────────────────
  # Smallest chat preset. The model ID names a 360M-parameter SmolLM2 instruct
  # model with a 4bit suffix; its context_size is the lowest of the chat presets.
  chat-small:
    model: mlx-community/SmolLM2-360M-Instruct-4bit
    context_size: 4096
    description: "Tiny chat model — fits on any Mac (8GB+)"
    min_memory_gb: 8

  # General-purpose chat preset. Keeps the same min_memory_gb as chat-small (8)
  # while doubling context_size to 8192.
  chat-default:
    model: mlx-community/Llama-3.2-3B-Instruct-4bit
    context_size: 8192
    description: "Balanced quality/speed for general use"
    min_memory_gb: 8

  # Higher-quality chat preset for machines with 16 GB or more.
  # The 8B instruct model belongs to the Llama 3.1 family (Llama 3.2 ships only
  # 1B/3B text and 11B/90B vision sizes), so the ID points at the 3.1 repo.
  chat-quality:
    model: mlx-community/Meta-Llama-3.1-8B-Instruct-4bit
    context_size: 8192
    description: "Higher quality chat — needs 16GB+"
    min_memory_gb: 16

  # Largest chat preset: the model ID names a 24B-parameter Mistral Small 3.1
  # instruct build with a 4bit suffix. Its context_size (32768) and
  # min_memory_gb (32) are the highest of the chat presets.
  chat-large:
    model: mlx-community/Mistral-Small-3.1-24B-Instruct-2503-4bit
    context_size: 32768
    description: "Large model for complex reasoning — needs 32GB+"
    min_memory_gb: 32

  # ── Code ───────────────────────────────────────────────────────────────────
  # Primary code preset: the model ID names Qwen2.5-Coder 7B instruct, 4bit suffix.
  # NOTE(review): the "80+ languages" figure in the description cannot be checked
  # from this file — confirm against the model card.
  code:
    model: mlx-community/Qwen2.5-Coder-7B-Instruct-4bit
    context_size: 32768
    description: "Code generation and completion — 80+ languages"
    min_memory_gb: 16

  # Lighter code preset: the 3B variant of the same Qwen2.5-Coder family, with
  # half the context_size (16384) and half the min_memory_gb (8) of `code`.
  code-small:
    model: mlx-community/Qwen2.5-Coder-3B-Instruct-4bit
    context_size: 16384
    description: "Lightweight code model"
    min_memory_gb: 8

  # ── Vision-Language Models ─────────────────────────────────────────────────
  # Main vision-language preset: the model ID names Qwen2-VL 7B instruct with a
  # 4bit suffix. Unlike the image-generation presets, it sets a context_size.
  vision:
    model: mlx-community/Qwen2-VL-7B-Instruct-4bit
    context_size: 8192
    description: "Image + text understanding"
    min_memory_gb: 16

  # Smaller vision-language preset (SmolVLM2 2.2B instruct per the model ID).
  # NOTE(review): unlike the chat, code and `vision` presets, this model ID has
  # no 4bit suffix — confirm the repo is in the format the runtime expects and
  # that min_memory_gb: 8 is still adequate.
  vision-small:
    model: mlx-community/SmolVLM2-2.2B-Instruct
    context_size: 4096
    description: "Lightweight vision model with video support"
    min_memory_gb: 8

  # ── Image Generation ───────────────────────────────────────────────────────
  # Default image-generation preset (FLUX.1 schnell, 4bit suffix, per the model
  # ID). No context_size key is set for this entry.
  image-gen:
    model: mlx-community/FLUX.1-schnell-4bit
    description: "Fast image generation (FLUX)"
    min_memory_gb: 16

  # Alternative image-generation preset using Stable Diffusion XL base 1.0.
  # Among the presets visible here, this is the only model ID outside the
  # mlx-community namespace.
  # NOTE(review): confirm the runtime can load this upstream repo directly.
  image-gen-sd:
    model: stabilityai/stable-diffusion-xl-base-1.0
    description: "Stable Diffusion XL"
    min_memory_gb: 16

  # ── Audio ──────────────────────────────────────────────────────────────────
  # Speech-to-text preset; the model ID names Whisper large-v3-turbo with an
  # fp16 suffix. No context_size key is set for this entry.
  audio-stt:
    model: mlx-community/whisper-large-v3-turbo-asr-fp16
    description: "Speech-to-text (Whisper)"
    min_memory_gb: 8

  # Text-to-speech preset; the model ID names Kokoro 82M with a bf16 suffix.
  # No context_size key is set for this entry.
  audio-tts:
    model: mlx-community/Kokoro-82M-bf16
    description: "Text-to-speech (Kokoro)"
    min_memory_gb: 8

  # ── Embeddings ─────────────────────────────────────────────────────────────
  # Default text-embedding preset. No context_size key is set for this entry.
  # NOTE(review): the "1024-dim" figure in the description cannot be checked
  # from this file — confirm it, and the repo ID, against the model card.
  embeddings:
    model: mlx-community/jina-embeddings-v5-small
    description: "Text embeddings for search/retrieval (1024-dim)"
    min_memory_gb: 8

  # Larger embedding preset, English-only per its description and the "en" in
  # the model ID. Uses the same min_memory_gb (8) as `embeddings`.
  embeddings-large:
    model: mlx-community/bge-large-en-v1.5
    description: "High-quality English embeddings"
    min_memory_gb: 8