# compose.yaml — whisperx-api-server (STARTcloud/whisperx-api-server, main)
services:
  # GPU variant of the API server, built from Dockerfile.cuda in this directory.
  whisperx-api-server-cuda:
    image: whisperx-api-server-cuda
    build:
      context: .
      dockerfile: Dockerfile.cuda
    # Healthy when GET /healthcheck on container port 8000 succeeds;
    # `curl --fail` turns HTTP error statuses into a failed check.
    healthcheck:
      test: ["CMD-SHELL", "curl --fail http://localhost:8000/healthcheck || exit 1"]
    # `--factory` makes uvicorn call create_app() to build the application.
    command: uvicorn --factory whisperx_api_server.main:create_app
    ports:
      # HOST:CONTAINER, quoted so YAML always reads it as a string.
      # The whisperx-api-server-cpu service publishes the same host port,
      # so only one of the two services can be running at a time.
      - "8000:8000"
    volumes:
      # Persist job queue database and uploads
      - /data/whisperx-api/data:/workspace/data
      # Persist downloaded models (HuggingFace and PyTorch).
      # Both cache paths are backed by the same host directory.
      - /data/whisperx-api/models:/root/.cache/huggingface
      - /data/whisperx-api/models:/root/.cache/torch
    environment:
      # Database and uploads path (inside the /workspace/data volume)
      - DATABASE_PATH=/workspace/data/whisperx.db
      - UPLOAD_DIR=/workspace/data/uploads
      # Optional: Set HuggingFace token for diarization
      # (taken from the host's HF_TOKEN; empty when unset)
      - HF_TOKEN=${HF_TOKEN:-}
      # Default model for transcriptions (use double underscore for nested config).
      # Overridable via the host's WHISPERX_MODEL variable.
      - WHISPER__MODEL=${WHISPERX_MODEL:-large-v2}
      # Compute type and device for GPU; overridable via the host's
      # WHISPERX_COMPUTE_TYPE and WHISPERX_DEVICE variables.
      - WHISPER__COMPUTE_TYPE=${WHISPERX_COMPUTE_TYPE:-float16}
      - WHISPER__INFERENCE_DEVICE=${WHISPERX_DEVICE:-cuda}
      # Use Silero VAD (compatible with cuDNN 9)
      - WHISPER__VAD_METHOD=silero
      # Diarization model (use 3.1 with pyannote 3.x)
      - DIARIZATION__MODEL=pyannote/speaker-diarization-3.1
    # Reserve one NVIDIA GPU for this container.
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]


  # CPU variant of the API server, built from Dockerfile.cpu in this directory.
  # NOTE(review): no WHISPER__* / DIARIZATION__* overrides are set here, so the
  # application's own defaults apply — confirm they are suitable for CPU.
  whisperx-api-server-cpu:
    image: whisperx-api-server-cpu
    build:
      context: .
      dockerfile: Dockerfile.cpu
    # Healthy when GET /healthcheck on container port 8000 succeeds;
    # `curl --fail` turns HTTP error statuses into a failed check.
    healthcheck:
      test: ["CMD-SHELL", "curl --fail http://localhost:8000/healthcheck || exit 1"]
    # `--factory` makes uvicorn call create_app() to build the application.
    command: uvicorn --factory whisperx_api_server.main:create_app
    ports:
      # HOST:CONTAINER, quoted so YAML always reads it as a string.
      # The whisperx-api-server-cuda service publishes the same host port,
      # so only one of the two services can be running at a time.
      - "8000:8000"
    volumes:
      # Persist job queue database and uploads
      - /data/whisperx-api/data:/workspace/data
      # Persist downloaded models (HuggingFace and PyTorch).
      # Both cache paths are backed by the same host directory.
      - /data/whisperx-api/models:/root/.cache/huggingface
      - /data/whisperx-api/models:/root/.cache/torch
    environment:
      # Database and uploads path (inside the /workspace/data volume)
      - DATABASE_PATH=/workspace/data/whisperx.db
      - UPLOAD_DIR=/workspace/data/uploads