-
Notifications
You must be signed in to change notification settings - Fork 5
Expand file tree
/
Copy pathdocker-compose.yml
More file actions
executable file
·115 lines (111 loc) · 3.1 KB
/
Copy pathdocker-compose.yml
File metadata and controls
executable file
·115 lines (111 loc) · 3.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
# Additional Compose files merged into this project. Each path is relative to
# this file; going by the file names, every fragment declares one family of
# model services (fragments are not shown here -- confirm their contents).
include:
  - compose/models-gpt.yml
  - compose/models-qwen.yml
  - compose/models-mistral.yml
  - compose/models-glm.yml
  - compose/models-gemma.yml
  - compose/models-nemotron.yml
  - compose/models-llama.yml
  # Currently disabled; uncomment a line to load that fragment as well.
  # - compose/models-phi.yml
  # - compose/models-deepseek.yml
  # - compose/models-experimental.yml
# Infrastructure services of the stack. All three share the same hardening
# (read-only root filesystem, all capabilities dropped, no-new-privileges,
# small noexec tmpfs scratch space) and the same log-rotation settings.
services:
  # Built from ./waker/Dockerfile. Judging by its environment and the Docker
  # socket mount, it presumably starts and stops the "vllm-*" model containers
  # on demand -- confirm against the waker source.
  waker:
    build:
      context: .
      dockerfile: ./waker/Dockerfile
    container_name: vllm-waker
    restart: unless-stopped
    read_only: true
    cap_drop:
      - ALL
    security_opt:
      - "no-new-privileges:true"
    tmpfs:
      - /tmp:noexec,nosuid,size=10m
    # Settings read by the waker process. The notes below are inferred from
    # the variable names only -- verify against the waker source.
    environment:
      PORT: "18080"
      MANAGE_PREFIX: "vllm-"
      # These are the container_name values of the three services in this file.
      IGNORE_NAMES: "vllm-gateway,vllm-waker,vllm-request-validator"
      IDLE_STOP_SECONDS: "1200"  # 1200 s = 20 min, for slow models
      NO_STOP_BEFORE_SECONDS: "30"
      HEALTH_TIMEOUT_MS: "900000"  # 900000 ms = 15 min
      DOCKER_STOP_TIMEOUT_SECONDS: "5"
      MODEL_HEALTH_URL_TEMPLATE: "http://{name}:8000/health"
      TICK_MS: "1000"
      BUSY_STATUS_CODE: "429"
      # Taken from WAKER_VERBOSE in the Compose environment; defaults to 0.
      VERBOSE: "${WAKER_VERBOSE:-0}"
    volumes:
      # Grants this container access to the host's Docker daemon.
      - /var/run/docker.sock:/var/run/docker.sock
      # Writable bind mount (the root filesystem itself is read-only).
      - ./stats:/stats
      - ./models.json:/config/models.json:ro
    # Debug only: publish the waker port on the host. Presumably meant to be
    # enabled together with the "default" network entry below.
    # ports: ["18080:18080"]
    networks:
      - vllm_internal
      # - default  # debug
    # Exits 0 only when fetching /healthz on the local port yields an OK
    # response; any non-OK status or request error marks the check as failed.
    healthcheck:
      test:
        - CMD
        - node
        - "-e"
        - "fetch('http://localhost:18080/healthz').then(r=>process.exit(r.ok?0:1)).catch(()=>process.exit(1))"
      interval: 30s
      timeout: 5s
      retries: 5
    # Log driver comes from DOCKER_LOG_DRIVER (default: json-file).
    logging:
      driver: "${DOCKER_LOG_DRIVER:-json-file}"
      options:
        max-size: "10m"
        max-file: "3"

  # Built from ./request-validator/Dockerfile; attached to the internal
  # network only and given the same read-only models.json as the waker.
  request-validator:
    build:
      context: .
      dockerfile: ./request-validator/Dockerfile
    container_name: vllm-request-validator
    restart: unless-stopped
    read_only: true
    cap_drop:
      - ALL
    security_opt:
      - "no-new-privileges:true"
    tmpfs:
      - /tmp:noexec,nosuid,size=10m
    environment:
      PORT: "18081"
      # Shares the WAKER_VERBOSE switch with the waker service; defaults to 0.
      VERBOSE: "${WAKER_VERBOSE:-0}"
    volumes:
      - ./models.json:/config/models.json:ro
    networks:
      - vllm_internal
    # Runs the image's healthcheck.js with node (script not shown here).
    healthcheck:
      test:
        - CMD
        - node
        - healthcheck.js
      interval: 30s
      timeout: 5s
      retries: 3
    logging:
      driver: "${DOCKER_LOG_DRIVER:-json-file}"
      options:
        max-size: "10m"
        max-file: "3"

  # nginx front end: the only service here that publishes a port and the only
  # one attached to the "default" network in addition to the internal one.
  api-gateway:
    # Pinned by digest as well as by tag.
    image: nginx:1.27-alpine@sha256:63ffc0d1f14e4082b832c6a42e606e9a0384a526f16ddd720af7c1f018f2f7c4
    container_name: vllm-gateway
    restart: unless-stopped
    # Started only after request-validator reports healthy.
    depends_on:
      request-validator:
        condition: service_healthy
    # Replaces the image's entrypoint and runs nginx in the foreground.
    entrypoint:
      - nginx
      - "-g"
      - "daemon off;"
    read_only: true
    cap_drop:
      - ALL
    # The only capabilities handed back after dropping everything.
    cap_add:
      - CHOWN
      - SETGID
      - SETUID
    security_opt:
      - "no-new-privileges:true"
    # Writable scratch locations on top of the read-only root filesystem.
    tmpfs:
      - /tmp:noexec,nosuid,size=10m
      - /var/cache/nginx:noexec,nosuid,size=10m
      - /run:noexec,nosuid,size=1m
    volumes:
      - ./gateway.conf:/etc/nginx/conf.d/gateway.conf:ro
    # Bound to the host loopback interface only: 127.0.0.1:8009 -> 8080.
    ports:
      - "127.0.0.1:8009:8080"
    networks:
      - default
      - vllm_internal
    logging:
      driver: "${DOCKER_LOG_DRIVER:-json-file}"
      options:
        max-size: "10m"
        max-file: "3"
# Project networks. The implicit "default" network is not declared here.
networks:
  # internal: true creates an externally isolated network, so containers
  # attached only to it have no connectivity outside the project.
  vllm_internal:
    internal: true