-
Notifications
You must be signed in to change notification settings - Fork 92
Expand file tree
/
Copy pathdevspace.yaml
More file actions
265 lines (245 loc) · 10.7 KB
/
Copy pathdevspace.yaml
File metadata and controls
265 lines (245 loc) · 10.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
# DevSpace configuration for local development of the ARK services.
# Config schema version declared for this file.
version: v2beta1
# Name of this DevSpace project.
name: ark-stack
# Dependencies - other DevSpace configurations that make up this stack.
# Each entry points at a directory containing its own DevSpace configuration.
dependencies:
  localhost-gateway:
    path: ./services/localhost-gateway
  ark-controller:
    path: ./ark
  ark-broker:
    path: ./services/ark-broker
  ark-api:
    path: ./services/ark-api
  ark-dashboard:
    path: ./services/ark-dashboard

  # Optional services, disabled by default. Uncomment an entry to include it.
  # (argo-workflows can also be added without editing this file: the
  # enable-argo profile patches it in when ENABLE_ARGO=true.)
  # argo-workflows:
  #   path: ./services/argo-workflows
  # ark-api-a2a:
  #   path: ./services/ark-api-a2a
  # ark-mcp:
  #   path: ./services/ark-mcp

  # Observability services. Uncomment to enable locally; only one of phoenix or
  # langfuse is needed.
  #
  # phoenix: has a hook that restarts ark-controller so it picks up the phoenix URL.
  # phoenix:
  #   git: https://github.com/mckinsey/agents-at-scale-marketplace
  #   tag: phoenix-v0.1.5
  #   subPath: services/phoenix
  #
  # langfuse: has a hook that restarts ark-controller so it picks up the langfuse URL.
  # langfuse:
  #   git: https://github.com/mckinsey/agents-at-scale-marketplace
  #   tag: langfuse-v0.1.4
  #   subPath: services/langfuse
# Vars - every variable is read from the environment and falls back to the
# default listed here. To change one, set the environment variable before the
# devspace command, for example:
#   ENABLE_CERT_MANAGER=false devspace dev          (disable cert-manager)
#   STORAGE_BACKEND=postgresql devspace dev         (PostgreSQL controller backend)
#   BROKER_MESSAGE_BACKEND=postgres devspace dev    (Postgres message backend)
vars:
  # Controller storage backend. The pipelines wait on a CRD when this is "etcd"
  # and on the aggregated APIService otherwise.
  STORAGE_BACKEND:
    source: env
    default: "etcd"
  # Message backend for the broker (not referenced elsewhere in this file;
  # presumably consumed by the ark-broker dependency - confirm there).
  BROKER_MESSAGE_BACKEND:
    source: env
    default: "memory"
  # Activates the enable-cert-manager profile and the cert-manager pipeline steps.
  ENABLE_CERT_MANAGER:
    source: env
    default: true
  # Activates the enable-gateway-api profile and the gateway-api CRD install step.
  ENABLE_GATEWAY_API_CRDS:
    source: env
    default: true
  # Activates the enable-ark-tenant profile and the ark-tenant deployment step.
  ENABLE_ARK_TENANT:
    source: env
    default: true
  # Activates the enable-argo profile, which adds the argo-workflows dependency.
  ENABLE_ARGO:
    source: env
    default: false
# Custom commands, run as `devspace run <name>`.
commands:
  # Runs the localhost-gateway show-routes script.
  routes: ./services/localhost-gateway/scripts/show-routes.sh
  # (Re)starts a background port-forward from localhost:8080 to port 80 of the
  # localhost-gateway-nginx service in the ark-system namespace.
  routes-forward: |-
    # Stop whatever is currently LISTENING on 8080. The lookup is restricted to
    # listening TCP sockets on purpose: a bare `lsof -ti :8080` also returns
    # processes that are merely connected to that port (for example a browser
    # tab open on localhost:8080), and those would be killed as well.
    kill $(lsof -ti tcp:8080 -sTCP:LISTEN) 2>/dev/null || true
    echo "Starting port-forward on localhost:8080..."
    nohup kubectl port-forward -n ark-system service/localhost-gateway-nginx 8080:80 > /dev/null 2>&1 &
    # Give kubectl a moment to bind the port before checking for the listener.
    sleep 1
    # Only a listener counts as success; a lingering client socket must not be
    # reported as the port-forward process.
    PID=$(lsof -ti tcp:8080 -sTCP:LISTEN 2>/dev/null || true)
    if [ -n "$PID" ]; then
      echo "Port-forward running in background (PID: $PID)"
    else
      echo "Failed to start port-forward. Is the gateway deployed?"
    fi
# Profiles - each one activates when its ENABLE_* variable is true and patches
# an extra deployment (or dependency) into this configuration.
profiles:
  # cert-manager, installed from the Jetstack Helm repository.
  - name: enable-cert-manager
    activation:
      - vars:
          ENABLE_CERT_MANAGER: true
    patches:
      - op: add
        path: deployments
        value:
          cert-manager:
            namespace: cert-manager
            helm:
              # NOTE(review): no chart version is set here, so the version that
              # gets installed is not fixed by this file - consider pinning one.
              chart:
                name: cert-manager
                repo: https://charts.jetstack.io
              releaseName: cert-manager
              upgradeArgs:
                - "--install"
              values:
                installCRDs: true
                # Disable the post-install Job that probes the cert-manager webhook.
                # The probe runs before the webhook TLS is stable, causing it to hang.
                startupapicheck:
                  enabled: false

  # Gateway API CRDs, applied from the upstream release manifest.
  - name: enable-gateway-api
    activation:
      - vars:
          ENABLE_GATEWAY_API_CRDS: true
    patches:
      - op: add
        path: deployments
        value:
          gateway-api:
            namespace: default
            kubectl:
              manifests:
                - "https://github.com/kubernetes-sigs/gateway-api/releases/download/v1.3.0/standard-install.yaml"

  # ark-tenant, installed from the local Helm chart.
  - name: enable-ark-tenant
    activation:
      - vars:
          ENABLE_ARK_TENANT: true
    patches:
      - op: add
        path: deployments
        value:
          ark-tenant:
            namespace: default
            helm:
              chart:
                path: ./charts/ark-tenant
              releaseName: ark-tenant
              upgradeArgs:
                - "--install"

  # argo-workflows, added as a dependency rather than a deployment.
  - name: enable-argo
    activation:
      - vars:
          ENABLE_ARGO: true
    patches:
      - op: add
        path: dependencies
        value:
          argo-workflows:
            path: ./services/argo-workflows
# Pipelines - `deploy` and `dev` share the same ordering:
#   1. gateway-api CRDs (background) and cert-manager
#   2. ark-controller
#   3. ark-tenant (once the Ark API is served)
#   4. argo-workflows (optional), then every remaining dependency
# `deploy` additionally waits for pods and streams their logs at the end.
pipelines:
  deploy: |-
    # Start gateway-api CRD installation in background so it runs in parallel
    # with cert-manager startup instead of sequentially after it.
    GATEWAY_PID=""
    if [ "${ENABLE_GATEWAY_API_CRDS}" == "true" ]; then
      if ! kubectl get crd gateways.gateway.networking.k8s.io > /dev/null 2>&1; then
        kubectl apply -f "https://github.com/kubernetes-sigs/gateway-api/releases/download/v1.3.0/standard-install.yaml" &
        GATEWAY_PID=$!
      fi
    fi

    if [ "${ENABLE_CERT_MANAGER}" == "true" ]; then
      # Skip installation if cert-manager is already deployed (idempotent re-deploys).
      if ! helm status cert-manager -n cert-manager > /dev/null 2>&1; then
        create_deployments cert-manager
      fi
      # Wait for webhook and cainjector to be fully rolled out before proceeding.
      # The webhook must be running before any cert-manager resources (Issuer, Certificate)
      # can be created, otherwise Helm will get x509 errors calling the webhook TLS endpoint.
      kubectl rollout status deployment/cert-manager-webhook -n cert-manager --timeout=120s
      kubectl rollout status deployment/cert-manager-cainjector -n cert-manager --timeout=120s
      # Wait for cainjector to populate the webhook's CABundle field. Only needed on first
      # install — once the selfsigned-issuer exists it persists across re-deploys (see
      # ark/dist/chart/templates/certmanager/certificate.yaml for why).
      if ! kubectl get issuer selfsigned-issuer -n ark-system > /dev/null 2>&1; then
        echo "Waiting for cert-manager webhook CABundle..."
        # Poll every 2s and give up after 60 attempts (~120s) so a broken
        # cainjector fails the pipeline instead of hanging it forever.
        CA_ATTEMPTS=0
        until kubectl get mutatingwebhookconfiguration cert-manager-webhook -o jsonpath='{.webhooks[0].clientConfig.caBundle}' 2>/dev/null | grep -q .; do
          CA_ATTEMPTS=$((CA_ATTEMPTS + 1))
          if [ "${CA_ATTEMPTS}" -ge 60 ]; then
            echo "Timed out waiting for cert-manager webhook CABundle" >&2
            exit 1
          fi
          sleep 2
        done
      fi
    fi

    # Wait for background gateway-api install to finish before creating ark-controller,
    # which depends on gateway CRDs being established.
    if [ -n "${GATEWAY_PID}" ]; then wait ${GATEWAY_PID}; fi
    if [ "${ENABLE_GATEWAY_API_CRDS}" == "true" ]; then
      # `kubectl apply` returning only means the CRD objects were accepted; wait
      # until the API server reports the Gateway CRD as Established.
      kubectl wait --for=condition=Established crd/gateways.gateway.networking.k8s.io --timeout=60s
    fi

    run_dependency_pipelines ark-controller --pipeline=deploy

    if [ "${ENABLE_CERT_MANAGER}" == "true" ]; then
      # Verify cert-manager can actually issue the webhook certificate end-to-end.
      # rollout status + CABundle checks above only confirm pods are running and the
      # webhook config is patched — they don't catch issuance failures (e.g. broken
      # RBAC, controller errors). Waiting for serving-cert to be Ready provides that
      # guarantee before dependent services try to call the webhook.
      kubectl wait --for=condition=Ready certificate/serving-cert -n ark-system --timeout=60s
    fi

    if [ "${ENABLE_ARK_TENANT}" == "true" ]; then
      echo "Waiting for Ark API to be available..."
      # With the etcd backend the Ark types are CRDs; otherwise they are served
      # through an aggregated APIService, which gets a longer timeout.
      if [ "${STORAGE_BACKEND}" == "etcd" ]; then
        kubectl wait --for=condition=Established crd/tools.ark.mckinsey.com --timeout=60s
      else
        kubectl wait --for=condition=Available apiservice/v1alpha1.ark.mckinsey.com --timeout=300s
      fi
      create_deployments ark-tenant
    fi

    if [ "${ENABLE_ARGO}" == "true" ]; then
      run_dependency_pipelines argo-workflows --pipeline=deploy
    fi

    # Everything else; the two dependencies handled explicitly above are excluded.
    run_dependency_pipelines --all --exclude ark-controller --exclude argo-workflows --pipeline=deploy

    echo ""
    echo "All services deployed. Waiting for pods to be ready..."
    # Wait for pods before streaming logs; kubectl logs -f fails immediately if a
    # pod is still in ContainerCreating state.
    kubectl wait pod -l app -n "${DEVSPACE_NAMESPACE}" --for=condition=Ready --timeout=120s 2>/dev/null || true
    echo "Streaming logs from namespace ${DEVSPACE_NAMESPACE} (Ctrl+C to stop)..."
    echo ""
    kubectl logs -f -n "${DEVSPACE_NAMESPACE}" --all-containers --prefix --max-log-requests=10 -l app

  dev: |-
    # Start gateway-api CRD installation in background so it runs in parallel
    # with cert-manager startup instead of sequentially after it.
    GATEWAY_PID=""
    if [ "${ENABLE_GATEWAY_API_CRDS}" == "true" ]; then
      if ! kubectl get crd gateways.gateway.networking.k8s.io > /dev/null 2>&1; then
        kubectl apply -f "https://github.com/kubernetes-sigs/gateway-api/releases/download/v1.3.0/standard-install.yaml" &
        GATEWAY_PID=$!
      fi
    fi

    if [ "${ENABLE_CERT_MANAGER}" == "true" ]; then
      # Skip installation if cert-manager is already deployed (idempotent re-deploys).
      if ! helm status cert-manager -n cert-manager > /dev/null 2>&1; then
        create_deployments cert-manager
      fi
      # Wait for webhook and cainjector to be fully rolled out before proceeding.
      # The webhook must be running before any cert-manager resources (Issuer, Certificate)
      # can be created, otherwise Helm will get x509 errors calling the webhook TLS endpoint.
      kubectl rollout status deployment/cert-manager-webhook -n cert-manager --timeout=120s
      kubectl rollout status deployment/cert-manager-cainjector -n cert-manager --timeout=120s
      # Wait for cainjector to populate the webhook's CABundle field. Only needed on first
      # install — once the selfsigned-issuer exists it persists across re-deploys (see
      # ark/dist/chart/templates/certmanager/certificate.yaml for why).
      if ! kubectl get issuer selfsigned-issuer -n ark-system > /dev/null 2>&1; then
        echo "Waiting for cert-manager webhook CABundle..."
        # Poll every 2s and give up after 60 attempts (~120s) so a broken
        # cainjector fails the pipeline instead of hanging it forever.
        CA_ATTEMPTS=0
        until kubectl get mutatingwebhookconfiguration cert-manager-webhook -o jsonpath='{.webhooks[0].clientConfig.caBundle}' 2>/dev/null | grep -q .; do
          CA_ATTEMPTS=$((CA_ATTEMPTS + 1))
          if [ "${CA_ATTEMPTS}" -ge 60 ]; then
            echo "Timed out waiting for cert-manager webhook CABundle" >&2
            exit 1
          fi
          sleep 2
        done
      fi
    fi

    # Wait for background gateway-api install to finish before creating ark-controller,
    # which depends on gateway CRDs being established.
    if [ -n "${GATEWAY_PID}" ]; then wait ${GATEWAY_PID}; fi
    if [ "${ENABLE_GATEWAY_API_CRDS}" == "true" ]; then
      # `kubectl apply` returning only means the CRD objects were accepted; wait
      # until the API server reports the Gateway CRD as Established.
      kubectl wait --for=condition=Established crd/gateways.gateway.networking.k8s.io --timeout=60s
    fi

    run_dependency_pipelines ark-controller --pipeline=dev

    if [ "${ENABLE_CERT_MANAGER}" == "true" ]; then
      # Verify cert-manager can actually issue the webhook certificate end-to-end.
      # rollout status + CABundle checks above only confirm pods are running and the
      # webhook config is patched — they don't catch issuance failures (e.g. broken
      # RBAC, controller errors). Waiting for serving-cert to be Ready provides that
      # guarantee before dependent services try to call the webhook.
      kubectl wait --for=condition=Ready certificate/serving-cert -n ark-system --timeout=60s
    fi

    if [ "${ENABLE_ARK_TENANT}" == "true" ]; then
      echo "Waiting for Ark API to be available..."
      # With the etcd backend the Ark types are CRDs; otherwise they are served
      # through an aggregated APIService, which gets a longer timeout.
      if [ "${STORAGE_BACKEND}" == "etcd" ]; then
        kubectl wait --for=condition=Established crd/tools.ark.mckinsey.com --timeout=60s
      else
        kubectl wait --for=condition=Available apiservice/v1alpha1.ark.mckinsey.com --timeout=300s
      fi
      create_deployments ark-tenant
    fi

    if [ "${ENABLE_ARGO}" == "true" ]; then
      run_dependency_pipelines argo-workflows --pipeline=dev
    fi

    # Everything else; the two dependencies handled explicitly above are excluded.
    run_dependency_pipelines --all --exclude ark-controller --exclude argo-workflows --pipeline=dev