Skip to content

Commit 0f6e1e5

Browse files
feat(validation): add workflow image vetting (reanahub#739)
Extend `validate_workflow` to check that all images used in workflows are authorized. Such authorized images are specified via the `vetted_container_images` Helm value. Also add details of container vetting to the `info` API endpoint.
1 parent c75b480 commit 0f6e1e5

5 files changed

Lines changed: 340 additions & 2 deletions

File tree

docs/openapi.json

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -553,6 +553,14 @@
553553
"yadage"
554554
]
555555
},
556+
"vetted_container_images_allowlist": {
557+
"title": "List of container images allowed in user workflows",
558+
"value": []
559+
},
560+
"vetted_container_images_enabled": {
561+
"title": "Whether container image vetting is enabled",
562+
"value": false
563+
},
556564
"workspaces_available": {
557565
"title": "List of available workspaces",
558566
"value": [
@@ -937,6 +945,31 @@
937945
},
938946
"type": "object"
939947
},
948+
"vetted_container_images_allowlist": {
949+
"properties": {
950+
"title": {
951+
"type": "string"
952+
},
953+
"value": {
954+
"items": {
955+
"type": "string"
956+
},
957+
"type": "array"
958+
}
959+
},
960+
"type": "object"
961+
},
962+
"vetted_container_images_enabled": {
963+
"properties": {
964+
"title": {
965+
"type": "string"
966+
},
967+
"value": {
968+
"type": "boolean"
969+
}
970+
},
971+
"type": "object"
972+
},
940973
"workspaces_available": {
941974
"properties": {
942975
"title": {

reana_server/config.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -581,6 +581,14 @@ def _get_int_env_variable(env_variable: str, default: int) -> int:
581581
)
582582
"""Whether users can set custom interactive session images or not."""
583583

584+
REANA_VETTED_CONTAINER_IMAGES = json.loads(
585+
os.getenv(
586+
"REANA_VETTED_CONTAINER_IMAGES",
587+
'{"enabled": false, "allowlist": []}',
588+
)
589+
)
590+
"""Container images that users are allowed to use in their workflows."""
591+
584592
# Kubernetes jobs timeout
585593
# ==================
586594
REANA_KUBERNETES_JOBS_TIMEOUT_LIMIT = os.getenv("REANA_KUBERNETES_JOBS_TIMEOUT_LIMIT")

reana_server/rest/info.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
REANA_INTERACTIVE_SESSION_MAX_INACTIVITY_PERIOD,
3535
REANA_INTERACTIVE_SESSIONS_ENVIRONMENTS,
3636
REANA_INTERACTIVE_SESSIONS_ENVIRONMENTS_CUSTOM_ALLOWED,
37+
REANA_VETTED_CONTAINER_IMAGES,
3738
DASK_ENABLED,
3839
DASK_AUTOSCALER_ENABLED,
3940
REANA_DASK_CLUSTER_DEFAULT_NUMBER_OF_WORKERS,
@@ -217,6 +218,22 @@ def info(user, **kwargs): # noqa
217218
type: string
218219
type: array
219220
type: object
221+
vetted_container_images_enabled:
222+
properties:
223+
title:
224+
type: string
225+
value:
226+
type: boolean
227+
type: object
228+
vetted_container_images_allowlist:
229+
properties:
230+
title:
231+
type: string
232+
value:
233+
items:
234+
type: string
235+
type: array
236+
type: object
220237
supported_workflow_engines:
221238
properties:
222239
title:
@@ -417,6 +434,14 @@ def info(user, **kwargs): # noqa
417434
"title": "Whether users are allowed to spawn custom interactive session images",
418435
"value": "False"
419436
},
437+
"vetted_container_images_enabled": {
438+
"title": "Whether container image vetting is enabled",
439+
"value": false
440+
},
441+
"vetted_container_images_allowlist": {
442+
"title": "List of container images allowed in user workflows",
443+
"value": []
444+
},
420445
"supported_workflow_engines": {
421446
"title": "List of supported workflow engines",
422447
"value": [
@@ -583,6 +608,14 @@ def info(user, **kwargs): # noqa
583608
]
584609
],
585610
),
611+
vetted_container_images_enabled=dict(
612+
title="Whether container image vetting is enabled",
613+
value=REANA_VETTED_CONTAINER_IMAGES["enabled"],
614+
),
615+
vetted_container_images_allowlist=dict(
616+
title="List of container images allowed in user workflows",
617+
value=REANA_VETTED_CONTAINER_IMAGES["allowlist"],
618+
),
586619
supported_workflow_engines=dict(
587620
title="List of supported workflow engines",
588621
value=["cwl", "serial", "snakemake", "yadage"],
@@ -669,6 +702,13 @@ class StringInfoValue(Schema):
669702
value = fields.String(allow_none=False)
670703

671704

705+
class BooleanInfoValue(Schema):
706+
"""Schema for a value represented by a boolean."""
707+
708+
title = fields.String()
709+
value = fields.Boolean(allow_none=False)
710+
711+
672712
class StringNullableInfoValue(Schema):
673713
"""Schema for a value represented by a nullable string."""
674714

@@ -707,6 +747,8 @@ class InfoSchema(Schema):
707747
kubernetes_max_memory_limit = fields.Nested(StringInfoValue)
708748
interactive_session_recommended_jupyter_images = fields.Nested(ListStringInfoValue)
709749
interactive_sessions_custom_image_allowed = fields.Nested(StringInfoValue)
750+
vetted_container_images_enabled = fields.Nested(BooleanInfoValue)
751+
vetted_container_images_allowlist = fields.Nested(ListStringInfoValue)
710752
supported_workflow_engines = fields.Nested(ListStringInfoValue)
711753
cwl_engine_tool = fields.Nested(StringInfoValue)
712754
cwl_engine_version = fields.Nested(StringInfoValue)

reana_server/validation.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from typing import Dict, List
1414

1515
from reana_commons.config import WORKSPACE_PATHS
16+
from reana_commons.validation.images import extract_images
1617
from reana_commons.errors import REANAValidationError
1718
from reana_commons.validation.compute_backends import build_compute_backends_validator
1819
from reana_commons.validation.operational_options import validate_operational_options
@@ -31,8 +32,8 @@
3132
REANA_DASK_CLUSTER_MAX_SINGLE_WORKER_MEMORY,
3233
REANA_DASK_CLUSTER_DEFAULT_SINGLE_WORKER_THREADS,
3334
REANA_DASK_CLUSTER_MAX_SINGLE_WORKER_THREADS,
35+
REANA_VETTED_CONTAINER_IMAGES,
3436
)
35-
from reana_server import utils
3637

3738

3839
def validate_parameters(reana_yaml: Dict) -> None:
@@ -112,13 +113,29 @@ def validate_inputs(reana_yaml: Dict) -> None:
112113
raise REANAValidationError(f"Input path declared multiple times: {path}")
113114
unique_paths.add(path)
114115

116+
from reana_server import utils
117+
115118
for x, y in itertools.permutations(paths, r=2):
116119
if utils.is_relative_to(x, y):
117120
raise REANAValidationError(
118121
f"Duplicate input paths '{y}' and '{x}' found. Please deduplicate inputs first."
119122
)
120123

121124

125+
def validate_images(reana_yaml: Dict) -> None:
126+
"""Check whether the images used in the workflow are allowed or not.
127+
128+
:param reana_yaml: REANA specification.
:raises REANAValidationError: If image vetting is enabled and the workflow uses an image that is not in the allowlist.
129+
"""
130+
if not REANA_VETTED_CONTAINER_IMAGES["enabled"]:
131+
return
132+
133+
allowed_images = REANA_VETTED_CONTAINER_IMAGES["allowlist"]
134+
for image in extract_images(reana_yaml):
135+
if image and image not in allowed_images:
136+
raise REANAValidationError(f"Image not allowed: {image}")
137+
138+
122139
def validate_workflow(reana_yaml: Dict, input_parameters: Dict) -> Dict:
123140
"""Validate REANA workflow specification by calling all the validation utilities.
124141
@@ -137,6 +154,7 @@ def validate_workflow(reana_yaml: Dict, input_parameters: Dict) -> Dict:
137154
validate_compute_backends(reana_yaml)
138155
validate_workspace_path(reana_yaml)
139156
validate_inputs(reana_yaml)
157+
validate_images(reana_yaml)
140158
return reana_yaml_warnings
141159

142160

0 commit comments

Comments
 (0)