-
-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathtenzir.yaml.example
More file actions
579 lines (470 loc) · 22.5 KB
/
Copy pathtenzir.yaml.example
File metadata and controls
579 lines (470 loc) · 22.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
# This is an example configuration file for Tenzir that shows all available
# options. Options in angle brackets have their default value determined at
# runtime.
# Options that concern Tenzir.
tenzir:
# The token that is offered when connecting to the Tenzir Platform.
# It is used to identify the node and assign it to the correct workspace.
# This setting is ignored in the open-source edition of Tenzir, which does
# not contain the platform plugin.
token: tnz_XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
# The host and port to listen at for node-to-node connections in the form
# `<host>:<port>`. Host or port may be omitted to use their defaults, which
# are localhost and 5158, respectively. Set the port to zero to automatically
# choose a port. Set to false to disable exposing an endpoint.
endpoint: localhost:5158
# The timeout for connecting to a Tenzir server. Set to 0 seconds to wait
# indefinitely.
connection-timeout: 5m
# The delay between two connection attempts. Set to 0s to try connecting
# without retries.
connection-retry-delay: 3s
# Configure retention policies.
retention:
# How long to keep metrics for. Set to 0s to disable metrics retention
# entirely.
# WARNING: A low retention period may negatively impact the usability of
# pipeline activity in the Tenzir Platform.
#metrics: 16d
# How long to keep legacy operator metrics for. Set to 0s to avoid storing
# these heavy metrics while still making live metrics available.
#operator-metrics: 0s
# How long to keep operator profile metrics for. Set to 0s to avoid storing
# these heavy metrics while still making live metrics available.
#operator-profile-metrics: 1d
# How long to keep diagnostics for. Set to 0s to disable diagnostics
# retention entirely.
# WARNING: A low retention period may negatively impact the usability of
# diagnostics in the Tenzir Platform.
#diagnostics: 30d
# Configure the behavior of the `cache` operator. The Tenzir Platform uses the
# cache operator to store and retrieve data efficiently.
cache:
# Specifies the default lifetime for the `cache` operator.
#lifetime: 10min
# Specifies an upper bound for the total memory usage in bytes across all
# caches in a node. If the memory usage exceeds this limit, the node will
# start evicting caches to make room for new data. The node requires a
# minimum total cache capacity of 64MiB.
#capacity: 1Gi
# A certificate file used as the default for operators accepting a `cacert`
# option. This will default to an appropriate directory for the system. For
# example:
# - /etc/ssl/certs/ca-bundle.crt on RedHat
# - /etc/ssl/certs/ca-certificates.crt on Ubuntu
#cacert:
# TLS configuration that applies to all operators supporting TLS, such as
# from_http, accept_tcp, to_tcp, to_opensearch, from_opensearch, to_splunk,
# save_email, and to_fluent_bit. Operators can override these settings
# individually via their `tls` option.
tls:
# Enable TLS on all operators that support it.
#enable: false
# Disable certificate verification (not recommended for production).
#skip-peer-verification: false
# Path to a CA certificate bundle for server verification.
#cacert:
# Path to a client certificate file.
#certfile:
# Path to a client private key file.
#keyfile:
# Password to decrypt the private key in `keyfile`, if it is encrypted.
#password:
# Minimum TLS protocol version.
# Valid values: "1.0", "1.1", "1.2", "1.3".
#tls-min-version:
# OpenSSL cipher list string.
#tls-ciphers:
# Path to a CA certificate for validating client certificates (mTLS).
# Only applies to operators that accept incoming connections.
#tls-client-ca:
# Require clients to present valid certificates signed by the client CA
# (mTLS). Only applies to operators that accept incoming connections.
#require-client-cert: false
# The file system path used for persistent state.
# Defaults to one of the following paths, selecting the first that is
# available:
# - $STATE_DIRECTORY
# - $PWD/tenzir.db
#state-directory:
# The file system path used for recoverable state.
# In a node process, defaults to the first of the following paths that is
# available:
# - $CACHE_DIRECTORY
# - $XDG_CACHE_HOME
# - $XDG_HOME_DIR/.cache/tenzir (linux) or $XDG_HOME_DIR/Libraries/caches/tenzir (mac)
# - $HOME/.cache/tenzir (linux) or $HOME/Libraries/caches/tenzir (mac)
# - $TEMPORARY_DIRECTORY/tenzir-cache-<uid>
# To determine $TEMPORARY_DIRECTORY, the values of TMPDIR, TMP, TEMP, TEMPDIR are
# checked in that order, and as a last resort "/tmp" is used.
# In a client process, this setting is ignored and
# `$TEMPORARY_DIRECTORY/tenzir-client-cache-<uid>` is used as cache directory.
#cache-directory:
# The file system path used for log files.
# Defaults to one of the following paths, selecting the first that is
# available:
# - $LOGS_DIRECTORY/server.log
# - <state-directory>/server.log
#log-file:
# The file system path used for client log files relative to the current
# working directory of the client. Note that this is disabled by default.
# If not specified no log files are written for clients at all.
client-log-file: "client.log"
# Format for printing individual log entries to the log-file.
# For a list of valid format specifiers, see spdlog format specification
# at https://github.com/gabime/spdlog/wiki/3.-Custom-formatting.
file-format: "[%Y-%m-%dT%T.%e%z] [%n] [%l] [%s:%#] %v"
# Configures the minimum severity of messages written to the log file.
# Possible values: quiet, error, warning, info, verbose, debug, trace.
# File logging is only available for commands that start a node (e.g.,
# tenzir-node). The levels above 'verbose' are usually not available in
# release builds.
file-verbosity: debug
# Whether to disable automatic log rotation. If set to false, a new log file
# will be created when the size of the current log file exceeds the
# configured `log-rotation-threshold`.
disable-log-rotation: false
# The size limit when a log file should be rotated.
log-rotation-threshold: 10MiB
# Maximum number of log messages in the logger queue.
log-queue-size: 1000000
# The sink type to use for console logging. Possible values: stderr,
# syslog, journald. Note that 'journald' can only be selected on linux
# systems, and only if Tenzir was built with journald support.
# The journald sink is used as default if Tenzir is started as a systemd
# service and the service is configured to use the journal for stderr,
# otherwise the default is the unstructured stderr sink.
#console-sink: stderr/journald
# Mode for console log output generation. Automatic renders color only when
# writing to a tty.
# Possible values: always, automatic, never. (default automatic)
console: automatic
# Format for printing individual log entries to the console. For a list
# of valid format specifiers, see spdlog format specification at
# https://github.com/gabime/spdlog/wiki/3.-Custom-formatting.
console-format: "%^[%T.%e] %v%$"
# Configures the minimum severity of messages written to the console.
# For a list of valid log levels, see file-verbosity.
console-verbosity: info
# List of directories to look for schema files in ascending order of
# priority.
schema-dirs: []
# Additional directories to load plugins specified using `tenzir.plugins`
# from.
plugin-dirs: []
# List of paths that contain statically configured packages.
# This setting is ignored unless the package manager plugin is enabled.
package-dirs: []
# The plugins to load at startup. For relative paths, Tenzir tries to find
# the files in the specified `tenzir.plugin-dirs`. The special values
# 'bundled' and 'all' enable autoloading of bundled and all plugins
# respectively. Note: Add `example` or `/path/to/libtenzir-plugin-example.so`
# to load the example plugin.
plugins: []
# Names of plugins and builtins to explicitly forbid from being used in
# Tenzir. For example, adding `shell` will prohibit use of the `shell`
# operator builtin, and adding `kafka` will prohibit use of the `kafka`
# connector plugin.
disable-plugins: []
# Forbid unsafe location overrides for pipelines with the 'local' and 'remote'
# keywords, e.g., remotely reading from a file.
no-location-overrides: false
# Prevent all pipelines from automatically starting when the node starts.
no-autostart: false
# Enable subprocess pipelines.
pipeline-subprocesses: false
# The size of an index shard, expressed in number of events. This should
# be a power of 2.
max-partition-size: 4Mi
# Timeout after which the importer forwards events to subscribers like `export
# live=true` or `metrics live=true`. Set to 0s for an unbuffered mode. A
# higher value increases performance, and a lower value reduces latency.
import-buffer-timeout: 1s
# Timeout after which an active partition is forcibly flushed, regardless of
# its size.
active-partition-timeout: 30s
# Maximum number of events across all active partitions. This indirectly
# controls the maximum memory usage when importing events.
max-buffered-events: 12Mi
# Automatically rebuild undersized and outdated partitions in the background.
# The given number controls how much resources to spend on it. Set to 0 to
# disable.
automatic-rebuild: 1
# Timeout after which an automatic rebuild is triggered.
rebuild-interval: 30min
# Zstd compression level applied to the Feather store backend.
# zstd-compression-level: <default>
# The URL of the control endpoint when connecting to a self-hosted
# instance of the Tenzir Platform.
platform-control-endpoint: wss://ws.tenzir.app/production
# Whether to undermine the security of the TLS connection to the
# Tenzir Platform by disabling certificate validation.
# Setting this to `true` is strongly discouraged.
platform-skip-peer-verification: false
# The name to use when connecting to the platform as an ephemeral node.
# This setting is ignored unless a workspace token is used to connect to
# the platform. Workspace tokens are currently only available for the
# Sovereign Edition of the Tenzir Platform.
platform-ephemeral-node-name: Ephemeral Node
# Control how operators calculate demand from their upstream operator. Note
# that this is an expert feature and should only be changed if you know what
# you are doing. The configured values can also be changed per operator by
# using the `_tune` operator.
demand:
# Issue demand only if room for at least this many elements is available.
# Must be greater than zero. Values may either be set to a number, or to a
# record containing `bytes` and `events` fields with numbers depending on
# the operator's input type.
min-elements:
bytes: 128Ki
events: 8Ki
# Controls how many elements may be buffered until the operator stops
# issuing demand. Must be greater than or equal to min-elements. Values may
# either be set to a number, or to a record containing `bytes` and `events`
# fields with numbers depending on the operator's input type.
max-elements:
bytes: 4Mi
events: 254Ki
# Controls how many batches of elements may be buffered until the operator
# stops issuing demand. Must be greater than zero.
max-batches: 10
# Controls the minimum backoff duration after an operator is detected to be
# idle. Must be at least 10ms.
min-backoff: 10ms
# Controls the maximum backoff duration after an operator is detected to be
# idle. Must be at least 10ms.
max-backoff: 1s
# Controls the growth rate of the backoff duration for operators that
# continue to be idle. Must be at least 1.0. Note that setting a growth rate
# of 1.0 causes the `max-backoff` duration to be ignored, replacing the
# exponential growth with a constant value.
backoff-rate: 2.0
# Contexts configured as part of the configuration that are always available.
contexts:
# A unique name for the context that's used in the context, enrich, and
# lookup operators to refer to the context.
indicators:
# The type of the context.
type: bloom-filter
# Arguments for creating the context, depending on the type. Refer to the
# documentation of the individual context types to see the arguments they
# require. Note that changes to these arguments do not apply to any
# contexts that were previously created.
arguments:
capacity: 1B
fp-probability: 0.001
# The `index` key is used to adjust the false-positive rate of
# the first-level lookup data structures (called synopses) in the
# catalog. The lower the false-positive rate the more space will be
# required, so this setting can be used to manually tune the trade-off
# of performance vs. space.
index:
# The default false-positive rate for type synopses.
default-fp-rate: 0.01
# rules:
# Every rule adjusts the behaviour of Tenzir for a set of targets.
# Tenzir creates one synopsis per target. Targets can be either types
# or field names.
#
# fp-rate - false positive rate. Has effect on string and address type
# targets
#
# partition-index - Tenzir will not create a dense index when set to false
# - targets: [:ip]
# fp-rate: 0.01
# The `tenzir-ctl start` command starts a new Tenzir server process.
start:
# Prints the endpoint for clients when the server is ready to accept
# connections. This comes in handy when letting the OS choose an
# available random port, i.e., when specifying 0 as port value.
print-endpoint: false
# Writes the endpoint for clients when the server is ready to accept
# connections to the specified destination. This comes in handy when letting
# the OS choose an available random port, i.e., when specifying 0 as port
# value, and `print-endpoint` is not sufficient.
#write-endpoint: /tmp/tenzir-node-endpoint
# An ordered list of commands to run inside the node after starting.
# As an example, to configure an auto-starting PCAP source that listens
# on the interface 'en0' and lives inside the Tenzir node, add `spawn
# source pcap -i en0`.
# Note that commands are not executed sequentially but in parallel.
commands: []
# Triggers removal of old data when the disk budget is exceeded.
disk-budget-high: 0GiB
# When the budget was exceeded, data is erased until the disk space is
# below this value.
disk-budget-low: 0GiB
# Seconds between successive disk space checks.
disk-budget-check-interval: 60
# When erasing, how many partitions to erase in one go before rechecking
# the size of the database directory.
disk-budget-step-size: 1
# Binary to use for checking the size of the database directory. If left
# unset, Tenzir will recursively add up the size of all files in the
# database directory to compute the size. Mainly useful for, e.g.,
# compressed filesystems where the raw file size is not the correct metric.
# Must be the absolute path to an executable file, which will get passed
# the database directory as its first and only argument.
#disk-budget-check-binary: /opt/tenzir/libexec/tenzir-df-percent.sh
# User-defined operators.
operators:
# The Zeek operator is an example that takes raw bytes in the form of a
# PCAP and then parses Zeek's output via the `zeek-json` format to generate
# a stream of events.
zeek: |
shell "zeek -r - LogAscii::output_to_stdout=T
JSONStreaming::disable_default_logs=T
JSONStreaming::enable_log_rotation=F
json-streaming-logs"
read_zeek_json
# The Suricata operator is analogous to the above Zeek example, with the
# difference that we are using Suricata. The command line configures
# Suricata such that it reads PCAP on stdin and produces EVE JSON logs on
# stdout, which we then parse with the `suricata` format.
suricata: |
shell "suricata -r /dev/stdin
--set outputs.1.eve-log.filename=/dev/stdout
--set logging.outputs.0.console.enabled=no"
read_suricata
# In addition to running pipelines interactively, you can also deploy
# *Pipelines as Code*. This infrastructure-as-code-like method differs from
# pipelines run on the command-line or through app.tenzir.com in two ways:
# 1. Pipelines deployed as code always start alongside the Tenzir node.
# 2. Deletion via the user interface is not allowed for pipelines configured
# as code.
pipelines:
# A unique identifier for the pipeline that's used for metrics, diagnostics,
# and API calls interacting with the pipeline.
publish-suricata:
# An optional user-facing name for the pipeline. Defaults to the id.
name: Import Suricata from TCP
# The definition of the pipeline. Configured pipelines that fail to start
# cause the node to fail to start.
definition: |
accept_tcp "0.0.0.0:34343" { read_suricata schema_only=true }
| where event_type != "stats"
| publish "suricata"
# Pipelines that encounter an error stop running and show an error state.
# This option causes pipelines to automatically restart when they
# encounter an error instead. The first restart happens immediately, and
# subsequent restarts after the configured delay, defaulting to 1 minute.
# The following values are valid for this option:
# - Omit the option, or set it to null or false to disable.
# - Set the option to true to enable with the default delay of 1 minute.
# - Set the option to a valid duration to enable with a custom delay.
restart-on-error: 1 minute
# Pipelines that are unstoppable will run automatically and indefinitely.
# They are not able to pause or stop.
# If they do complete, they will end up in a failed state.
# If `restart-on-error` is enabled, they will restart after the specified
# duration.
unstoppable: false
# Use the legacy secret model. Under this model, the `secret` function yields
# plain `string`s and can only look up secrets from the `tenzir.secrets`
# section in this config, but not from the Tenzir Platform's secret store.
legacy-secret-model: false
# Enables the `secret_assert` operator. This operator can be used for our
# integration tests and may be useful to test local setups.
# Since it theoretically allows for brute-forcing a secret's value, it is
# disabled by default.
enable-assert-secret-operator: false
# Local secrets, defined as key-value pairs. The values must be strings.
secrets:
# my-secret-name: my-secret-value
# Plugin-specific configuration.
plugins:
# TLS settings for the connection from the node to the Tenzir Platform.
# These options share the semantics of the corresponding node-level
# tenzir.tls settings, but apply only to the node <-> platform connection and
# override the matching node-level setting. The node connects to the platform
# as an outbound client, so the server-side mTLS options (`tls-client-ca`,
# `require-client-cert`) and the `enable` toggle do not apply here.
platform:
#skip-peer-verification:
#cacert:
#certfile:
#keyfile:
#tls-min-version:
#tls-ciphers:
# The below settings are internal to CAF, and aren't checked by Tenzir directly.
# Please be careful when changing these options. Note that some CAF options may
# be in conflict with Tenzir options, and are only listed here for completeness.
caf:
# Options affecting the internal scheduler.
scheduler:
# Accepted alternative: "sharing".
policy: stealing
# Configures whether the scheduler generates profiling output.
enable-profiling: false
# Output file for profiler data (only if profiling is enabled).
#profiling-output-file: </dev/null>
# Measurement resolution in milliseconds (only if profiling is enabled).
profiling-resolution: 100ms
# Forces a fixed number of threads if set. Defaults to the number of
# available CPU cores if starting a Tenzir node, or *2* for client commands.
#max-threads: <number of cores>
# Maximum number of messages actors can consume in one run.
max-throughput: 500
# When using "stealing" as scheduler policy.
work-stealing:
# Number of zero-sleep-interval polling attempts.
aggressive-poll-attempts: 100
# Frequency of steal attempts during aggressive polling.
aggressive-steal-interval: 10
# Number of moderately aggressive polling attempts.
moderate-poll-attempts: 500
# Frequency of steal attempts during moderate polling.
moderate-steal-interval: 5
# Sleep interval between poll attempts.
moderate-sleep-duration: 50us
# Frequency of steal attempts during relaxed polling.
relaxed-steal-interval: 1
# Sleep interval between poll attempts.
relaxed-sleep-duration: 10ms
stream:
# Maximum delay for partial batches.
max-batch-delay: 15ms
# Selects an implementation for credit computation.
# Accepted alternative: "size-based".
credit-policy: token-based
# When using "size-based" as credit-policy.
size-based-policy:
# Desired batch size in bytes.
bytes-per-batch: 32
# Maximum input buffer size in bytes.
buffer-capacity: 256
# Frequency of collecting batch sizes.
sampling-rate: 100
# Frequency of re-calibrations.
calibration-interval: 1
# Factor for discounting older samples.
smoothing-factor: 2.5
# When using "token-based" as credit-policy.
token-based-policy:
# Number of elements per batch.
batch-size: 1
# Max. number of elements in the input buffer.
buffer-size: 64
# Collecting metrics can be resource-consuming. This section is used for
# filtering what should and what should not be collected.
metrics-filters:
# Rules for actor based metrics filtering.
actors:
# List of selected actors for run-time metrics.
includes: []
# List of excluded actors from run-time metrics.
excludes: []
# Configure using OpenSSL for node-to-node connections.
# NOTE: Use the tenzir.endpoint variable to configure the endpoint.
openssl:
# Path to the PEM-formatted certificate file.
certificate:
# Path to the private key file for this node.
key:
# Passphrase to decrypt the private key.
passphrase:
# Path to an OpenSSL-style directory of trusted certificates.
capath:
# Path to a file of concatenated PEM-formatted certificates.
cafile:
# Colon-separated list of OpenSSL cipher strings to use.
cipher-list: