Skip to content

Commit def689d

Browse files
committed
🎨 Validate webutil.TELEMETRY with pydantic
1 parent 83bed8c commit def689d

3 files changed

Lines changed: 62 additions & 31 deletions

File tree

CHANGELOG.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@ All notable changes to this library are documented in this file.
1313
- Return signature of `str2multipolygon` changed to include a list of errors.
1414
- `webutil.write_telemetry` now writes to a dedicated socket
1515
`/run/rsyslog/iemweb.sock` with
16-
`webutil.TELEMETRY_PREFIX` and a JSON payload string.
16+
`webutil.TELEMETRY_PREFIX` and a JSON payload string. `webutil.TELEMETRY` is
17+
a pydantic model.
1718

1819
### New Features
1920

src/pyiem/webutil.py

Lines changed: 39 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
import html
44
import inspect
55
import io
6-
import json
76
import os
87
import random
98
import re
@@ -12,7 +11,6 @@
1211
import sys
1312
import traceback
1413
import warnings
15-
from collections import namedtuple
1614
from collections.abc import Callable
1715
from datetime import datetime, timezone
1816
from http import HTTPStatus
@@ -26,6 +24,7 @@
2624
BaseModel,
2725
BeforeValidator,
2826
ConfigDict,
27+
Field,
2928
ValidationError,
3029
WithJsonSchema,
3130
field_validator,
@@ -64,18 +63,36 @@
6463
}
6564
# Match something that looks like a four digit year
6665
YEAR_RE = re.compile(r"^\d{4}")
67-
TELEMETRY = namedtuple(
68-
"TELEMETRY",
69-
[
70-
"timing",
71-
"status_code",
72-
"client_addr",
73-
"app",
74-
"request_uri",
75-
"vhost",
76-
"valid",
77-
],
78-
)
66+
67+
68+
class TELEMETRY(BaseModel):
69+
timing: Annotated[
70+
float, Field(description="Request processing time in seconds")
71+
]
72+
status_code: Annotated[int, Field(description="HTTP response code")]
73+
client_addr: Annotated[
74+
str | None,
75+
Field(
76+
pattern=(
77+
r"^(([0-9]{1,3}\.){3}[0-9]{1,3}|"
78+
r"([a-fA-F0-9:]+:+)+[a-fA-F0-9]+)$"
79+
),
80+
description="Valid IPv4/IPv6 address",
81+
),
82+
] = None
83+
app: Annotated[
84+
str | None, Field(description="App generating this telemetry")
85+
] = None
86+
request_uri: Annotated[str | None, Field(description="Request URI")] = None
87+
vhost: Annotated[str | None, Field(description="virtual host")] = None
88+
valid: Annotated[
89+
datetime,
90+
Field(
91+
description="Timestamp when this telemetry record was generated"
92+
),
93+
]
94+
95+
7996
TELEMETRY_PREFIX = "Telemetry "
8097
# A rsyslog socket established via akrherz/infra-ansible that is a side-door
8198
# to send rsyslog messages without systemd intercepting them and filling
@@ -213,9 +230,7 @@ def write_telemetry(data: TELEMETRY) -> bool:
213230
# 141 is local1.notice and is critical to make this work.
214231
# The TELEMETRY_PREFIX becomes the syslog tag
215232
payload = (
216-
"<141>"
217-
+ TELEMETRY_PREFIX
218-
+ json.dumps(data._asdict(), separators=(",", ":"), sort_keys=True)
233+
"<141>" + TELEMETRY_PREFIX + data.model_dump_json(indent=None)
219234
).encode("utf-8")
220235
# We need to avoid syslog as systemd/journal will intercept this and
221236
# fill logs quickly.
@@ -621,13 +636,13 @@ def _iemapp_emit_telemetry(
621636
end_time = datetime.now(timezone.utc)
622637
write_telemetry(
623638
TELEMETRY(
624-
(end_time - start_time).total_seconds(),
625-
status_code,
626-
environ.get("REMOTE_ADDR"),
627-
environ.get("SCRIPT_NAME"),
628-
environ.get("REQUEST_URI"),
629-
environ.get("HTTP_HOST"),
630-
end_time.strftime(ISO8601),
639+
timing=(end_time - start_time).total_seconds(),
640+
status_code=status_code,
641+
client_addr=environ.get("REMOTE_ADDR"),
642+
app=environ.get("SCRIPT_NAME"),
643+
request_uri=environ.get("REQUEST_URI"),
644+
vhost=environ.get("HTTP_HOST"),
645+
valid=end_time.strftime(ISO8601),
631646
)
632647
)
633648

tests/test_webutil.py

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
"""Tests for webutil."""
22

33
import random
4-
from datetime import datetime
4+
from datetime import datetime, timezone
55
from typing import Annotated, Optional, Union
66
from zoneinfo import ZoneInfo
77

@@ -32,6 +32,20 @@
3232
)
3333

3434

35+
def test_telemetry_bad_ip():
36+
"""Test that we do not allow a bad IP within TELEMETRY."""
37+
with pytest.raises(ValueError):
38+
TELEMETRY(
39+
timing=1,
40+
status_code=200,
41+
client_addr="not_an_ip",
42+
app="test",
43+
request_uri="",
44+
vhost="",
45+
valid=datetime.now().strftime(ISO8601),
46+
)
47+
48+
3549
def test_xss_detect_script_tag():
3650
assert _is_xss_payload("<script>alert('xss')</script>")
3751

@@ -554,14 +568,15 @@ def test_disable_parse_times():
554568

555569
def test_add_telemetry():
556570
"""Test adding something to the rsyslog sidedoor socket."""
571+
now = datetime.now(timezone.utc)
557572
data = TELEMETRY(
558573
timing=1,
559574
status_code=200,
560575
client_addr=None,
561576
app="test",
562577
request_uri="",
563578
vhost="",
564-
valid=datetime.now().strftime(ISO8601),
579+
valid=now.strftime(ISO8601),
565580
)
566581
socket_mock = mock.MagicMock()
567582
cm_mock = mock.MagicMock()
@@ -574,10 +589,10 @@ def test_add_telemetry():
574589
socket_mock.sendto.assert_called_once_with(
575590
b"<141>Telemetry "
576591
+ (
577-
b'{"app":"test","client_addr":null,"request_uri":"",'
578-
b'"status_code":200,"timing":1,"valid":"'
579-
+ data.valid.encode("utf-8")
580-
+ b'","vhost":""}'
592+
b'{"timing":1.0,"status_code":200,"client_addr":null,'
593+
b'"app":"test","request_uri":"","vhost":"","valid":"'
594+
+ now.strftime(ISO8601).encode("utf-8")
595+
+ b'"}'
581596
),
582597
RSYSLOG_SIDEDOOR_SOCKET,
583598
)

0 commit comments

Comments
 (0)