Skip to content

Commit c44f56e

Browse files
author
Tom Lasswell
committed
fix: Gate BLE dispatch behind SKU allowlist (#59)
BLE advertisement visibility is not proof the device accepts our BLE command frames. Before this change, any Govee device whose advert matched a name prefix or manufacturer_id was enrolled for BLE command dispatch — but multiple RGBIC SKUs (H6072, H61E1, H60B0, H612A) advertise BLE and silently drop our writes. Writes succeeded at the HCI layer, so _try_ble_command returned True and REST was never attempted, leaving control commands undelivered. v2026.4.4's optimistic grace period masked the UI flipflop but didn't retry via REST, so the command was still lost. Introduce BLE_COMMAND_SUPPORTED_MODELS in api/ble.py — an allowlist of SKUs with verified BLE command support. _handle_ble_advertisement now refuses to enroll devices whose SKU isn't on the list, logging once per SKU with instructions to open an issue for additions. Non-allowlisted devices stay cloud-only regardless of what they advertise. Seed list: H6199 (confirmed per #59 at-9 report). Tests: 658 passing (1 new allowlist-enforcement test). Refs: docs/_research/2026-04-13_ble-demotion-issue-59.md
1 parent f6b6948 commit c44f56e

5 files changed

Lines changed: 304 additions & 2 deletions

File tree

custom_components/govee/api/ble.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,21 @@
7777
# Beshelmek/govee_ble_lights `SEGMENTED_MODELS` (light.py:35).
7878
SEGMENTED_MODELS: frozenset[str] = frozenset({"H6053", "H6072", "H6102", "H6199"})
7979

80+
# SKUs with verified BLE command support. BLE command dispatch is only
81+
# attempted for devices on this list — advertising over BLE is not
82+
# sufficient proof that the device will accept our command frames.
83+
# Issue #59: multiple RGBIC SKUs (H6072, H61E1, H60B0, H612A) advertise
84+
# BLE but silently drop our writes, leaving commands unacknowledged and
85+
# the UI flipflopping. Until we can programmatically probe a device for
86+
# real BLE command capability, this allowlist is the correctness floor.
87+
#
88+
# How to extend: confirm via field testing that BLE commands (power,
89+
# brightness, RGB) actually change device state, then add the SKU here
90+
# and note the test result in the PR.
91+
BLE_COMMAND_SUPPORTED_MODELS: frozenset[str] = frozenset({
92+
"H6199", # RGBIC — confirmed working per issue #59 (at-9 report, ~10s lag).
93+
})
94+
8095
# Packet head byte used for all command frames.
8196
_PACKET_HEAD_COMMAND = 0x33
8297

custom_components/govee/coordinator.py

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,11 @@
3737
BluetoothCallbackMatcher,
3838
BluetoothScanningMode,
3939
)
40-
from .api.ble import GoveeBLEDevice, SEGMENTED_MODELS
40+
from .api.ble import (
41+
BLE_COMMAND_SUPPORTED_MODELS,
42+
GoveeBLEDevice,
43+
SEGMENTED_MODELS,
44+
)
4145

4246
HAS_BLUETOOTH = True
4347
except ImportError: # pragma: no cover — HA installs without Bluetooth
@@ -210,6 +214,10 @@ def __init__(
210214
# populated dynamically from Bluetooth advertisements.
211215
self._ble_devices: dict[str, GoveeBLEDevice] = {} if HAS_BLUETOOTH else {}
212216

217+
# SKUs for which we've already logged "advertised but not on the
218+
# BLE command allowlist" — avoid spamming the log on every advert.
219+
self._ble_ignored_skus_logged: set[str] = set()
220+
213221
# Track in-flight power-off commands so segment entities can
214222
# avoid racing with a concurrent device power-off (issue #16).
215223
self._pending_power_off: set[str] = set()
@@ -466,6 +474,24 @@ def _handle_ble_advertisement(self, service_info) -> None:
466474
if matched_id is None:
467475
return
468476

477+
# BLE advertisement visibility is not the same as BLE command
478+
# capability (issue #59). Some SKUs advertise BLE but silently
479+
# drop command frames — enrolling them in the dispatch path
480+
# makes every control command disappear. Only enroll SKUs whose
481+
# BLE command set is verified.
482+
if ble_sku not in BLE_COMMAND_SUPPORTED_MODELS:
483+
if ble_sku not in self._ble_ignored_skus_logged:
484+
self._ble_ignored_skus_logged.add(ble_sku)
485+
_LOGGER.info(
486+
"%s (SKU=%s) is advertising BLE but is not on the BLE "
487+
"command allowlist. Staying cloud-only. If BLE commands "
488+
"are known to work for this SKU, please open a GitHub "
489+
"issue referencing #59 so it can be added.",
490+
self._devices[matched_id].name,
491+
ble_sku,
492+
)
493+
return
494+
469495
if matched_id not in self._ble_devices:
470496
self._ble_devices[matched_id] = GoveeBLEDevice(
471497
service_info.device,

custom_components/govee/manifest.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,6 @@
2323
"cryptography>=41.0.0"
2424
],
2525
"ssdp": [],
26-
"version": "2026.4.5",
26+
"version": "2026.4.6",
2727
"zeroconf": []
2828
}
Lines changed: 228 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,228 @@
1+
# BLE Demotion for Silently-Failing Transport (#59)
2+
3+
**Date**: 2026-04-13
4+
**Scope**: Issue #59 — WiFi-enabled Govee devices whose BLE advertisements are heard by HA but whose BLE commands go unacknowledged. The Phase 2 multi-transport dispatch (commit `43a7d01`, v2026.4.2/4.3) prefers BLE on every call and never falls back to REST when BLE "succeeds" at the HCI layer but the device ignores the write. v2026.4.4's optimistic grace period hides the UI flipflop but does not deliver the command.
5+
**Status**: Ready for implementation. Recommends a hybrid fix: SKU allowlist (ship now) + state-reconciliation demotion (follow-up).
6+
7+
---
8+
9+
## Summary
10+
11+
BLE writes go out via `write_gatt_char(..., response=False)` — fire-and-forget at the HCI layer. `_try_ble_command` returns `True` the moment the write completes locally, regardless of whether the device was in RF range or even supports the command. Four SKUs reported in #59 (H6072, H61E1, H60B0, H612A) advertise BLE but don't respond to our current command set, so every control attempt silently disappears. v2026.4.4's 10–15s grace period prevents the UI from flipping back but doesn't trigger a REST retry — the command is lost. Recommend shipping a small SKU allowlist immediately (keeps BLE enabled only for verified SKUs) while building a state-reconciliation demotion mechanism that auto-disables BLE for any device whose BLE "successes" aren't confirmed by the next poll or push.
12+
13+
---
14+
15+
## Research Questions
16+
17+
| # | Question | Answer |
18+
|---|---|---|
19+
| 1 | What does `_try_ble_command` actually verify? | Nothing beyond local HCI acceptance. `api/ble.py:337-343` uses `response=False`. There is no ACK, no GATT read-back, no device-side confirmation. |
20+
| 2 | Why isn't REST attempted after a silent BLE failure? | `coordinator.py:931-935`: any `True` return from `_try_ble_command` returns immediately without REST fallback. There's no "verify and retry" path. |
21+
| 3 | Which SKUs are affected? | Per user reports: H6072, H61E1, H60B0, H612A (broken). Works: H605C (does not advertise BLE), H6199 (slow but functional). All broken SKUs advertise BLE and get enrolled in `self._ble_devices` on first advert. |
22+
| 4 | Is there any BLE allowlist or quality gate today? | No. Any advertisement matching `Govee_*`/`ihoment_*`/`GBK_*` or manufacturer_id 0x8803 enrolls the device for BLE command dispatch. `SEGMENTED_MODELS` only determines encoding, not whether to attempt BLE. |
23+
| 5 | Does v2026.4.4's grace period help? | Only cosmetically. It preserves the optimistic state across one poll cycle so the UI doesn't flipflop. The command still didn't reach the device. |
24+
| 6 | Why did "new API key" appear to fix it for one user? | Almost certainly coincidental. API key is only used by REST; BLE dispatch doesn't touch it. The restart or reconfigure likely masked the issue temporarily (adapter reset, proximity change, or re-enrollment). |
25+
26+
---
27+
28+
## Findings
29+
30+
### BLE "success" ≠ device received the command
31+
32+
`api/ble.py:337-343`:
33+
```python
34+
async def _write(self, frame: bytes) -> None:
35+
async with self._lock:
36+
client = await self._ensure_connected()
37+
await client.write_gatt_char(
38+
WRITE_CHARACTERISTIC_UUID, frame, response=False
39+
)
40+
```
41+
42+
`response=False` means **write-without-response**: Bleak returns as soon as the HCI layer queues the frame. No ACK is required from the device. For out-of-range or non-command-capable BLE peripherals this returns success while the frame silently drops on the RF layer.
43+
44+
`coordinator.py:1020-1056` wraps the write in try/except and returns True on any non-exception path. There is no positive confirmation that the device processed the command.
45+
46+
### Dispatch preference is unconditional
47+
48+
`coordinator.py:931-935`:
49+
```python
50+
if HAS_BLUETOOTH and device_id in self._ble_devices:
51+
if await self._try_ble_command(device_id, command):
52+
self._apply_optimistic_update(device_id, command)
53+
self.async_set_updated_data(self._states)
54+
return True
55+
# BLE failed — fall through to REST
56+
```
57+
58+
BLE is tried first every time and wins every time `_try_ble_command` returns True. There's no per-device quality signal that can disable BLE for devices where it's known to not work. The Phase 2 multi-transport design assumed BLE success = command delivered, which this issue disproves.
59+
60+
### Affected SKUs are all BLE-advertising RGBIC strips/lamps
61+
62+
| SKU | Type | In `SEGMENTED_MODELS` | Reported | Likely root cause |
63+
|---|---|---|---|---|
64+
| H6072 | RGBICWW Floor Lamp | Yes | Broken | Advertises BLE but doesn't accept our command frames |
65+
| H61E1 | LED Strip Light M1 | No | Broken | Same |
66+
| H60B0 | Uplighter Floor Lamp | No | Broken | Same |
67+
| H612A | Sofa Strip Light | No | Broken | Same |
68+
| H605C | RGBIC TV Backlight | No | Works | Doesn't advertise BLE; stays cloud-only |
69+
| H6199 | RGBIC | Yes | Slow (~10s) | BLE works but marginal RF |
70+
71+
The pattern: **advertisement-capable ≠ command-capable**. The Phase 1 BLE library was validated against a narrow SKU set (likely H6072/H6199/H6102/H6053) and the command frames work for those — but enrollment happens for any Govee SKU that advertises.
72+
73+
### v2026.4.4 optimistic grace period is a UX patch, not a fix
74+
75+
The grace period (`OPTIMISTIC_GRACE_CAP_SECONDS=15`) preserves `state.power_state` during a window after a control command so the API poll can't flip it back. That's correct for the "device briefly out of BLE range" case where the command does eventually land. It does **not** retry the command via REST. For #59 devices that will never receive BLE, the grace window expires and the user sees the correct ("off") state — but their "turn on" command was lost.
76+
77+
---
78+
79+
## Compatibility Analysis
80+
81+
| Change | Files | Risk | User impact |
82+
|---|---|---|---|
83+
| SKU allowlist (ship immediately) | `api/ble.py`, `coordinator.py` (enroll gate) | Low | Most users will lose BLE acceleration temporarily until their SKU is added. Acceptable trade-off for correctness. |
84+
| State-reconciliation demotion | `models/transport.py`, `coordinator.py`, `models/state.py` | Medium | Preserves BLE for devices where it works; auto-disables for devices where it doesn't. Requires one or two "failed" commands before demotion, so first commands may still be lost. |
85+
| Options toggle "prefer BLE transport" | `config_flow.py`, `coordinator.py` | Low | Gives paranoid users an escape hatch. |
86+
87+
All three changes compose cleanly. None require framework or dependency changes.
88+
89+
---
90+
91+
## Recommendation
92+
93+
Ship **both**:
94+
95+
1. **Immediate: SKU allowlist for BLE enrollment.** Any device whose SKU isn't on a small, verified list stays cloud-only. Ships to master in 24 hours. Users who report BLE working for additional SKUs get them added next release. This prevents further #59 reports without shipping speculative auto-demotion logic.
96+
97+
2. **Follow-up: State-reconciliation demotion.** After any optimistic BLE command, verify the next MQTT push or REST poll agrees with the optimistic state. If three consecutive commands for a device show a post-command mismatch, demote BLE for that device until the next HA restart (or a configurable TTL). The mismatch count resets on agreement. The existing `TransportHealth` dataclass already carries failure metadata — add one mismatch counter (`mismatch_count`) and one demotion timestamp (`demoted_until`).
98+
99+
Do **not** rely solely on a config toggle. Most users won't know to flip it, and the default behavior needs to be correct.
100+
101+
### Comparison matrix
102+
103+
| Approach | Correctness | Latency on broken SKUs | User config | Code complexity |
104+
|---|---|---|---|---|
105+
| Allowlist | ✅ Perfect for listed SKUs | N/A — BLE skipped | None | Trivial |
106+
| State-reconciliation | ⚠️ 1-3 lost commands then demoted | ~2-3 poll cycles | None | Moderate |
107+
| Options toggle | ❌ Default still broken | Unchanged | Manual | Trivial |
108+
109+
---
110+
111+
## Implementation Sketch
112+
113+
### Step 1 — SKU allowlist (ships now)
114+
115+
**`custom_components/govee/api/ble.py`** (after `SEGMENTED_MODELS` at line 78):
116+
```python
117+
# SKUs with verified working BLE command dispatch. Devices not in this set
118+
# remain cloud-only even if they advertise BLE (issue #59 — many SKUs
119+
# advertise BLE but silently drop our command frames).
120+
BLE_COMMAND_SUPPORTED_MODELS: frozenset[str] = frozenset({
121+
"H6199", # RGBIC — confirmed working (slow but reliable)
122+
# Additional SKUs added as users confirm reliable BLE dispatch.
123+
})
124+
```
125+
126+
**`custom_components/govee/coordinator.py`** (inside `_handle_ble_advertisement`, before the `self._ble_devices[matched_id] = GoveeBLEDevice(...)` block):
127+
```python
128+
if ble_sku not in BLE_COMMAND_SUPPORTED_MODELS:
129+
# Keep the device cloud-only. It will still update advertisements
130+
# (so the connectivity sensor shows the BLE adapter is seeing it)
131+
# but commands won't be dispatched over BLE.
132+
return
133+
```
134+
135+
Import `BLE_COMMAND_SUPPORTED_MODELS` alongside `SEGMENTED_MODELS` at the top of coordinator.py.
136+
137+
### Step 2 — State-reconciliation demotion (follow-up)
138+
139+
**`custom_components/govee/models/transport.py`** — add to `TransportHealth`:
140+
```python
141+
# Consecutive state-poll disagreements after a "successful" optimistic
142+
# write via this transport. Reset on agreement. Used to auto-demote BLE
143+
# when the device silently drops commands (issue #59).
144+
mismatch_count: int = 0
145+
demoted_until: datetime | None = None
146+
```
147+
148+
Add a helper:
149+
```python
150+
def record_mismatch(self, now: datetime, threshold: int = 3,
151+
ttl: timedelta = timedelta(hours=1)) -> bool:
152+
self.mismatch_count += 1
153+
if self.mismatch_count >= threshold:
154+
self.demoted_until = now + ttl
155+
return True
156+
return False
157+
158+
def record_agreement(self) -> None:
159+
self.mismatch_count = 0
160+
```
161+
162+
**`custom_components/govee/coordinator.py`**:
163+
164+
After applying an optimistic write via BLE, stamp what we expect:
165+
```python
166+
# around line 931 (after _try_ble_command succeeds)
167+
state = self._states.get(device_id)
168+
if state:
169+
state.pending_ble_verification = self._extract_verifiable_field(command)
170+
```
171+
172+
In `_fetch_device_state` (after cloud poll) and in `_on_mqtt_state_update`:
173+
```python
174+
pending = existing_state.pending_ble_verification
175+
if pending:
176+
actual = getattr(state, pending.field)
177+
ble_health = self._transport_health[device_id]["ble"]
178+
if actual == pending.value:
179+
ble_health.record_agreement()
180+
else:
181+
if ble_health.record_mismatch(datetime.now(timezone.utc)):
182+
_LOGGER.warning(
183+
"Demoting BLE for %s after %d silent failures",
184+
device_id, ble_health.mismatch_count,
185+
)
186+
existing_state.pending_ble_verification = None
187+
```
188+
189+
In `async_control_device`:
190+
```python
191+
ble_health = self._transport_health.get(device_id, {}).get("ble")
192+
if (
193+
HAS_BLUETOOTH
194+
and device_id in self._ble_devices
195+
and not self._ble_demoted(ble_health)
196+
):
197+
...
198+
```
199+
200+
Where `_ble_demoted` checks `ble_health.demoted_until` against `now`.
201+
202+
Verification scope: for #59 purposes, verifying `power_state` only is sufficient — it's the clearest binary signal. Brightness and color mismatches are noisier (RGB colorRgb=0 during scenes etc).
203+
204+
### Step 3 — Options toggle (optional, ship with Step 1)
205+
206+
Add `CONF_DISABLE_BLE_COMMANDS` in `config_flow.py` options. Defaults to `False`. When `True`, skip `_try_ble_command` entirely. Useful for users who know their setup needs cloud-only.
207+
208+
---
209+
210+
## Risks
211+
212+
- **SKU allowlist too aggressive.** If the initial allowlist is empty or near-empty, users currently benefiting from BLE (MQTT-offline fallback, low-latency commands) regress. Mitigation: seed with H6199 (reported working) and invite users to open issues reporting SKUs that work in the field.
213+
- **State reconciliation false positives.** If the cloud API lags behind a real BLE command (e.g. AWS propagation delay), we might demote BLE on a device where it's actually working. Mitigation: a high threshold (3+ mismatches), a long TTL, a mismatch-count reset on agreement, and only demoting when the mismatch is on `power_state` (the most reliable field).
214+
- **Device-side caching.** Some Govee devices cache the last-seen cloud state. A BLE-only change may not be reflected in the cloud poll immediately. Mitigation: MQTT push is the primary verification signal when available.
215+
- **Users with BLE-only (no cloud) scenarios.** Currently unsupported by the integration (API key is required). Not affected.
216+
217+
---
218+
219+
## References
220+
221+
- Issue #59: https://github.com/lasswellt/govee-homeassistant/issues/59
222+
- Commit `43a7d01` (Phase 2 multi-transport): added BLE-first dispatch without verification.
223+
- Commit `dfd0ba2` / v2026.4.4: grace period that masked the UI flipflop but didn't deliver commands.
224+
- Prior research `2026-04-13_open-bugs-triage.md` (#60 analysis): identified that BLE writes are fire-and-forget at the HCI layer.
225+
- `api/ble.py:337-343` — `_write()` uses `response=False`.
226+
- `coordinator.py:931-935` — BLE-first dispatch with no quality gate.
227+
- `coordinator.py:1020-1056` — `_try_ble_command()` records success on any non-exception path.
228+
- HA Bluetooth ACK semantics: https://developers.home-assistant.io/docs/core/bluetooth/api/

tests/test_coordinator.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1249,12 +1249,19 @@ def _make_coordinator_with_devices(self, devices: dict[str, GoveeDevice]):
12491249
# Inject the names that the conditional import would have set
12501250
coord_mod.GoveeBLEDevice = RealBLEDevice
12511251
coord_mod.SEGMENTED_MODELS = RealSegModels
1252+
# Broad allowlist so the enrollment-path tests exercise real logic
1253+
# regardless of the production-default allowlist content. The
1254+
# enforcement path is covered by its own dedicated test.
1255+
coord_mod.BLE_COMMAND_SUPPORTED_MODELS = frozenset(
1256+
{"H6053", "H6072", "H6102", "H6199", "H6076", "H6126"}
1257+
)
12521258

12531259
coord = object.__new__(coord_mod.GoveeCoordinator)
12541260
coord._devices = devices
12551261
coord._ble_devices = {}
12561262
coord._transport_health = {}
12571263
coord._states = {}
1264+
coord._ble_ignored_skus_logged = set()
12581265
return coord
12591266

12601267
def _make_service_info(self, name: str, address: str):
@@ -1268,6 +1275,32 @@ def _make_service_info(self, name: str, address: str):
12681275
info.advertisement = MagicMock()
12691276
return info
12701277

1278+
def test_sku_not_on_allowlist_is_ignored(self, sample_device):
1279+
"""Advertisements for SKUs outside the BLE allowlist must not enroll
1280+
the device for command dispatch (issue #59). Advertising BLE is not
1281+
proof the device will accept BLE command frames."""
1282+
import custom_components.govee.coordinator as coord_mod
1283+
from custom_components.govee.api.ble import GoveeBLEDevice as RealBLEDevice
1284+
from custom_components.govee.api.ble import SEGMENTED_MODELS as RealSegModels
1285+
1286+
coord_mod.GoveeBLEDevice = RealBLEDevice
1287+
coord_mod.SEGMENTED_MODELS = RealSegModels
1288+
# Narrow allowlist — does NOT include the advertised SKU below.
1289+
coord_mod.BLE_COMMAND_SUPPORTED_MODELS = frozenset({"H9999"})
1290+
1291+
coord = object.__new__(coord_mod.GoveeCoordinator)
1292+
coord._devices = {"AA:BB:CC:DD:EE:FF:00:11": sample_device}
1293+
coord._ble_devices = {}
1294+
coord._transport_health = {}
1295+
coord._states = {}
1296+
coord._ble_ignored_skus_logged = set()
1297+
1298+
info = self._make_service_info("Govee_H6072_754B", "AA:BB:CC:DD:EE:FF")
1299+
coord._handle_ble_advertisement(info)
1300+
1301+
assert "AA:BB:CC:DD:EE:FF:00:11" not in coord._ble_devices
1302+
assert "H6072" in coord._ble_ignored_skus_logged
1303+
12711304
def test_single_sku_match_creates_ble_device(self, sample_device):
12721305
"""BLE advertisement matching a single cloud device by SKU creates a GoveeBLEDevice."""
12731306
coord = self._make_coordinator_with_devices(

0 commit comments

Comments
 (0)