@@ -163,9 +163,10 @@ inline bool is_window_visible(HWND hwnd)
163163 return hwnd && IsWindowVisible (hwnd);
164164}
165165
166- // ── Windows screen capture via GDI PrintWindow + BitBlt ───────────────────────
167- // PrintWindow captures the window's own backbuffer — works even when the
168- // window is partially occluded, unlike BitBlt from the desktop DC.
166+ // ── Windows screen capture via GDI BitBlt ─────────────────────────────────────
167+ // bgra_buf_ is pre-allocated in init() and reused every grab() call so we never
168+ // heap-allocate inside the hot loop (the old code did `cv::Mat bgra(h,w,CV_8UC4)`
169+ // on every single frame which triggered a malloc+free pair at capture rate).
169170
170171class PlatformCapture
171172{
@@ -193,6 +194,12 @@ class PlatformCapture
193194 return false ;
194195 }
195196 SelectObject (hdc_mem_, hbmp_);
197+
198+ // Pre-allocate the intermediate BGRA buffer once for this region size.
199+ // grab() reuses it every frame via GetDIBits writing directly into
200+ // bgra_buf_.data — no heap allocation in the hot path.
201+ bgra_buf_.create (h_, w_, CV_8UC4 );
202+
196203 ready_ = true ;
197204 return true ;
198205 }
@@ -202,24 +209,24 @@ class PlatformCapture
202209 if (!ready_)
203210 return false ;
204211
205- // Capture the window into our memory DC
206212 HDC hwnd_dc = GetDC (hwnd_);
207213 BitBlt (hdc_mem_, 0 , 0 , w_, h_, hwnd_dc, x_, y_, SRCCOPY );
208214 ReleaseDC (hwnd_, hwnd_dc);
209215
210- // Pull pixels from HBITMAP into a cv::Mat
211216 BITMAPINFOHEADER bi{};
212217 bi.biSize = sizeof (bi);
213218 bi.biWidth = w_;
214- bi.biHeight = -h_; // negative = top-down
219+ bi.biHeight = -h_; // negative = top-down row order
215220 bi.biPlanes = 1 ;
216221 bi.biBitCount = 32 ;
217222 bi.biCompression = BI_RGB ;
218223
219- cv::Mat bgra (h_, w_, CV_8UC4 );
220- GetDIBits (hdc_mem_, hbmp_, 0 , h_, bgra .data ,
224+ // Write directly into the pre-allocated buffer — zero heap activity
225+ GetDIBits (hdc_mem_, hbmp_, 0 , h_, bgra_buf_ .data ,
221226 reinterpret_cast <BITMAPINFO *>(&bi), DIB_RGB_COLORS );
222- cv::cvtColor (bgra, out, cv::COLOR_BGRA2BGR );
227+
228+ // Convert into caller's Mat (cvtColor reuses out's buffer if size and type match)
229+ cv::cvtColor (bgra_buf_, out, cv::COLOR_BGRA2BGR );
223230 return true ;
224231 }
225232
@@ -242,11 +249,14 @@ class PlatformCapture
242249 DeleteObject (hbmp_);
243250 hbmp_ = nullptr ;
244251 }
252+ bgra_buf_.release ();
245253 ready_ = false ;
246254 }
255+
247256 HWND hwnd_{nullptr };
248257 HDC hdc_mem_{nullptr };
249258 HBITMAP hbmp_{nullptr };
259+ cv::Mat bgra_buf_; // BGRA scratch — allocated in init(), reused by every grab()
250260 int x_{0 }, y_{0 }, w_{0 }, h_{0 };
251261 bool ready_{false };
252262};
@@ -380,7 +390,8 @@ class XWin
380390 if (rd)
381391 XFree (rd);
382392 }
383- int cw = attr.width - fl - fr, ch = attr.height - ft - fb;
393+ int cw = attr.width - fl - fr;
394+ int ch = attr.height - ft - fb;
384395 if (cw <= 0 || ch <= 0 )
385396 return std::nullopt ;
386397 return Rect{attr.x + fl, attr.y + ft, cw, ch};
@@ -389,7 +400,8 @@ class XWin
389400 bool is_visible (Window w) const
390401 {
391402 XWindowAttributes attr;
392- return XGetWindowAttributes (dpy_, w, &attr) && attr.map_state == IsViewable;
403+ return XGetWindowAttributes (dpy_, w, &attr) &&
404+ attr.map_state == IsViewable;
393405 }
394406
395407 Display *dpy () const { return dpy_; }
@@ -452,6 +464,7 @@ class PlatformCapture
452464 if (!XShmGetImage (dpy_, root_, img_, x_, y_, AllPlanes))
453465 return false ;
454466 cv::Mat raw (h_, w_, CV_8UC4 , img_->data );
467+ // cvtColor reuses out's buffer when size and type already match — no heap alloc
455468 cv::cvtColor (raw, out, cv::COLOR_BGRA2BGR );
456469 return true ;
457470 }
@@ -474,6 +487,7 @@ class PlatformCapture
474487 img_ = nullptr ;
475488 ready_ = false ;
476489 }
490+
477491 Display *dpy_{nullptr };
478492 Window root_{0 };
479493 XShmSegmentInfo shm_{};
@@ -512,7 +526,6 @@ class CaptureThread
512526 cv::Mat kernel = cv::getStructuringElement (cv::MORPH_ELLIPSE , {3 , 3 });
513527
514528#if defined(_WIN32)
515- // Windows: screen dimensions from system metrics
516529 int screen_w = GetSystemMetrics (SM_CXVIRTUALSCREEN );
517530 int screen_h = GetSystemMetrics (SM_CYVIRTUALSCREEN );
518531 int screen_l = GetSystemMetrics (SM_XVIRTUALSCREEN );
@@ -523,9 +536,6 @@ class CaptureThread
523536 auto next_frame = clk::now ();
524537 while (running_.load (std::memory_order_relaxed))
525538 {
526- // ── FPS cap — sleep until next deadline, then advance it ───────
527- // Capping next_frame to now() prevents lag accumulation after
528- // prolonged sleeps (e.g. window-not-visible 100ms pauses).
529539 if (cfg_.target_fps > 0 )
530540 {
531541 auto now = clk::now ();
@@ -534,15 +544,14 @@ class CaptureThread
534544 std::this_thread::sleep_until (next_frame);
535545 next_frame += std::chrono::microseconds (1'000'000 / cfg_.target_fps );
536546 }
537-
538547 if (!is_window_visible (wid_))
539548 {
540549 std::this_thread::sleep_for (std::chrono::milliseconds (100 ));
541550 continue ;
542551 }
543552 auto rect = get_content_rect (wid_);
544553#else
545- // Linux: own Display connection per thread (Xlib not thread-safe)
554+ // Linux: own Display connection per thread (Xlib is not thread-safe)
546555 XWin xwin;
547556 Display *dpy = xwin.dpy ();
548557 int screen_w = DisplayWidth (dpy, DefaultScreen (dpy));
@@ -554,9 +563,6 @@ class CaptureThread
554563 auto next_frame = clk::now ();
555564 while (running_.load (std::memory_order_relaxed))
556565 {
557- // ── FPS cap — sleep until next deadline, then advance it ───────
558- // Capping next_frame to now() prevents lag accumulation after
559- // prolonged sleeps (e.g. window-not-visible 100ms pauses).
560566 if (cfg_.target_fps > 0 )
561567 {
562568 auto now = clk::now ();
@@ -565,7 +571,6 @@ class CaptureThread
565571 std::this_thread::sleep_until (next_frame);
566572 next_frame += std::chrono::microseconds (1'000'000 / cfg_.target_fps );
567573 }
568-
569574 if (!xwin.is_visible (wid_))
570575 {
571576 std::this_thread::sleep_for (std::chrono::milliseconds (100 ));
@@ -600,11 +605,10 @@ class CaptureThread
600605 {
601606#if defined(_WIN32)
602607 if (!cap.init (wid_, left, top, cw, ch))
603- {
604608#else
605609 if (!cap.init (dpy, xwin.root (), left, top, cw, ch))
606- {
607610#endif
611+ {
608612 std::this_thread::sleep_for (std::chrono::milliseconds (50 ));
609613 continue ;
610614 }
@@ -623,17 +627,19 @@ class CaptureThread
623627 }
624628
625629 // ── BGR color mask ────────────────────────────────────────────
630+ // bounds() returns cached lo/hi — no sscanf or arithmetic here
626631 std::array<uint8_t , 3 > lo, hi;
627632 cfg_.bounds (lo, hi);
628633 cv::inRange (slot.bgr ,
629634 cv::Scalar (lo[0 ], lo[1 ], lo[2 ]),
630635 cv::Scalar (hi[0 ], hi[1 ], hi[2 ]),
631636 slot.mask );
632637
633- // ── Morphology — skip when mask is empty ─────────────────────
634- // cv::morphologyEx on an all-zero mask wastes CPU. The early
635- // check is cheap (single pass to count non-zeros) and avoids the
636- // kernel convolution entirely when nothing matched inRange.
638+ // ── Morphology ────────────────────────────────────────────────
639+ // countNonZero is O(w×h) but saves the O(w×h) morphologyEx when
640+ // the mask is empty (common during cooldown / off-target frames).
641+ // Worth keeping when empty frames are the majority; profile if
642+ // your scene keeps targets on-screen most of the time.
637643 if (cv::countNonZero (slot.mask ) > 0 )
638644 cv::morphologyEx (slot.mask , slot.mask , cv::MORPH_OPEN , kernel);
639645
0 commit comments