Skip to content

Commit d744940

Browse files
Refactor: extract OmniStreamingVideoHandler base and QwenOmniStreamingVideoHandler (#4424)
Signed-off-by: NumberWan <wantszkin2003@gmail.com> Signed-off-by: lishunyang12 <125541396+lishunyang12@users.noreply.github.com> Co-authored-by: lishunyang12 <125541396+lishunyang12@users.noreply.github.com>
1 parent e957d16 commit d744940

4 files changed

Lines changed: 1041 additions & 886 deletions

File tree

tests/entrypoints/openai_api/test_serving_video_stream.py

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,12 @@
1414
import pytest
1515
from PIL import Image
1616

17-
from vllm_omni.entrypoints.openai import serving_video_stream, video_stream_envs
17+
from vllm_omni.entrypoints.openai import video_stream_base, video_stream_envs
1818
from vllm_omni.entrypoints.openai.serving_video_stream import (
19-
OmniStreamingVideoHandler,
19+
QwenOmniStreamingVideoHandler,
2020
StreamingVideoSessionConfig,
2121
)
22+
from vllm_omni.entrypoints.openai.video_stream_base import OmniStreamingVideoHandler
2223
from vllm_omni.outputs import OmniRequestOutput
2324

2425
pytestmark = [pytest.mark.core_model, pytest.mark.cpu]
@@ -149,7 +150,7 @@ async def _gen():
149150

150151
return _gen()
151152

152-
class CapturingHandler(OmniStreamingVideoHandler):
153+
class CapturingHandler(QwenOmniStreamingVideoHandler):
153154
async def _preprocess_to_engine_prompt(self, request):
154155
captured_requests.append(request)
155156
return {"prompt": "x"}
@@ -186,7 +187,7 @@ async def _gen():
186187

187188
return _gen()
188189

189-
class CapturingHandler(OmniStreamingVideoHandler):
190+
class CapturingHandler(QwenOmniStreamingVideoHandler):
190191
async def _preprocess_to_engine_prompt(self, request):
191192
captured_requests.append(request)
192193
return {"prompt": "x"}
@@ -223,7 +224,7 @@ async def _gen():
223224

224225
return _gen()
225226

226-
class CapturingHandler(OmniStreamingVideoHandler):
227+
class CapturingHandler(QwenOmniStreamingVideoHandler):
227228
async def _preprocess_to_engine_prompt(self, request):
228229
captured_requests.append(request)
229230
return {"prompt": "x"}
@@ -309,7 +310,7 @@ async def _gen():
309310

310311
return _gen()
311312

312-
class CapturingHandler(OmniStreamingVideoHandler):
313+
class CapturingHandler(QwenOmniStreamingVideoHandler):
313314
async def _preprocess_to_engine_prompt(self, request):
314315
return {"prompt": "x"}
315316

@@ -374,7 +375,7 @@ async def test_new_query_cancels_in_flight_query():
374375
query_cancelled = asyncio.Event()
375376
calls = 0
376377

377-
class BlockingHandler(OmniStreamingVideoHandler):
378+
class BlockingHandler(QwenOmniStreamingVideoHandler):
378379
async def _process_query(self, *args, **kwargs):
379380
nonlocal calls
380381
calls += 1
@@ -412,7 +413,7 @@ async def test_video_done_waits_for_in_flight_query():
412413
allow_finish = asyncio.Event()
413414
query_finished = asyncio.Event()
414415

415-
class BlockingHandler(OmniStreamingVideoHandler):
416+
class BlockingHandler(QwenOmniStreamingVideoHandler):
416417
async def _process_query(self, *args, **kwargs):
417418
query_started.set()
418419
await allow_finish.wait()
@@ -452,11 +453,11 @@ def blocked_decode(raw_bytes: bytes):
452453
release_decode.wait(timeout=2.0)
453454
return Image.open(io.BytesIO(raw_bytes)).convert("RGB")
454455

455-
class BlockingHandler(OmniStreamingVideoHandler):
456+
class BlockingHandler(QwenOmniStreamingVideoHandler):
456457
async def _process_query(self, *args, **kwargs):
457458
query_started.set()
458459

459-
monkeypatch.setattr(serving_video_stream, "_decode_frame_bytes", blocked_decode)
460+
monkeypatch.setattr(video_stream_base, "_decode_frame_bytes", blocked_decode)
460461

461462
ws = TimedWebSocket()
462463
handler = BlockingHandler(chat_service=object(), idle_timeout=5.0)
@@ -486,7 +487,7 @@ async def test_client_cannot_send_internal_frame_decode_failed_message():
486487
captured_frames: list[list[str]] = []
487488
frame = _b64(_make_jpeg())
488489

489-
class CapturingHandler(OmniStreamingVideoHandler):
490+
class CapturingHandler(QwenOmniStreamingVideoHandler):
490491
async def _process_query(
491492
self,
492493
websocket,
@@ -550,7 +551,7 @@ async def test_frame_filter_error_sends_invalid_image(monkeypatch):
550551
def fail_should_retain(self, frame_jpeg):
551552
raise ValueError("decode failed")
552553

553-
monkeypatch.setattr(serving_video_stream.FrameSimilarityFilter, "should_retain", fail_should_retain)
554+
monkeypatch.setattr(video_stream_base.FrameSimilarityFilter, "should_retain", fail_should_retain)
554555

555556
ws = TimedWebSocket()
556557
handler = OmniStreamingVideoHandler(chat_service=object(), idle_timeout=5.0)
@@ -579,7 +580,7 @@ async def _gen():
579580

580581
return _gen()
581582

582-
class CapturingHandler(OmniStreamingVideoHandler):
583+
class CapturingHandler(QwenOmniStreamingVideoHandler):
583584
async def _process_query_engine(
584585
self,
585586
websocket,
@@ -594,7 +595,7 @@ async def _process_query_engine(
594595
):
595596
captured_audio_lengths.append(len(audio_buffer))
596597

597-
monkeypatch.setattr(serving_video_stream, "_MAX_AUDIO_BUFFER_BYTES", 4)
598+
monkeypatch.setattr(video_stream_base, "_MAX_AUDIO_BUFFER_BYTES", 4)
598599

599600
ws = TimedWebSocket()
600601
handler = CapturingHandler(chat_service=object(), engine_client=EmptyEngine(), idle_timeout=5.0)
@@ -618,7 +619,7 @@ async def _process_query_engine(
618619

619620

620621
def test_build_messages_keeps_recent_history_text_only():
621-
handler = OmniStreamingVideoHandler(chat_service=object())
622+
handler = QwenOmniStreamingVideoHandler(chat_service=object())
622623
old_frame = _b64(_make_jpeg(1, 2, 3))
623624
current_frame = _b64(_make_jpeg(4, 5, 6))
624625
history = [

vllm_omni/entrypoints/openai/api_server.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@
132132
ReferenceImage,
133133
ReferenceVideo,
134134
)
135-
from vllm_omni.entrypoints.openai.serving_video_stream import OmniStreamingVideoHandler
135+
from vllm_omni.entrypoints.openai.serving_video_stream import create_streaming_video_handler
136136
from vllm_omni.entrypoints.openai.stage_params import (
137137
build_stage_sampling_params_list,
138138
get_default_sampling_params_list,
@@ -1070,7 +1070,7 @@ async def omni_init_app_state(
10701070
speech_service=state.openai_serving_speech,
10711071
)
10721072
state.openai_streaming_video = (
1073-
OmniStreamingVideoHandler(
1073+
create_streaming_video_handler(
10741074
chat_service=state.openai_serving_chat,
10751075
engine_client=engine_client,
10761076
)

0 commit comments

Comments
 (0)