We read every piece of feedback and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent c5aaf19, commit cb1d0dd (Copy full SHA for cb1d0dd)
1 file changed
sglang.sh
@@ -631,7 +631,7 @@ cmd_qwen36_35b_nvfp4() {
631
--quantization modelopt_mixed \
632
--mem-fraction-static 0.85 \
633
--context-length "${ctx}" \
634
- --max-running-requests 2 \
+ --max-running-requests 4 \
635
--attention-backend flashinfer \
636
--linear-attn-backend triton \
637
--linear-attn-prefill-backend triton \
0 commit comments