mirror of
https://github.com/vxcontrol/pentagi.git
synced 2026-05-03 13:30:47 +00:00
feat(docker): update Dockerfile and add new vLLM configurations
- Added new provider configurations for vLLM Qwen 3.6 in both thinking and non-thinking modes.
- Updated the Dockerfile to include the new configuration files for vLLM Qwen 3.6 and ensure proper setup for deployment.
This commit is contained in:
+3
-1
@@ -171,8 +171,10 @@ COPY examples/configs/ollama-qwen332b-fp16-tc.provider.yml /opt/pentagi/conf/
|
||||
COPY examples/configs/ollama-qwq32b-fp16-tc.provider.yml /opt/pentagi/conf/
|
||||
COPY examples/configs/openrouter.provider.yml /opt/pentagi/conf/
|
||||
COPY examples/configs/novita.provider.yml /opt/pentagi/conf/
|
||||
COPY examples/configs/vllm-qwen3.5-27b-fp8.provider.yml /opt/pentagi/conf/
|
||||
COPY examples/configs/vllm-qwen3.5-27b-fp8-no-think.provider.yml /opt/pentagi/conf/
|
||||
COPY examples/configs/vllm-qwen3.5-27b-fp8.provider.yml /opt/pentagi/conf/
|
||||
COPY examples/configs/vllm-qwen3.6-27b-fp8-no-think.provider.yml /opt/pentagi/conf/
|
||||
COPY examples/configs/vllm-qwen3.6-27b-fp8.provider.yml /opt/pentagi/conf/
|
||||
COPY examples/configs/vllm-qwen332b-fp16.provider.yml /opt/pentagi/conf/
|
||||
|
||||
COPY LICENSE /opt/pentagi/LICENSE
|
||||
|
||||
@@ -0,0 +1,193 @@
|
||||
# Qwen3.6-27B FP8 Provider Configuration - NON-THINKING MODE
|
||||
# Based on official Qwen recommendations for vLLM inference
|
||||
# Architecture: Hybrid 75% DeltaNet + 25% Full Attention (48+16 layers)
|
||||
# Context: 262K native, expandable to 1M with YaRN
|
||||
# Vision: VLM with Vision Encoder (uses VRAM even for text-only tasks)
|
||||
#
|
||||
# Thinking mode is disabled via the extra_body parameter (chat_template_kwargs.enable_thinking: false)
|
||||
# Recommended sampling parameters:
|
||||
# - General tasks: temp=0.7, top_p=0.8, top_k=20, min_p=0.0, pp=1.5, rp=1.0
|
||||
# - Reasoning tasks: temp=1.0, top_p=0.95, top_k=20, min_p=0.0, pp=1.5, rp=1.0
|
||||
|
||||
simple:
|
||||
model: "Qwen/Qwen3.6-27B-FP8"
|
||||
temperature: 0.7
|
||||
top_k: 20
|
||||
top_p: 0.8
|
||||
min_p: 0.0
|
||||
presence_penalty: 1.5
|
||||
repetition_penalty: 1.0
|
||||
n: 1
|
||||
max_tokens: 32768
|
||||
extra_body:
|
||||
chat_template_kwargs:
|
||||
enable_thinking: false
|
||||
|
||||
simple_json:
|
||||
model: "Qwen/Qwen3.6-27B-FP8"
|
||||
temperature: 0.7
|
||||
top_k: 20
|
||||
top_p: 0.8
|
||||
min_p: 0.0
|
||||
presence_penalty: 1.5
|
||||
repetition_penalty: 1.0
|
||||
n: 1
|
||||
max_tokens: 32768
|
||||
json: true
|
||||
extra_body:
|
||||
chat_template_kwargs:
|
||||
enable_thinking: false
|
||||
|
||||
primary_agent:
|
||||
model: "Qwen/Qwen3.6-27B-FP8"
|
||||
temperature: 1.0
|
||||
top_k: 20
|
||||
top_p: 0.95
|
||||
min_p: 0.0
|
||||
presence_penalty: 1.5
|
||||
repetition_penalty: 1.0
|
||||
n: 1
|
||||
max_tokens: 32768
|
||||
extra_body:
|
||||
chat_template_kwargs:
|
||||
enable_thinking: false
|
||||
|
||||
assistant:
|
||||
model: "Qwen/Qwen3.6-27B-FP8"
|
||||
temperature: 1.0
|
||||
top_k: 20
|
||||
top_p: 0.95
|
||||
min_p: 0.0
|
||||
presence_penalty: 1.5
|
||||
repetition_penalty: 1.0
|
||||
n: 1
|
||||
max_tokens: 32768
|
||||
extra_body:
|
||||
chat_template_kwargs:
|
||||
enable_thinking: false
|
||||
|
||||
generator:
|
||||
model: "Qwen/Qwen3.6-27B-FP8"
|
||||
temperature: 1.0
|
||||
top_k: 20
|
||||
top_p: 0.95
|
||||
min_p: 0.0
|
||||
presence_penalty: 1.5
|
||||
repetition_penalty: 1.0
|
||||
n: 1
|
||||
max_tokens: 32768
|
||||
extra_body:
|
||||
chat_template_kwargs:
|
||||
enable_thinking: false
|
||||
|
||||
refiner:
|
||||
model: "Qwen/Qwen3.6-27B-FP8"
|
||||
temperature: 1.0
|
||||
top_k: 20
|
||||
top_p: 0.95
|
||||
min_p: 0.0
|
||||
presence_penalty: 1.5
|
||||
repetition_penalty: 1.0
|
||||
n: 1
|
||||
max_tokens: 32768
|
||||
extra_body:
|
||||
chat_template_kwargs:
|
||||
enable_thinking: false
|
||||
|
||||
adviser:
|
||||
model: "Qwen/Qwen3.6-27B-FP8"
|
||||
temperature: 1.0
|
||||
top_k: 20
|
||||
top_p: 0.95
|
||||
min_p: 0.0
|
||||
presence_penalty: 1.5
|
||||
repetition_penalty: 1.0
|
||||
n: 1
|
||||
max_tokens: 32768
|
||||
extra_body:
|
||||
chat_template_kwargs:
|
||||
enable_thinking: false
|
||||
|
||||
reflector:
|
||||
model: "Qwen/Qwen3.6-27B-FP8"
|
||||
temperature: 1.0
|
||||
top_k: 20
|
||||
top_p: 0.95
|
||||
min_p: 0.0
|
||||
presence_penalty: 1.5
|
||||
repetition_penalty: 1.0
|
||||
n: 1
|
||||
max_tokens: 32768
|
||||
extra_body:
|
||||
chat_template_kwargs:
|
||||
enable_thinking: false
|
||||
|
||||
searcher:
|
||||
model: "Qwen/Qwen3.6-27B-FP8"
|
||||
temperature: 0.7
|
||||
top_k: 20
|
||||
top_p: 0.8
|
||||
min_p: 0.0
|
||||
presence_penalty: 1.5
|
||||
repetition_penalty: 1.0
|
||||
n: 1
|
||||
max_tokens: 32768
|
||||
extra_body:
|
||||
chat_template_kwargs:
|
||||
enable_thinking: false
|
||||
|
||||
enricher:
|
||||
model: "Qwen/Qwen3.6-27B-FP8"
|
||||
temperature: 0.7
|
||||
top_k: 20
|
||||
top_p: 0.8
|
||||
min_p: 0.0
|
||||
presence_penalty: 1.5
|
||||
repetition_penalty: 1.0
|
||||
n: 1
|
||||
max_tokens: 32768
|
||||
extra_body:
|
||||
chat_template_kwargs:
|
||||
enable_thinking: false
|
||||
|
||||
coder:
|
||||
model: "Qwen/Qwen3.6-27B-FP8"
|
||||
temperature: 1.0
|
||||
top_k: 20
|
||||
top_p: 0.95
|
||||
min_p: 0.0
|
||||
presence_penalty: 1.5
|
||||
repetition_penalty: 1.0
|
||||
n: 1
|
||||
max_tokens: 32768
|
||||
extra_body:
|
||||
chat_template_kwargs:
|
||||
enable_thinking: false
|
||||
|
||||
installer:
|
||||
model: "Qwen/Qwen3.6-27B-FP8"
|
||||
temperature: 1.0
|
||||
top_k: 20
|
||||
top_p: 0.95
|
||||
min_p: 0.0
|
||||
presence_penalty: 1.5
|
||||
repetition_penalty: 1.0
|
||||
n: 1
|
||||
max_tokens: 32768
|
||||
extra_body:
|
||||
chat_template_kwargs:
|
||||
enable_thinking: false
|
||||
|
||||
pentester:
|
||||
model: "Qwen/Qwen3.6-27B-FP8"
|
||||
temperature: 1.0
|
||||
top_k: 20
|
||||
top_p: 0.95
|
||||
min_p: 0.0
|
||||
presence_penalty: 1.5
|
||||
repetition_penalty: 1.0
|
||||
n: 1
|
||||
max_tokens: 32768
|
||||
extra_body:
|
||||
chat_template_kwargs:
|
||||
enable_thinking: false
|
||||
@@ -0,0 +1,174 @@
|
||||
# Qwen3.6-27B FP8 Provider Configuration - THINKING MODE (default)
|
||||
# Based on official Qwen recommendations for vLLM inference
|
||||
# Architecture: Hybrid 75% DeltaNet + 25% Full Attention (48+16 layers)
|
||||
# Context: 262K native, expandable to 1M with YaRN
|
||||
# Vision: VLM with Vision Encoder (uses VRAM even for text-only tasks)
|
||||
#
|
||||
# Thinking mode is enabled by default (no extra_body needed)
|
||||
# Recommended sampling parameters:
|
||||
# - General tasks: temp=1.0, top_p=0.95, top_k=20, min_p=0.0, pp=1.5, rp=1.0
|
||||
# - Precise coding: temp=0.6, top_p=0.95, top_k=20, min_p=0.0, pp=0.0, rp=1.0
|
||||
#
|
||||
# Non-thinking mode is enabled per agent via the extra_body parameter (chat_template_kwargs.enable_thinking: false)
|
||||
# Recommended sampling parameters (non-thinking mode):
|
||||
# - General tasks: temp=0.7, top_p=0.8, top_k=20, min_p=0.0, pp=1.5, rp=1.0
|
||||
# - Reasoning tasks: temp=1.0, top_p=0.95, top_k=20, min_p=0.0, pp=1.5, rp=1.0
|
||||
|
||||
simple:
|
||||
model: "Qwen/Qwen3.6-27B-FP8"
|
||||
temperature: 0.7
|
||||
top_k: 20
|
||||
top_p: 0.8
|
||||
min_p: 0.0
|
||||
presence_penalty: 1.5
|
||||
repetition_penalty: 1.0
|
||||
n: 1
|
||||
max_tokens: 32768
|
||||
extra_body:
|
||||
chat_template_kwargs:
|
||||
enable_thinking: false
|
||||
|
||||
simple_json:
|
||||
model: "Qwen/Qwen3.6-27B-FP8"
|
||||
temperature: 0.7
|
||||
top_k: 20
|
||||
top_p: 0.8
|
||||
min_p: 0.0
|
||||
presence_penalty: 1.5
|
||||
repetition_penalty: 1.0
|
||||
n: 1
|
||||
max_tokens: 32768
|
||||
json: true
|
||||
extra_body:
|
||||
chat_template_kwargs:
|
||||
enable_thinking: false
|
||||
|
||||
primary_agent:
|
||||
model: "Qwen/Qwen3.6-27B-FP8"
|
||||
temperature: 1.0
|
||||
top_k: 20
|
||||
top_p: 0.95
|
||||
min_p: 0.0
|
||||
presence_penalty: 1.5
|
||||
repetition_penalty: 1.0
|
||||
n: 1
|
||||
max_tokens: 32768
|
||||
|
||||
assistant:
|
||||
model: "Qwen/Qwen3.6-27B-FP8"
|
||||
temperature: 1.0
|
||||
top_k: 20
|
||||
top_p: 0.95
|
||||
min_p: 0.0
|
||||
presence_penalty: 1.5
|
||||
repetition_penalty: 1.0
|
||||
n: 1
|
||||
max_tokens: 32768
|
||||
|
||||
generator:
|
||||
model: "Qwen/Qwen3.6-27B-FP8"
|
||||
temperature: 1.0
|
||||
top_k: 20
|
||||
top_p: 0.95
|
||||
min_p: 0.0
|
||||
presence_penalty: 1.5
|
||||
repetition_penalty: 1.0
|
||||
n: 1
|
||||
max_tokens: 32768
|
||||
|
||||
refiner:
|
||||
model: "Qwen/Qwen3.6-27B-FP8"
|
||||
temperature: 1.0
|
||||
top_k: 20
|
||||
top_p: 0.95
|
||||
min_p: 0.0
|
||||
presence_penalty: 1.5
|
||||
repetition_penalty: 1.0
|
||||
n: 1
|
||||
max_tokens: 32768
|
||||
|
||||
adviser:
|
||||
model: "Qwen/Qwen3.6-27B-FP8"
|
||||
temperature: 1.0
|
||||
top_k: 20
|
||||
top_p: 0.95
|
||||
min_p: 0.0
|
||||
presence_penalty: 1.5
|
||||
repetition_penalty: 1.0
|
||||
n: 1
|
||||
max_tokens: 32768
|
||||
|
||||
reflector:
|
||||
model: "Qwen/Qwen3.6-27B-FP8"
|
||||
temperature: 1.0
|
||||
top_k: 20
|
||||
top_p: 0.95
|
||||
min_p: 0.0
|
||||
presence_penalty: 1.5
|
||||
repetition_penalty: 1.0
|
||||
n: 1
|
||||
max_tokens: 32768
|
||||
extra_body:
|
||||
chat_template_kwargs:
|
||||
enable_thinking: false
|
||||
|
||||
searcher:
|
||||
model: "Qwen/Qwen3.6-27B-FP8"
|
||||
temperature: 0.7
|
||||
top_k: 20
|
||||
top_p: 0.8
|
||||
min_p: 0.0
|
||||
presence_penalty: 1.5
|
||||
repetition_penalty: 1.0
|
||||
n: 1
|
||||
max_tokens: 32768
|
||||
extra_body:
|
||||
chat_template_kwargs:
|
||||
enable_thinking: false
|
||||
|
||||
enricher:
|
||||
model: "Qwen/Qwen3.6-27B-FP8"
|
||||
temperature: 0.7
|
||||
top_k: 20
|
||||
top_p: 0.8
|
||||
min_p: 0.0
|
||||
presence_penalty: 1.5
|
||||
repetition_penalty: 1.0
|
||||
n: 1
|
||||
max_tokens: 32768
|
||||
extra_body:
|
||||
chat_template_kwargs:
|
||||
enable_thinking: false
|
||||
|
||||
coder:
|
||||
model: "Qwen/Qwen3.6-27B-FP8"
|
||||
temperature: 0.6
|
||||
top_k: 20
|
||||
top_p: 0.95
|
||||
min_p: 0.0
|
||||
presence_penalty: 0.0
|
||||
repetition_penalty: 1.0
|
||||
n: 1
|
||||
max_tokens: 32768
|
||||
|
||||
installer:
|
||||
model: "Qwen/Qwen3.6-27B-FP8"
|
||||
temperature: 0.6
|
||||
top_k: 20
|
||||
top_p: 0.95
|
||||
min_p: 0.0
|
||||
presence_penalty: 0.0
|
||||
repetition_penalty: 1.0
|
||||
n: 1
|
||||
max_tokens: 32768
|
||||
|
||||
pentester:
|
||||
model: "Qwen/Qwen3.6-27B-FP8"
|
||||
temperature: 0.6
|
||||
top_k: 20
|
||||
top_p: 0.95
|
||||
min_p: 0.0
|
||||
presence_penalty: 0.0
|
||||
repetition_penalty: 1.0
|
||||
n: 1
|
||||
max_tokens: 32768
|
||||
Reference in New Issue
Block a user