llama.cpp 跑 100K 上下文起步能达到 16 t/s(GTX 1060 6G)
@echo off
:: Console setup: switch to code page 437, set the window title and define the
:: ANSI color variables used by the messages and banner further down.
chcp 437 > nul
title Qwen3.6-35B-A3B-Uncensored
:: ========== Colors ==========
:: An ANSI color sequence is <ESC>[<code>m, where <ESC> is the control
:: character 0x1B. A bare "[" is printed verbatim (e.g. "[91m[X] ..."), so the
:: escape character is captured at run time from "prompt $E" instead of relying
:: on an unprintable byte being present in this file.
:: The loop runs once per output line of the child cmd; the last line is the
:: escape-only prompt, so ESC ends up holding <ESC>[ .
for /F %%E in ('echo prompt $E ^| cmd') do set "ESC=%%E["
set "RED=%ESC%91m"
set "GREEN=%ESC%92m"
set "YELLOW=%ESC%93m"
set "BLUE=%ESC%94m"
set "PURPLE=%ESC%95m"
set "CYAN=%ESC%96m"
set "WHITE=%ESC%97m"
set "RESET=%ESC%0m"
:: ========== Paths ==========
:: Every path hangs off one install root, so moving the installation means
:: editing a single line. The quoted form  set "NAME=value"  keeps stray
:: trailing whitespace out of the stored value.
set "LLAMA_ROOT=C:\Users\AI\Llama Server\llama.cpp"
set "SERVER=%LLAMA_ROOT%\bin\llama-server.exe"
set "MODEL=%LLAMA_ROOT%\models\Qwen3.6-35B-A3B-Uncensored-HauhauCS-Aggressive-Q4_K_M.gguf"
set "MMPROJ=%LLAMA_ROOT%\models\mmproj-Qwen3.6-35B-A3B-Uncensored-HauhauCS-Aggressive-f16.gguf"
set "SLOTS=%LLAMA_ROOT%\data\slots"
:: ========== File Checks ==========
:: Stop with exit code 1 (after a key press) if a required file is missing.
if not exist "%MODEL%" (echo %RED%[X] Model file missing%RESET% & pause & exit /b 1)
if not exist "%MMPROJ%" (echo %RED%[X] mmproj file missing%RESET% & pause & exit /b 1)
if not exist "%SERVER%" (echo %RED%[X] Server binary missing%RESET% & pause & exit /b 1)
:: The slot directory is created on demand. mkdir's own error text stays
:: suppressed, but a failure is reported as a non-fatal warning because the
:: directory is later passed to the server via --slot-save-path.
if not exist "%SLOTS%" mkdir "%SLOTS%" 2>nul
if not exist "%SLOTS%" echo %YELLOW%[!] Could not create slot directory: %SLOTS%%RESET%
:: ========== Banner ==========
:: Informational only. These values are typed by hand, so they must be kept in
:: step with the llama-server flags used at launch (--ctx-size 200000, -np 2,
:: --reasoning on, --host 0.0.0.0, --port 8080).
cls
echo.
:: The two title rows are padded so the right-hand border lines up with the box.
echo %PURPLE% +---------------------------------------------------+%RESET%
echo %PURPLE% ^|%RESET%            %GREEN%Qwen3.6-35B-A3B-Uncensored%RESET%             %PURPLE%^|%RESET%
echo %PURPLE% ^|%RESET%        %CYAN%HauhauCS Aggressive ^| MoE ^| Q4_K_M%RESET%         %PURPLE%^|%RESET%
echo %PURPLE% +---------------------------------------------------+%RESET%
echo.
echo %CYAN% CUDA 12.8 self-compiled -- GPU: 999L -- CPU MoE: 999 -- ctx-size: 200K -- np: 2%RESET%
echo %CYAN% KV unified -- reasoning: on -- rope: 2.5 -- flash-attn%RESET%
echo %YELLOW% Port: 8080 -- Host: 0.0.0.0 -- Local URL: http://127.0.0.1:8080 %RESET%
echo.
echo %WHITE% Model: Q4_K_M (~20 GB) + mmproj F16 (~860 MB)%RESET%
echo %WHITE% Path: C:\Users\AI\Llama Server\llama.cpp\models\%RESET%
echo.
:: ========== Launch ==========
:: Run llama-server in the foreground, then wait for a key press so the
:: server's final output stays visible after it exits.
:: Flags are collected into named groups first and handed to the server in
:: their original order; quoted path arguments stay on the command line itself.
set "NET_OPTS=--host 0.0.0.0 --port 8080"
set "OFFLOAD_OPTS=--n-gpu-layers 999 --n-cpu-moe 999 -t 8"
set "CTX_OPTS=--ctx-size 200000 -np 2 --batch-size 512 --ubatch-size 256"
set "CACHE_OPTS=--cache-type-k q4_0 --cache-type-v q4_0 --cache-ram 5120"
set "MODEL_OPTS=--rope-scale 2.5 --reasoning on --no-mmap"
set "PROC_OPTS=--no-warmup --prio 2"
set "SAMPLER_OPTS=--temp 0.80 --top-k 100 --top-p 0.82 --min-p 0.12 --repeat-penalty 1.00"
set "FRONTEND_OPTS=--timeout 300 --ui --metrics --flash-attn auto"

"%SERVER%" -m "%MODEL%" --mmproj "%MMPROJ%" ^
    %NET_OPTS% %OFFLOAD_OPTS% %CTX_OPTS% %CACHE_OPTS% %MODEL_OPTS% ^
    --slot-save-path "%SLOTS%" ^
    %PROC_OPTS% %SAMPLER_OPTS% ^
    --alias "Qwen3.6-35B-Uncensored" ^
    %FRONTEND_OPTS%
pause

在 opencode 里整套流程确实可以流畅使用,但是在 openclaw 里就会出现问题!
AtomGit 是由开放原子开源基金会联合 CSDN 等生态伙伴共同推出的新一代开源与人工智能协作平台。平台坚持“开放、中立、公益”的理念,把代码托管、模型共享、数据集托管、智能体开发体验和算力服务整合在一起,为开发者提供从开发、训练到部署的一站式体验。
更多推荐



所有评论(0)