Publish Your GGUF Model to the Ollama Community

The steps below build llama.cpp, convert a Hugging Face checkpoint to GGUF, quantize it, wrap the result in a Modelfile, and publish the model under your own namespace on ollama.com.
git clone https://github.com/ggerganov/llama.cpp
cd llama.cpp
make
---
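make on this era of llama.cpp builds the Makefile-based binaries used below; recent checkouts build with CMake and prefix the tools with llama- (e.g. llama-quantize), so adjust the names if you are on a newer tree. A quick check that the two tools this guide needs were built:
ls -l main quantize
---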
pip install -r requirements.txt
---
python convert.py ../Llama3-70B-Chinese-Chat/ --vocab-type bpe
---
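By default convert.py writes ggml-model-f32.gguf into the model directory, which the next step consumes. The same-era script also accepted --outtype and --outfile (a sketch, in case a smaller f16 intermediate is preferable; quantize would then read the f16 file instead):
python convert.py ../Llama3-70B-Chinese-Chat/ --vocab-type bpe --outtype f16 --outfile ../Llama3-70B-Chinese-Chat/ggml-model-f16.gguf
---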
./quantize ../Llama3-70B-Chinese-Chat/ggml-model-f32.gguf ../ggml-model-q4_0.gguf Q4_0
---
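Before handing the quantized file to Ollama, a quick smoke test with llama.cpp's CLI catches conversion problems early (a minimal sketch using the make-era ./main binary; -n caps the number of generated tokens):
./main -m ../ggml-model-q4_0.gguf -p "你好，请介绍一下你自己" -n 64
---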
echo "FROM ggml-model-q4_0.gguf" >> Modelfile
echo "SYSTEM 你是一个有用的助手" >> Modelfile
PARAMETER stop "<|start_header_id|>"
PARAMETER stop "<|end_header_id|>"
PARAMETER stop "<|eot_id|>"
PARAMETER num_keep "24"
TEMPLATE """{{ if .System }}<|start_header_id|>system<|end_header_id|>
{{ .System }}<|eot_id|>{{ end }}{{ if .Prompt }}<|start_header_id|>user<|end_header_id|>
{{ .Prompt }}<|eot_id|>{{ end }}<|start_header_id|>assistant<|end_header_id|>
{{ .Response }}<|eot_id|>
"""
---
ollama create wangrongsheng/llama3-8b-chinese-chat -f ./Modelfile
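The model can now be exercised locally before publishing; ollama run with a trailing prompt performs a one-shot generation:
ollama run wangrongsheng/llama3-8b-chinese-chat "你好，请介绍一下你自己"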
---
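Pushing requires an ollama.com account with this machine's Ollama public key added in your account settings. Per the Ollama docs the key lives at ~/.ollama/id_ed25519.pub (or /usr/share/ollama/.ollama/id_ed25519.pub when Ollama runs as a Linux system service); print it and paste it into the web UI:
cat ~/.ollama/id_ed25519.pub
---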
ollama push wangrongsheng/llama3-8b-chinese-chat
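Once the push completes, the model is public under your namespace: anyone can pull it, or query it through Ollama's documented /api/generate REST endpoint (localhost and the prompt below are placeholders):
ollama pull wangrongsheng/llama3-8b-chinese-chat
curl http://localhost:11434/api/generate -d '{
  "model": "wangrongsheng/llama3-8b-chinese-chat",
  "prompt": "你好，请介绍一下你自己",
  "stream": false
}'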