dwq
This commit is contained in:
parent
7f13e0ff91
commit
47b2ebf74c
2 changed files with 80 additions and 17 deletions
|
@ -56,12 +56,14 @@ https://huggingface.co/mlx-community
|
||||||
|
|
||||||
git clone git@github.com:NexVeridian/NexVeridian-web.git
|
git clone git@github.com:NexVeridian/NexVeridian-web.git
|
||||||
```bash
|
```bash
|
||||||
|
just uv
|
||||||
|
|
||||||
|
just mlx_create "Qwen/QwQ-32B" "4 6 8" "/Users/elijahmcmorris/.cache/lm-studio/models" "mlx-community" false false
|
||||||
|
# or
|
||||||
uv venv
|
uv venv
|
||||||
uv pip install huggingface_hub hf_transfer mlx_lm
|
uv pip install huggingface_hub hf_transfer mlx_lm
|
||||||
uv run huggingface-cli login
|
uv run huggingface-cli login
|
||||||
|
|
||||||
just mlx_create "Qwen/QwQ-32B" "4 6 8" "/Users/elijahmcmorris/.cache/lm-studio/models" "mlx-community" "false"
|
|
||||||
# or
|
|
||||||
uv run mlx_lm.convert --hf-path Qwen/QwQ-32B -q --q-bits 4 --upload-repo mlx-community/QwQ-32B-4bit --mlx-path /Users/elijahmcmorris/.cache/lm-studio/models/mlx-community/QwQ-32B-4bit
|
uv run mlx_lm.convert --hf-path Qwen/QwQ-32B -q --q-bits 4 --upload-repo mlx-community/QwQ-32B-4bit --mlx-path /Users/elijahmcmorris/.cache/lm-studio/models/mlx-community/QwQ-32B-4bit
|
||||||
```
|
```
|
||||||
or use https://huggingface.co/spaces/mlx-community/mlx-my-repo
|
or use https://huggingface.co/spaces/mlx-community/mlx-my-repo
|
||||||
|
@ -77,12 +79,6 @@ or use https://huggingface.co/spaces/mlx-community/mlx-my-repo
|
||||||
|------|-------|-------|-------|----------|
|
|------|-------|-------|-------|----------|
|
||||||
| 0.7 | 0.00 | 0.80 | 20 | 1.5 |
|
| 0.7 | 0.00 | 0.80 | 20 | 1.5 |
|
||||||
|
|
||||||
## QWQ
|
|
||||||
| Temp | Min P | Top P | Repeat P |
|
|
||||||
|------|-------|-------|----------|
|
|
||||||
| 0.7 | 0.05 | 0.95 | - |
|
|
||||||
| 1.5 | 0.10 | 1.00 | - |
|
|
||||||
|
|
||||||
## Prompt Template
|
## Prompt Template
|
||||||
```
|
```
|
||||||
{%- if tools %}
|
{%- if tools %}
|
||||||
|
|
85
justfile
85
justfile
|
@ -12,16 +12,21 @@ docker:
|
||||||
rm -rf result
|
rm -rf result
|
||||||
docker image prune -f
|
docker image prune -f
|
||||||
|
|
||||||
# just mlx_create "Qwen/QwQ-32B" "4 6 8" "/Users/elijahmcmorris/.cache/lm-studio/models" "mlx-community" "false"
|
uv:
|
||||||
mlx_create hf_url quant lm_studio_path org="mlx-community" upload_repo="false":
|
uv venv
|
||||||
|
uv pip install huggingface_hub hf_transfer mlx_lm "mlx_lm[quant]"
|
||||||
|
uv run huggingface-cli login
|
||||||
|
|
||||||
|
# just mlx_create "Qwen/Qwen3-30B-A3B" "3 4 5 6 8" "/Users/elijahmcmorris/.cache/lm-studio/models" NexVeridian false true
|
||||||
|
mlx_create hf_url quant lm_studio_path org="mlx-community" upload_repo="false" clean="true":
|
||||||
#!/usr/bin/env bash
|
#!/usr/bin/env bash
|
||||||
just clean_lmstudio "{{hf_url}}" "{{quant}}" "{{lm_studio_path}}"
|
repo_name=$(basename {{hf_url}})
|
||||||
|
just clean_lmstudio "{{hf_url}}" "{{quant}}" "{{lm_studio_path}}" "{{org}}"
|
||||||
|
|
||||||
for q in {{quant}}; do
|
for q in {{quant}}; do
|
||||||
echo -e '\nConverting {{hf_url}} to '"$q"'-bit quantization\n'
|
|
||||||
repo_name=$(basename {{hf_url}})
|
|
||||||
rm {{lm_studio_path}}/{{org}}/${repo_name}-${q}bit
|
rm {{lm_studio_path}}/{{org}}/${repo_name}-${q}bit
|
||||||
|
|
||||||
|
echo -e '\nConverting {{hf_url}} to '"$q"'-bit quantization\n'
|
||||||
if [[ {{upload_repo}} == "true" ]]; then
|
if [[ {{upload_repo}} == "true" ]]; then
|
||||||
uv run mlx_lm.convert \
|
uv run mlx_lm.convert \
|
||||||
--hf-path {{hf_url}} \
|
--hf-path {{hf_url}} \
|
||||||
|
@ -36,18 +41,80 @@ mlx_create hf_url quant lm_studio_path org="mlx-community" upload_repo="false":
|
||||||
--q-bits ${q} \
|
--q-bits ${q} \
|
||||||
--mlx-path {{lm_studio_path}}/{{org}}/${repo_name}-${q}bit
|
--mlx-path {{lm_studio_path}}/{{org}}/${repo_name}-${q}bit
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
if [[ {{clean}} == "true" ]]; then
|
||||||
|
just clean_lmstudio "{{hf_url}}" "{{quant}}" "{{lm_studio_path}}" "{{org}}"
|
||||||
|
fi
|
||||||
done
|
done
|
||||||
|
|
||||||
just clean_lmstudio "{{hf_url}}" "{{quant}}" "{{lm_studio_path}}" "{{org}}"
|
# just mlx_create_dynamic "Qwen/Qwen3-30B-A3B" 4 8 "/Users/elijahmcmorris/.cache/lm-studio/models" NexVeridian false false
|
||||||
|
# https://github.com/ml-explore/mlx-lm/blob/main/mlx_lm/LEARNED_QUANTS.md
|
||||||
|
mlx_create_dynamic hf_url low high lm_studio_path org="mlx-community" upload_repo="false" clean="true":
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
repo_name=$(basename {{hf_url}})
|
||||||
|
rm -r {{lm_studio_path}}/{{org}}/${repo_name}-{{low}}-{{high}}bit || true
|
||||||
|
|
||||||
|
uv run mlx_lm.dynamic_quant \
|
||||||
|
--model {{hf_url}} \
|
||||||
|
--low-bits {{low}} \
|
||||||
|
--high-bits {{high}} \
|
||||||
|
--mlx-path {{lm_studio_path}}/{{org}}/${repo_name}-{{low}}-{{high}}bit
|
||||||
|
|
||||||
|
if [[ {{upload_repo}} == "true" ]]; then
|
||||||
|
uv run mlx_lm.upload \
|
||||||
|
--path {{lm_studio_path}}/{{org}}/${repo_name}-{{low}}-{{high}}bit \
|
||||||
|
--upload-repo {{org}}/${repo_name}-{{low}}-{{high}}bit
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [[ {{clean}} == "true" ]]; then
|
||||||
|
rm -r {{lm_studio_path}}/{{org}}/${repo_name}-{{low}}-{{high}}bit || true
|
||||||
|
fi
|
||||||
|
|
||||||
|
|
||||||
|
# just mlx_create_dwq "Qwen/Qwen3-30B-A3B" "4" "/Users/elijahmcmorris/.cache/lm-studio/models" NexVeridian false false
|
||||||
|
# https://github.com/ml-explore/mlx-lm/blob/main/mlx_lm/LEARNED_QUANTS.md
|
||||||
|
mlx_create_dwq hf_url quant lm_studio_path org="mlx-community" upload_repo="false" clean="true":
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
repo_name=$(basename {{hf_url}})
|
||||||
|
teacher_q="8"
|
||||||
|
just clean_lmstudio "{{hf_url}}" "{{quant}}" "{{lm_studio_path}}" "{{org}}" "-DWQ-${teacher_q}bit"
|
||||||
|
|
||||||
|
just mlx_create "{{hf_url}}" "${teacher_q}" "{{lm_studio_path}}" "{{org}}" "false" "false"
|
||||||
|
just clean_lmstudio "{{hf_url}}" "${teacher_q}" "{{lm_studio_path}}" "{{org}}"
|
||||||
|
|
||||||
|
for q in {{quant}}; do
|
||||||
|
rm {{lm_studio_path}}/{{org}}/${repo_name}-${q}bit-DWQ
|
||||||
|
|
||||||
|
echo -e '\nConverting {{hf_url}} to '"$q"'-bit DWQ quantization\n'
|
||||||
|
uv run mlx_lm.dwq \
|
||||||
|
--model {{hf_url}} \
|
||||||
|
--quantized-model {{org}}/${repo_name}-${teacher_q}bit \
|
||||||
|
--bits ${q} \
|
||||||
|
--group-size 32 \
|
||||||
|
--num-samples 1024 \
|
||||||
|
--batch-size 1 \
|
||||||
|
--max-seq-length 512 \
|
||||||
|
--mlx-path {{lm_studio_path}}/{{org}}/${repo_name}-${q}bit-DWQ-${teacher_q}bit
|
||||||
|
|
||||||
|
if [[ {{upload_repo}} == "true" ]]; then
|
||||||
|
uv run mlx_lm.upload \
|
||||||
|
--path {{lm_studio_path}}/{{org}}/${repo_name}-${q}bit-DWQ-${teacher_q}bit \
|
||||||
|
--upload-repo {{org}}/${repo_name}-${q}bit-DWQ-${teacher_q}bit
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [[ {{clean}} == "true" ]]; then
|
||||||
|
just clean_lmstudio "{{hf_url}}" "{{quant}}" "{{lm_studio_path}}" "{{org}}" "-DWQ-${teacher_q}bit"
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
|
||||||
clean_hf:
|
clean_hf:
|
||||||
rm -r ~/.cache/huggingface/hub/*
|
rm -r ~/.cache/huggingface/hub/*
|
||||||
|
|
||||||
# just clean_lmstudio "Qwen/QwQ-32B" "4 6 8" "/Users/elijahmcmorris/.cache/lm-studio/models" "mlx-community"
|
# just clean_lmstudio "Qwen/QwQ-32B" "4 6 8" "/Users/elijahmcmorris/.cache/lm-studio/models" NexVeridian "-DWQ"
|
||||||
clean_lmstudio hf_url quant lm_studio_path org="mlx-community":
|
clean_lmstudio hf_url quant lm_studio_path org="mlx-community" type="":
|
||||||
#!/usr/bin/env bash
|
#!/usr/bin/env bash
|
||||||
repo_name=$(basename {{hf_url}})
|
repo_name=$(basename {{hf_url}})
|
||||||
|
|
||||||
for q in {{quant}}; do
|
for q in {{quant}}; do
|
||||||
rm -r {{lm_studio_path}}/{{org}}/${repo_name}-${q}bit || true
|
rm -r {{lm_studio_path}}/{{org}}/${repo_name}-${q}bit{{type}} || true
|
||||||
done
|
done
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue