fix: dwq — add teacher_q parameter to mlx_create_dwq; reuse saved sensitivities in mlx_create_dynamic

2025-08-27 14:53:40 -07:00 · 2025-08-27 14:53:40 -07:00 · b3573bb36a
commit b3573bb36a
parent 1e1a893b8b
1 changed file with 83 additions and 32 deletions
--- a/115
+++ b/115
@@ -13,7 +13,7 @@ docker:
    docker image prune -f
 uv:
-    uv venv
+    uv venv --clear
    just uv_install
    uv run hf auth login
@@ -54,7 +54,7 @@ mlx_create hf_url quant lm_studio_path org="mlx-community" upload_repo="false" c
        fi
    done
-# just mlx_create_dynamic "Qwen/Qwen3-14B" 5 8 "/Users/elijahmcmorris/.cache/lm-studio/models" NexVeridian true false
+# just mlx_create_dynamic "Qwen/Qwen3-14B" 4 8 "/Users/elijahmcmorris/.cache/lm-studio/models" NexVeridian true false
 # https://github.com/ml-explore/mlx-lm/blob/main/mlx_lm/LEARNED_QUANTS.md
 mlx_create_dynamic hf_url low high lm_studio_path org="mlx-community" upload_repo="false" clean="true":
    #!/usr/bin/env bash
@@ -62,11 +62,30 @@ mlx_create_dynamic hf_url low high lm_studio_path org="mlx-community" upload_rep
    repo_name=$(basename {{hf_url}})
    rm -r {{lm_studio_path}}/{{org}}/${repo_name}-{{low}}bit-{{high}}bit || true
-    uv run mlx_lm.dynamic_quant \
+    sanitized_name=$(echo "$repo_name" | tr '/' '_')
-        --model {{hf_url}} \
+    sensitivity_file="sensitivities/${sanitized_name}-{{low}}bit-{{high}}bit_sensitivities.json"
-        --low-bits {{low}} \
+
-        --high-bits {{high}} \
+    if [[ -f "$sensitivity_file" ]]; then
-        --mlx-path {{lm_studio_path}}/{{org}}/${repo_name}-{{low}}bit-{{high}}bit
+        uv run mlx_lm.dynamic_quant \
            --model {{hf_url}} \
            --low-bits {{low}} \
            --high-bits {{high}} \
            --accumulation-dtype bfloat16 \
            --sensitivities "$sensitivity_file" \
            --mlx-path {{lm_studio_path}}/{{org}}/${repo_name}-{{low}}bit-{{high}}bit
    else
        uv run mlx_lm.dynamic_quant \
            --model {{hf_url}} \
            --low-bits {{low}} \
            --high-bits {{high}} \
            --accumulation-dtype bfloat16 \
            --mlx-path {{lm_studio_path}}/{{org}}/${repo_name}-{{low}}bit-{{high}}bit
    fi
    if [[ -f "${sanitized_name}_sensitivities.json" ]]; then
        mv "sensitivities/${sanitized_name}_sensitivities.json" "$sensitivity_file"
        echo "Saved sensitivities to $sensitivity_file"
    fi
    if [[ {{upload_repo}} == "true" ]]; then
        uv run mlx_lm.upload \
@@ -79,40 +98,66 @@ mlx_create_dynamic hf_url low high lm_studio_path org="mlx-community" upload_rep
    fi
-# just mlx_create_dwq "Qwen/Qwen3-30B-A3B" "4" "/Users/elijahmcmorris/.cache/lm-studio/models" NexVeridian true false
+# just mlx_create_dwq "Qwen/Qwen3-30B-A3B" "4" "8" "/Users/elijahmcmorris/.cache/lm-studio/models" NexVeridian true false
 # https://github.com/ml-explore/mlx-lm/blob/main/mlx_lm/LEARNED_QUANTS.md
 # https://github.com/ml-explore/mlx-lm/blob/main/mlx_lm/quant/dwq.py
-mlx_create_dwq hf_url quant lm_studio_path org="mlx-community" upload_repo="false" clean="true":
+mlx_create_dwq hf_url quant teacher_q lm_studio_path org="mlx-community" upload_repo="false" clean="true" :
    #!/usr/bin/env bash
    just uv_install
    repo_name=$(basename {{hf_url}})
    teacher_q="8"
    just clean_lmstudio "{{hf_url}}" "{{quant}}" "{{lm_studio_path}}" "{{org}}" "-DWQ-${teacher_q}bit"
-    for q in {{quant}}; do
+    if [[ "{{teacher_q}}" == "16" ]]; then
-        rm {{lm_studio_path}}/{{org}}/${repo_name}-${q}bit-DWQ
+        for q in {{quant}}; do
            echo -e '\nConverting {{hf_url}} to '"$q"'-bit DWQ quantization\n'
            just clean_lmstudio "{{hf_url}}" "{{quant}}" "{{lm_studio_path}}" "{{org}}" "-DWQ"
-        echo -e '\nConverting {{hf_url}} to '"$q"'-bit DWQ quantization\n'
+            uv run mlx_lm.dwq \
-        uv run mlx_lm.dwq \
+                --model {{hf_url}} \
-            --model {{hf_url}} \
+                --bits ${q} \
-            --quantized-model {{org}}/${repo_name}-${teacher_q}bit \
+                # --group-size 32 \
-            --bits ${q} \
+                # --num-samples 512 \
-            --group-size 32 \
+                --batch-size 1 \
-            --num-samples 512 \
+                # --max-seq-length 512 \
-            --batch-size 1 \
+                --mlx-path {{lm_studio_path}}/{{org}}/${repo_name}-${q}bit-DWQ
            --max-seq-length 512 \
            --mlx-path {{lm_studio_path}}/{{org}}/${repo_name}-${q}bit-DWQ-${teacher_q}bit
-        if [[ {{upload_repo}} == "true" ]]; then
+            if [[ {{upload_repo}} == "true" ]]; then
-            uv run mlx_lm.upload \
+                uv run mlx_lm.upload \
-                --path {{lm_studio_path}}/{{org}}/${repo_name}-${q}bit-DWQ-${teacher_q}bit \
+                    --path {{lm_studio_path}}/{{org}}/${repo_name}-${q}bit-DWQ \
-                --upload-repo {{org}}/${repo_name}-${q}bit-DWQ-${teacher_q}bit
+                    --upload-repo {{org}}/${repo_name}-${q}bit-DWQ
-        fi
+            fi
-        if [[ {{clean}} == "true" ]]; then
+            if [[ {{clean}} == "true" ]]; then
-            just clean_lmstudio "{{hf_url}}" "{{quant}}" "{{lm_studio_path}}" "{{org}}" "-DWQ-${teacher_q}bit"
+                just clean_lmstudio "{{hf_url}}" "{{quant}}" "{{lm_studio_path}}" "{{org}}" "-DWQ"
-        fi
+            fi
-    done
+        done
    else
        for q in {{quant}}; do
            echo -e '\nConverting {{hf_url}} to '"$q"'-bit DWQ quantization, with teacher_q = {{teacher_q}}\n'
            just clean_lmstudio "{{hf_url}}" "{{quant}}" "{{lm_studio_path}}" "{{org}}" "-DWQ-{{teacher_q}}bit"
            just mlx_create "{{hf_url}}" "{{teacher_q}}" "/Users/elijahmcmorris/.cache/lm-studio/models" NexVeridian false true
            uv run mlx_lm.dwq \
                --model {{hf_url}} \
                --quantized-model {{org}}/${repo_name}-{{teacher_q}}bit \
                --bits ${q} \
                # --group-size 32 \
                # --num-samples 512 \
                --batch-size 1 \
                # --max-seq-length 512 \
                --mlx-path {{lm_studio_path}}/{{org}}/${repo_name}-${q}bit-DWQ-{{teacher_q}}bit
            if [[ {{upload_repo}} == "true" ]]; then
                uv run mlx_lm.upload \
                    --path {{lm_studio_path}}/{{org}}/${repo_name}-${q}bit-DWQ-{{teacher_q}}bit \
                    --upload-repo {{org}}/${repo_name}-${q}bit-DWQ-{{teacher_q}}bit
            fi
            if [[ {{clean}} == "true" ]]; then
                just clean_lmstudio "{{hf_url}}" "{{quant}}" "{{lm_studio_path}}" "{{org}}" "-DWQ-{{teacher_q}}bit"
            fi
        done
    fi
 clean_hf:
    rm -r ~/.cache/huggingface/hub/*
@@ -148,6 +193,9 @@ process_single_model hf_url:
    echo "Processing quantizations for $model..."
    just mlx_create "$model" "3 4 5 6 8" "/Users/elijahmcmorris/.cache/lm-studio/models" NexVeridian true true
    # just mlx_create_dynamic "$model" 5 8 "/Users/elijahmcmorris/.cache/lm-studio/models" NexVeridian true true
    # just mlx_create_dynamic "$model" 4 8 "/Users/elijahmcmorris/.cache/lm-studio/models" NexVeridian true true
    # just mlx_create_dwq "$model" "5" "8" "/Users/elijahmcmorris/.cache/lm-studio/models" NexVeridian true true
    rclone copyto -P --fast-list --copy-links --transfers 32 --multi-thread-streams 32 \
        "$HOME/.cache/huggingface/hub/$model_cache_name" \
@@ -171,6 +219,9 @@ create_all:
    #!/usr/bin/env bash
    # List of models to process
    models=(
        # Qwen/Qwen3-1.7B
        # Qwen/Qwen3-4B-Instruct-2507
        # Qwen/Qwen3-4B-Thinking-2507
        # Qwen/Qwen3-30B-A3B-Instruct-2507
        # Qwen/Qwen3-30B-A3B-Thinking-2507
        # "Qwen/Qwen3-Coder-30B-A3B-Instruct"