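# Serve the site locally with zola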
run:
    zola serve
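
# Update the nix flake inputs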
update:
    nix flake update
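
# Recompile the resume PDF whenever the Typst source changes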
watch:
    typst watch Elijah_McMorris_Resume.typ
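
# Compile the resume and move the PDF into static/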
pdf:
    typst compile Elijah_McMorris_Resume.typ
    mv Elijah_McMorris_Resume.pdf static/
docker:
    nix build .#packages.x86_64-linux.my-docker
    docker load < ./result
    docker rm -f nexveridian-web
    docker run -d --rm -p 80:80 --name nexveridian-web nexveridian-web:latest
    rm -rf result
    docker image prune -f
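
# Create a fresh uv venv, install dependencies, and log in to Hugging Face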
uv:
    uv venv --clear
    just uv_install
    uv run hf auth login
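
# Install huggingface_hub plus mlx-lm (with the train extra) from the main branch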
uv_install:
    # uv pip install -U huggingface_hub hf_transfer mlx_lm "mlx_lm[train]" tiktoken blobfile
    uv pip install -U huggingface_hub hf_transfer "mlx-lm[train] @ git+https://github.com/ml-explore/mlx-lm@main" tiktoken blobfile
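
# Quantize a Hugging Face model to MLX at each bit width in quant, writing into
# the LM Studio models directory and optionally uploading to the org on the Hub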
# just mlx_create "Qwen/Qwen3-30B-A3B" "3 4 5 6 8" "/Users/elijahmcmorris/.cache/lm-studio/models" NexVeridian true true
mlx_create hf_url quant lm_studio_path org="mlx-community" upload_repo="false" clean="true":
    #!/usr/bin/env bash
    just uv_install
    repo_name=$(basename {{hf_url}})
    just clean_lmstudio "{{hf_url}}" "{{quant}}" "{{lm_studio_path}}" "{{org}}"

    for q in {{quant}}; do
        rm -r {{lm_studio_path}}/{{org}}/${repo_name}-${q}bit || true

        echo -e '\nConverting {{hf_url}} to '"$q"'-bit quantization\n'
        if [[ {{upload_repo}} == "true" ]]; then
            uv run mlx_lm.convert \
                --hf-path {{hf_url}} \
                -q \
                --q-bits ${q} \
                --trust-remote-code \
                --upload-repo {{org}}/${repo_name}-${q}bit \
                --mlx-path {{lm_studio_path}}/{{org}}/${repo_name}-${q}bit
        else
            uv run mlx_lm.convert \
                --hf-path {{hf_url}} \
                -q \
                --q-bits ${q} \
                --trust-remote-code \
                --mlx-path {{lm_studio_path}}/{{org}}/${repo_name}-${q}bit
        fi

        if [[ {{clean}} == "true" ]]; then
            just clean_lmstudio "{{hf_url}}" "{{quant}}" "{{lm_studio_path}}" "{{org}}"
        fi
    done
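
# Dynamic quantization: mixes low- and high-bit layers, reusing a saved
# per-layer sensitivities file when one exists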
# just mlx_create_dynamic "Qwen/Qwen3-14B" 4 8 "/Users/elijahmcmorris/.cache/lm-studio/models" NexVeridian true false
# https://github.com/ml-explore/mlx-lm/blob/main/mlx_lm/LEARNED_QUANTS.md
mlx_create_dynamic hf_url low high lm_studio_path org="mlx-community" upload_repo="false" clean="true":
    #!/usr/bin/env bash
    just uv_install
    repo_name=$(basename {{hf_url}})
    rm -r {{lm_studio_path}}/{{org}}/${repo_name}-{{low}}bit-{{high}}bit || true

    sanitized_name=$(echo "$repo_name" | tr '/' '_')
    sensitivity_file="sensitivities/${sanitized_name}-{{low}}bit-{{high}}bit_sensitivities.json"

    if [[ -f "$sensitivity_file" ]]; then
        uv run mlx_lm.dynamic_quant \
            --model {{hf_url}} \
            --low-bits {{low}} \
            --high-bits {{high}} \
            --accumulation-dtype bfloat16 \
            --sensitivities "$sensitivity_file" \
            --mlx-path {{lm_studio_path}}/{{org}}/${repo_name}-{{low}}bit-{{high}}bit
    else
        uv run mlx_lm.dynamic_quant \
            --model {{hf_url}} \
            --low-bits {{low}} \
            --high-bits {{high}} \
            --accumulation-dtype bfloat16 \
            --mlx-path {{lm_studio_path}}/{{org}}/${repo_name}-{{low}}bit-{{high}}bit
    fi

    if [[ -f "sensitivities/${sanitized_name}_sensitivities.json" ]]; then
        mv "sensitivities/${sanitized_name}_sensitivities.json" "$sensitivity_file"
        echo "Saved sensitivities to $sensitivity_file"
    fi

    if [[ {{upload_repo}} == "true" ]]; then
        uv run mlx_lm.upload \
            --path {{lm_studio_path}}/{{org}}/${repo_name}-{{low}}bit-{{high}}bit \
            --upload-repo {{org}}/${repo_name}-{{low}}bit-{{high}}bit
    fi

    if [[ {{clean}} == "true" ]]; then
        rm -r {{lm_studio_path}}/{{org}}/${repo_name}-{{low}}bit-{{high}}bit || true
    fi
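
# DWQ (distilled weight quantization): distills from the bf16 model when
# teacher_q is 16, otherwise from a pre-quantized teacher_q-bit teacher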
# just mlx_create_dwq "Qwen/Qwen3-30B-A3B" "4" "8" "/Users/elijahmcmorris/.cache/lm-studio/models" NexVeridian true false
# https://github.com/ml-explore/mlx-lm/blob/main/mlx_lm/LEARNED_QUANTS.md
# https://github.com/ml-explore/mlx-lm/blob/main/mlx_lm/quant/dwq.py
mlx_create_dwq hf_url quant teacher_q lm_studio_path org="mlx-community" upload_repo="false" clean="true":
    #!/usr/bin/env bash
    just uv_install
    repo_name=$(basename {{hf_url}})

    if [[ "{{teacher_q}}" == "16" ]]; then
        for q in {{quant}}; do
            echo -e '\nConverting {{hf_url}} to '"$q"'-bit DWQ quantization\n'
            just clean_lmstudio "{{hf_url}}" "{{quant}}" "{{lm_studio_path}}" "{{org}}" "-DWQ"

            # optional: --group-size 32 --num-samples 512 --max-seq-length 512
            uv run mlx_lm.dwq \
                --model {{hf_url}} \
                --bits ${q} \
                --batch-size 1 \
                --mlx-path {{lm_studio_path}}/{{org}}/${repo_name}-${q}bit-DWQ

            if [[ {{upload_repo}} == "true" ]]; then
                uv run mlx_lm.upload \
                    --path {{lm_studio_path}}/{{org}}/${repo_name}-${q}bit-DWQ \
                    --upload-repo {{org}}/${repo_name}-${q}bit-DWQ
            fi

            if [[ {{clean}} == "true" ]]; then
                just clean_lmstudio "{{hf_url}}" "{{quant}}" "{{lm_studio_path}}" "{{org}}" "-DWQ"
            fi
        done
    else
        for q in {{quant}}; do
            echo -e '\nConverting {{hf_url}} to '"$q"'-bit DWQ quantization, with teacher_q = {{teacher_q}}\n'
            just clean_lmstudio "{{hf_url}}" "{{quant}}" "{{lm_studio_path}}" "{{org}}" "-DWQ-{{teacher_q}}bit"

            just mlx_create "{{hf_url}}" "{{teacher_q}}" "{{lm_studio_path}}" "{{org}}" false true

            # optional: --group-size 32 --num-samples 512 --max-seq-length 512
            uv run mlx_lm.dwq \
                --model {{hf_url}} \
                --quantized-model {{org}}/${repo_name}-{{teacher_q}}bit \
                --bits ${q} \
                --batch-size 1 \
                --mlx-path {{lm_studio_path}}/{{org}}/${repo_name}-${q}bit-DWQ-{{teacher_q}}bit

            if [[ {{upload_repo}} == "true" ]]; then
                uv run mlx_lm.upload \
                    --path {{lm_studio_path}}/{{org}}/${repo_name}-${q}bit-DWQ-{{teacher_q}}bit \
                    --upload-repo {{org}}/${repo_name}-${q}bit-DWQ-{{teacher_q}}bit
            fi

            if [[ {{clean}} == "true" ]]; then
                just clean_lmstudio "{{hf_url}}" "{{quant}}" "{{lm_studio_path}}" "{{org}}" "-DWQ-{{teacher_q}}bit"
            fi
        done
    fi
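
# Wipe the local Hugging Face hub cache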
clean_hf:
    rm -r ~/.cache/huggingface/hub/*
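
# Remove a model's quantized copies from the LM Studio models directory;
# type is an optional name suffix such as "-DWQ"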
# just clean_lmstudio "Qwen/QwQ-32B" "4 6 8" "/Users/elijahmcmorris/.cache/lm-studio/models" NexVeridian "-DWQ"
clean_lmstudio hf_url quant lm_studio_path org="mlx-community" type="":
    #!/usr/bin/env bash
    repo_name=$(basename {{hf_url}})

    for q in {{quant}}; do
        rm -r {{lm_studio_path}}/{{org}}/${repo_name}-${q}bit{{type}} || true
    done
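
# Pull one model from the NAS cache, quantize it, sync the cache back, then clean up
# just process_single_model "Qwen/Qwen3-30B-A3B"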
process_single_model hf_url:
    #!/usr/bin/env bash
    # Store original HF_HUB_CACHE before pointing it at the NAS volume
    ORIGINAL_HF_HUB_CACHE="${HF_HUB_CACHE:-}"
    export HF_HUB_CACHE="/Volumes/hf-cache/huggingface/hub"

    model="{{hf_url}}"
    echo "Processing model: $model"

    # Convert model path to cache directory format (models--org--model)
    model_cache_name=$(echo "$model" | sed 's/\//--/g' | sed 's/^/models--/')

    echo "Copying $model_cache_name from NAS..."
    rclone copyto -P --fast-list --copy-links --transfers 32 --multi-thread-streams 32 \
        "tower:hf-cache/huggingface/hub/$model_cache_name" \
        "$HOME/.cache/huggingface/hub/$model_cache_name"

    # Set HF_HUB_CACHE to the local cache for the conversion step
    export HF_HUB_CACHE="$HOME/.cache/huggingface/hub"

    echo "Processing quantizations for $model..."
    just mlx_create "$model" "3 4 5 6 8" "/Users/elijahmcmorris/.cache/lm-studio/models" NexVeridian true true
    # just mlx_create_dynamic "$model" 5 8 "/Users/elijahmcmorris/.cache/lm-studio/models" NexVeridian true true
    # just mlx_create_dynamic "$model" 4 8 "/Users/elijahmcmorris/.cache/lm-studio/models" NexVeridian true true
    # just mlx_create_dwq "$model" "5" "8" "/Users/elijahmcmorris/.cache/lm-studio/models" NexVeridian true true

    # Copy the (possibly updated) model cache back to the NAS
    rclone copyto -P --fast-list --copy-links --transfers 32 --multi-thread-streams 32 \
        "$HOME/.cache/huggingface/hub/$model_cache_name" \
        "tower:hf-cache/huggingface/hub/$model_cache_name"

    # Clean up local model cache
    echo "Cleaning up local cache for $model..."
    # rm -rf "$HOME/.cache/huggingface/hub/$model_cache_name"
    just clean_hf || true

    # Reset HF_HUB_CACHE to its original value
    if [[ -n "$ORIGINAL_HF_HUB_CACHE" ]]; then
        export HF_HUB_CACHE="$ORIGINAL_HF_HUB_CACHE"
    else
        unset HF_HUB_CACHE
    fi

    echo "Completed processing $model"
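
# Run process_single_model for every model listed below (uncomment entries to enable)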
create_all:
    #!/usr/bin/env bash
    # List of models to process
    models=(
        # Qwen/Qwen3-1.7B
        # Qwen/Qwen3-4B-Instruct-2507
        # Qwen/Qwen3-4B-Thinking-2507
        # Qwen/Qwen3-30B-A3B-Instruct-2507
        # Qwen/Qwen3-30B-A3B-Thinking-2507
        # "Qwen/Qwen3-Coder-30B-A3B-Instruct"
        # "Qwen/Qwen3-Coder-480B-A35B-Instruct"
        # "openai/gpt-oss-20b"
        # "openai/gpt-oss-120b"
        # janhq/Jan-v1-4B
        # moonshotai/Kimi-VL-A3B-Thinking-2506
        # nvidia/OpenReasoning-Nemotron-1.5B
        # nvidia/OpenReasoning-Nemotron-7B
        # nvidia/OpenReasoning-Nemotron-14B
        # nvidia/OpenReasoning-Nemotron-32B
        # ByteDance-Seed/Seed-OSS-36B-Instruct
    )

    for model in "${models[@]}"; do
        echo "Processing model: $model"
        just process_single_model "$model"
    done