llm-inference-benchmarks

This commit is contained in:
Elijah McMorris 2025-06-08 21:35:50 -07:00
parent b9599bb0d4
commit da7d9e6884
Signed by: NexVeridian
SSH key fingerprint: SHA256:bsA1SKZxuEcEVHAy3gY1HUeM5ykRJl0U0kQHQn0hMg8
2 changed files with 223 additions and 0 deletions

View file

@ -11,3 +11,43 @@ docker:
docker run -d --rm -p 80:80 --name nexveridian-web nexveridian-web:latest
rm -rf result
docker image prune -f
# Convert a Hugging Face model to MLX format at one or more quantization widths.
#
# Parameters:
#   hf_url         - Hugging Face repo id; its basename names the output directories
#   quant          - space-separated list of bit widths, e.g. "4 6 8"
#   lm_studio_path - models root; output goes to <path>/mlx-community/<name>-<q>bit
#   upload_repo    - "true" to additionally pass --upload-repo mlx-community/<name>-<q>bit
#
# The output directories are removed before the run and again when the recipe
# exits, whether it succeeded or failed.
# NOTE(review): with upload_repo="false" the converted models are therefore
# deleted right after conversion - confirm this is the intended workflow.
#
# just mlx_create "Qwen/QwQ-32B" "4 6 8" "/Users/elijahmcmorris/.cache/lm-studio/models" "false"
mlx_create hf_url quant lm_studio_path upload_repo="false":
    #!/usr/bin/env bash
    # Abort on the first failing command so a broken conversion is reported
    # instead of being silently skipped.
    set -euo pipefail

    hf_url="{{hf_url}}"
    lm_studio_path="{{lm_studio_path}}"
    upload_repo="{{upload_repo}}"
    repo_name=$(basename -- "${hf_url}")

    # Remove this model's output directories via the sibling recipe.
    cleanup() {
        just clean_lmstudio "${hf_url}" "{{quant}}" "${lm_studio_path}"
    }

    # Start from a clean slate, and clean up again on every exit path.
    cleanup
    trap cleanup EXIT

    # quant is intentionally left unquoted: it is a space-separated list.
    for q in {{quant}}; do
        printf '\nConverting %s to %s-bit quantization\n\n' "${hf_url}" "${q}"

        target_repo="mlx-community/${repo_name}-${q}bit"
        out_dir="${lm_studio_path}/${target_repo}"

        # The output path is a directory, so a plain rm could never remove it;
        # -rf also keeps this quiet when nothing is there.
        rm -rf -- "${out_dir}"

        convert_args=(
            --hf-path "${hf_url}"
            -q
            --q-bits "${q}"
            --mlx-path "${out_dir}"
        )
        if [[ "${upload_repo}" == "true" ]]; then
            convert_args+=(--upload-repo "${target_repo}")
        fi

        uv run mlx_lm.convert "${convert_args[@]}"
    done
# Remove every non-hidden entry from the local Hugging Face hub cache
# (~/.cache/huggingface/hub). Succeeds even when the cache is already empty
# or does not exist, so the recipe can be re-run safely.
clean_hf:
    rm -rf -- ~/.cache/huggingface/hub/*
# Delete the converted MLX model directories for each requested quantization.
#
# Parameters:
#   hf_url         - Hugging Face repo id; only its basename is used
#   quant          - space-separated list of bit widths, e.g. "4 6 8"
#   lm_studio_path - models root; removes <path>/mlx-community/<name>-<q>bit
#
# Best-effort: a directory that does not exist is not an error, and a removal
# that fails is reported on stderr without failing the recipe.
#
# just clean_lmstudio "Qwen/QwQ-32B" "4 6 8" "/Users/elijahmcmorris/.cache/lm-studio/models"
clean_lmstudio hf_url quant lm_studio_path:
    #!/usr/bin/env bash
    set -uo pipefail

    models_root="{{lm_studio_path}}"
    repo_name=$(basename -- "{{hf_url}}")

    # quant is intentionally left unquoted: it is a space-separated list.
    for q in {{quant}}; do
        # The :? guard aborts rather than deleting under "/" if the root is empty.
        victim="${models_root:?lm_studio_path must not be empty}/mlx-community/${repo_name}-${q}bit"
        rm -rf -- "${victim}" \
            || printf 'warning: could not remove %s\n' "${victim}" >&2
    done