From 41cc3048684923758f77a8434559f0d6c8f41e12 Mon Sep 17 00:00:00 2001
From: deva100
Date: Tue, 23 Dec 2025 06:48:33 +0000
Subject: [PATCH] [chore] add some automation bash script for BitNet Tech Report

---
 demo_benchmark.sh       | 129 +++++++
 run_paper_benchmarks.sh | 733 ++++++++++++++++++++++++++++++++++++++++
 test_benchmark_setup.sh | 167 +++++++++
 3 files changed, 1029 insertions(+)
 create mode 100755 demo_benchmark.sh
 create mode 100755 run_paper_benchmarks.sh
 create mode 100755 test_benchmark_setup.sh

diff --git a/demo_benchmark.sh b/demo_benchmark.sh
new file mode 100755
index 0000000..8845a3f
--- /dev/null
+++ b/demo_benchmark.sh
@@ -0,0 +1,129 @@
+#!/bin/bash
+
+################################################################################
+# Quick Demo of Benchmark Automation
+# This runs a subset of benchmarks to verify the script works
+# Usage: ./demo_benchmark.sh (run from the repository root)
+################################################################################
+
+set -euo pipefail
+
+GREEN='\033[0;32m'
+BLUE='\033[0;34m'
+NC='\033[0m'
+
+STATS_DIR="stats/demo_$(date +%Y%m%d_%H%M%S)"
+mkdir -p "${STATS_DIR}"
+
+echo -e "${BLUE}========================================${NC}"
+echo -e "${BLUE}Quick Benchmark Demo${NC}"
+echo -e "${BLUE}========================================${NC}"
+echo ""
+echo "Output directory: ${STATS_DIR}"
+echo ""
+
+# Test 1: Machine info
+echo -e "${GREEN}[1/3] Collecting machine info...${NC}"
+{
+    echo "=== Machine Information ==="
+    echo "Architecture: $(uname -m)"
+    echo "CPU cores: $(nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo 'unknown')"
+    echo "Timestamp: $(date)"
+    echo ""
+    # lscpu is not available everywhere; skip it instead of aborting the demo.
+    if command -v lscpu &> /dev/null; then
+        lscpu | head -20
+    fi
+} | tee "${STATS_DIR}/machine_info.txt"
+echo ""
+
+# Test 2: Quick benchmark test
+echo -e "${GREEN}[2/3] Running quick benchmark (1, 2 and 4 threads)...${NC}"
+if [[ -f "build/bin/llama-bench" ]] && [[ -f "models/BitNet-b1.58-2B-4T/ggml-model-i2_s_embed_q6_k.gguf" ]]; then
+    ./build/bin/llama-bench \
+        -m models/BitNet-b1.58-2B-4T/ggml-model-i2_s_embed_q6_k.gguf \
+        -p 128 -n 128 -t 1,2,4 -ngl 0 \
+        2>&1 | tee "${STATS_DIR}/bench_quick.txt"
+
+    # Parse results
+    {
+        echo "# Quick Benchmark Results"
+        echo ""
+        echo "| Threads | Test | Tokens/sec |"
+        echo "|---------|------|------------|"
+
+        awk -F '|' '
+        /bitnet.*pp128/ || /bitnet.*tg128/ {
+            gsub(/^[[:space:]]+|[[:space:]]+$/, "", $6);
+            gsub(/^[[:space:]]+|[[:space:]]+$/, "", $7);
+            gsub(/^[[:space:]]+|[[:space:]]+$/, "", $8);
+            split($8, perf, "±");
+            printf "| %7s | %4s | %10s |\n", $6, $7, perf[1];
+        }
+        ' "${STATS_DIR}/bench_quick.txt"
+    } > "${STATS_DIR}/bench_results.md"
+
+    echo ""
+    echo -e "${GREEN}Results saved to: ${STATS_DIR}/bench_results.md${NC}"
+    cat "${STATS_DIR}/bench_results.md"
+else
+    echo "Skipping benchmark (model or binary not found)"
+fi
+echo ""
+
+# Test 3: Quick PPL test (one dataset only)
+echo -e "${GREEN}[3/3] Running quick PPL test (wikitext-2 only, 2 embed types)...${NC}"
+if [[ -f "build/bin/llama-perplexity" ]] && [[ -f "data/wikitext-2-raw/wiki.test.raw" ]]; then
+    {
+        echo "# Quick PPL Test"
+        echo ""
+        echo "| Embed Type | PPL |"
+        echo "|------------|-----|"
+
+        for embed in i2_s q6_k; do
+            model="models/BitNet-b1.58-2B-4T/ggml-model-i2_s_embed_${embed}.gguf"
+            if [[ -f "$model" ]]; then
+                # Progress goes to stderr so it does not end up inside the markdown table.
+                echo "Testing: $embed..." >&2
+                output=$(./build/bin/llama-perplexity \
+                    -m "$model" \
+                    -f data/wikitext-2-raw/wiki.test.raw \
+                    -t 4 -ngl 0 2>&1 || true)
+
+                # POSIX awk only: the 3-argument match() is a gawk extension and is
+                # a syntax error on mawk / BSD awk, which would abort under set -e.
+                ppl=$(echo "$output" | awk '
+                /Final estimate/ && /PPL/ {
+                    sub(/^.*PPL[ \t]*=[ \t]*/, "");
+                    if (match($0, /^[0-9]+(\.[0-9]+)?/)) {
+                        print substr($0, 1, RLENGTH);
+                        exit;
+                    }
+                }
+                ')
+
+                if [[ -n "$ppl" ]]; then
+                    echo "| $embed | $ppl |"
+                else
+                    echo "| $embed | N/A |"
+                fi
+            fi
+        done
+    } | tee "${STATS_DIR}/ppl_quick.md"
+
+    echo ""
+    echo -e "${GREEN}Results saved to: ${STATS_DIR}/ppl_quick.md${NC}"
+else
+    echo "Skipping PPL test (binary or dataset not found)"
+fi
+echo ""
+
+echo -e "${BLUE}========================================${NC}"
+echo -e "${GREEN}Demo completed!${NC}"
+echo -e "${BLUE}========================================${NC}"
+echo ""
+echo "All results in: ${STATS_DIR}/"
+echo ""
+echo "To run the full automation script:"
+echo " ./run_paper_benchmarks.sh"
+echo ""
diff --git a/run_paper_benchmarks.sh b/run_paper_benchmarks.sh
new file mode 100755
index 0000000..975ddde
--- /dev/null
+++ b/run_paper_benchmarks.sh
@@ -0,0 +1,733 @@
+#!/bin/bash
+
+################################################################################
+# Paper Benchmark Automation Script
+# This script automates all experiments needed for the paper on both Intel and ARM
+# Usage: ./run_paper_benchmarks.sh (run from the repository root)
+################################################################################
+
+set -euo pipefail
+
+# Color codes for output
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+BLUE='\033[0;34m'
+NC='\033[0m' # No Color
+
+# Configuration
+STATS_DIR="stats"
+MODEL_NAME="BitNet-b1.58-2B-4T"
+MODEL_DIR="models/${MODEL_NAME}"
+HF_REPO="microsoft/${MODEL_NAME}"
+TIMESTAMP=$(date +"%Y%m%d_%H%M%S")
+MACHINE_INFO_FILE="${STATS_DIR}/machine_info_${TIMESTAMP}.txt"
+BENCH_RESULTS_FILE="${STATS_DIR}/bench_results_${TIMESTAMP}.md"
+BENCH_RAW_FILE="${STATS_DIR}/bench_raw_${TIMESTAMP}.txt"
+PPL_RESULTS_FILE="${STATS_DIR}/ppl_results_${TIMESTAMP}.md"
+PPL_CSV_FILE="${STATS_DIR}/ppl_results_${TIMESTAMP}.csv"
+
+# Create stats directory if not exists
+mkdir -p "${STATS_DIR}"
+
+################################################################################
+# Helper Functions
+################################################################################
+
+log_info() {
+    echo -e "${BLUE}[INFO]${NC} $1"
+}
+
+log_success() {
+    echo -e "${GREEN}[SUCCESS]${NC} $1"
+}
+
+log_warning() {
+    echo -e "${YELLOW}[WARNING]${NC} $1"
+}
+
+log_error() {
+    echo -e "${RED}[ERROR]${NC} $1"
+}
+
+section_header() {
+    echo ""
+    echo "================================================================================"
+    echo -e "${GREEN}$1${NC}"
+    echo "================================================================================"
+}
+
+################################################################################
+# Step 1: Machine Information and Environment Setup
+################################################################################
+
+step1_machine_info() {
+    section_header "STEP 1: Machine Information and Environment Setup"
+
+    log_info "Collecting machine information..."
+
+    {
+        echo "================================"
+        echo "Machine Information"
+        echo "================================"
+        echo "Timestamp: $(date)"
+        echo ""
+
+        echo "--- System Architecture ---"
+        uname -a
+        echo ""
+
+        echo "--- CPU Information ---"
+        if command -v lscpu &> /dev/null; then
+            lscpu
+        elif [[ -f /proc/cpuinfo ]]; then
+            cat /proc/cpuinfo
+        else
+            log_warning "Could not get CPU information"
+        fi
+        echo ""
+
+        echo "--- CPU Cores ---"
+        NPROC=$(nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo "unknown")
+        echo "Number of CPU cores: ${NPROC}"
+        echo ""
+
+        echo "--- Memory Information ---"
+        if command -v free &> /dev/null; then
+            free -h
+        elif command -v vm_stat &> /dev/null; then
+            vm_stat
+        else
+            log_warning "Could not get memory information"
+        fi
+        echo ""
+
+        echo "--- Architecture Detection ---"
+        ARCH=$(uname -m)
+        echo "Architecture: ${ARCH}"
+        if [[ "${ARCH}" == "x86_64" ]]; then
+            echo "Platform: Intel/AMD x86_64"
+        elif [[ "${ARCH}" == "aarch64" ]] || [[ "${ARCH}" == "arm64" ]]; then
+            echo "Platform: ARM64"
+        else
+            echo "Platform: Other (${ARCH})"
+        fi
+        echo ""
+
+        echo "--- Compiler Information ---"
+        if command -v clang &> /dev/null; then
+            clang --version
+        fi
+        if command -v gcc &> /dev/null; then
+            gcc --version
+        fi
+        if command -v cmake &> /dev/null; then
+            cmake --version
+        fi
+        echo ""
+
+        echo "--- Python Environment ---"
+        python --version || python3 --version
+        if command -v conda &> /dev/null; then
+            conda --version
+            echo "Active conda environment: ${CONDA_DEFAULT_ENV:-none}"
+        fi
+        echo ""
+
+    } | tee "${MACHINE_INFO_FILE}"
+
+    log_success "Machine information saved to: ${MACHINE_INFO_FILE}"
+
+    # Install dependencies according to README
+    log_info "Installing Python dependencies..."
+    if [[ -f requirements.txt ]]; then
+        pip install -r requirements.txt
+        log_success "Python dependencies installed"
+    else
+        log_warning "requirements.txt not found, skipping dependency installation"
+    fi
+}
+
+################################################################################
+# Step 2: Build Project
+################################################################################
+
+step2_build() {
+    section_header "STEP 2: Building Project"
+
+    log_info "Configuring CMake..."
+    cmake -B build -DCMAKE_BUILD_TYPE=Release
+
+    log_info "Building project..."
+    cmake --build build --config Release
+
+    log_success "Build completed successfully"
+}
+
+################################################################################
+# Step 3: Download and Convert Model
+################################################################################
+
+step3_download_convert() {
+    section_header "STEP 3: Download and Convert Model"
+
+    if [[ -d "${MODEL_DIR}" ]] && [[ -f "${MODEL_DIR}/ggml-model-f32.gguf" ]]; then
+        log_warning "Model directory already exists and contains f32 model, skipping download"
+        # `read` returns non-zero on EOF (e.g. stdin is not a terminal); without
+        # the fallback, `set -e` would abort the whole run instead of treating
+        # the missing answer as "no".
+        read -p "Do you want to re-download and convert? (y/N): " -n 1 -r || REPLY=""
+        echo
+        if [[ ! $REPLY =~ ^[Yy]$ ]]; then
+            return
+        fi
+    fi
+
+    # Create model directory
+    mkdir -p "${MODEL_DIR}"
+
+    # Download from HuggingFace
+    log_info "Downloading model from HuggingFace: ${HF_REPO}"
+    if command -v huggingface-cli &> /dev/null; then
+        huggingface-cli download "${HF_REPO}" --local-dir "${MODEL_DIR}"
+    else
+        log_error "huggingface-cli not found. Please install it with: pip install huggingface_hub"
+        exit 1
+    fi
+
+    # Convert to f32 GGUF using the helper script
+    log_info "Converting model to f32 GGUF format..."
+    if [[ -f "utils/convert-helper-bitnet.py" ]]; then
+        # The script creates ggml-model-f32-bitnet.gguf, we'll rename it
+        python utils/convert-helper-bitnet.py "${MODEL_DIR}"
+
+        # Rename the output to match expected name
+        if [[ -f "${MODEL_DIR}/ggml-model-f32-bitnet.gguf" ]]; then
+            mv "${MODEL_DIR}/ggml-model-f32-bitnet.gguf" "${MODEL_DIR}/ggml-model-f32.gguf"
+        fi
+    else
+        log_error "Convert helper script not found"
+        exit 1
+    fi
+
+    log_success "Model downloaded and converted to f32 GGUF"
+}
+
+################################################################################
+# Step 4: Quantize Embeddings
+################################################################################
+
+step4_quantize_embeddings() {
+    section_header "STEP 4: Quantize Embeddings"
+
+    log_info "Running embed_quant.sh to create different embedding quantization variants..."
+
+    if [[ ! -f "embed_quant.sh" ]]; then
+        log_error "embed_quant.sh not found"
+        exit 1
+    fi
+
+    bash embed_quant.sh
+
+    log_success "Embedding quantization completed"
+}
+
+################################################################################
+# Step 5: Tune GEMM Block Sizes
+################################################################################
+
+step5_tune_gemm() {
+    section_header "STEP 5: Tune GEMM Block Sizes"
+
+    log_info "Running GEMM block size tuning..."
+
+    # Backup original tune script if present (an unguarded cp aborts under set -e)
+    if [[ -f "tune_gemm_blocks.sh" ]] && [[ ! -f "tune_gemm_blocks.sh.bak" ]]; then
+        cp tune_gemm_blocks.sh tune_gemm_blocks.sh.bak
+    fi
+
+    # Get number of threads
+    NPROC=$(nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo "8")
+
+    # Update the tuning script to use a broader search space
+    log_info "Updating tune_gemm_blocks.sh for comprehensive search..."
+
+    # Create a temporary tuning script with broader search
+    cat > tune_gemm_blocks_auto.sh << 'EOF'
+#!/bin/bash
+set -e
+
+HEADER_FILE="include/gemm-config.h"
+BENCH_CMD="./build/bin/llama-bench -m models/BitNet-b1.58-2B-4T/ggml-model-i2_s_embed_i2_s.gguf -p 128 -n 0 -t 16 -ngl 0"
+BUILD_CMD="cmake --build build --config Release -j"
+
+ACT_PARALLEL_DEFINE=true
+
+# Expanded search space for better tuning
+ROW_BLOCK_VALUES=(2 4 8)
+COL_BLOCK_VALUES=(64 128 256)
+PARALLEL_SIZE_VALUES=(2 4 8)
+
+BEST_PERF=0
+BEST_ROW_BLOCK=0
+BEST_COL_BLOCK=0
+BEST_PARALLEL_SIZE=0
+LOG_FILE="stats/tuning_log.csv"
+
+if [ -f "$HEADER_FILE" ]; then
+    cp "$HEADER_FILE" "${HEADER_FILE}.bak"
+fi
+
+echo "Starting comprehensive tuning process..."
+echo "row_block,col_block,parallel_size,tokens_per_second" > "$LOG_FILE"
+
+cleanup() {
+    echo "Restoring original header file..."
+    if [ -f "${HEADER_FILE}.bak" ]; then
+        mv "${HEADER_FILE}.bak" "$HEADER_FILE"
+    fi
+    echo "Tuning finished."
+    echo "Best: ROW_BLOCK=${BEST_ROW_BLOCK}, COL_BLOCK=${BEST_COL_BLOCK}, PARALLEL=${BEST_PARALLEL_SIZE} -> ${BEST_PERF} tokens/s"
+}
+
+trap cleanup EXIT
+
+for ps in "${PARALLEL_SIZE_VALUES[@]}"; do
+    for rb in "${ROW_BLOCK_VALUES[@]}"; do
+        for cb in "${COL_BLOCK_VALUES[@]}"; do
+            echo "Testing: ROW=${rb}, COL=${cb}, PARALLEL=${ps}"
+
+            echo "// Auto-generated by tuning script" > "$HEADER_FILE"
+            if [ "$ACT_PARALLEL_DEFINE" = "true" ]; then
+                echo "#define ACT_PARALLEL" >> "$HEADER_FILE"
+            fi
+            echo "#if defined(ACT_PARALLEL)" >> "$HEADER_FILE"
+            echo " #define ROW_BLOCK_SIZE ${rb}" >> "$HEADER_FILE"
+            echo " #define COL_BLOCK_SIZE ${cb}" >> "$HEADER_FILE"
+            echo " #define PARALLEL_SIZE ${ps}" >> "$HEADER_FILE"
+            echo "#else" >> "$HEADER_FILE"
+            echo " #define ROW_BLOCK_SIZE ${rb}" >> "$HEADER_FILE"
+            echo " #define COL_BLOCK_SIZE ${cb}" >> "$HEADER_FILE"
+            echo " #define PARALLEL_SIZE ${ps}" >> "$HEADER_FILE"
+            echo "#endif" >> "$HEADER_FILE"
+
+            $BUILD_CMD > /dev/null 2>&1
+
+            output=$(eval "$BENCH_CMD" 2>&1)
+
+            perf=$(echo "$output" | awk -F '|' '
+            /pp128/ && /bitnet/ {
+                gsub(/ /, "", $8);
+                split($8, perf, "±");
+                print perf[1];
+                exit;
+            }
+            ')
+
+            if [ -z "$perf" ]; then
+                perf=0
+            fi
+
+            echo "Performance: ${perf} tokens/s"
+            echo "${rb},${cb},${ps},${perf}" >> "$LOG_FILE"
+
+            # Float comparison via awk; avoids a hard dependency on bc.
+            if awk -v a="$perf" -v b="$BEST_PERF" 'BEGIN { exit !(a > b) }'; then
+                BEST_PERF=$perf
+                BEST_ROW_BLOCK=$rb
+                BEST_COL_BLOCK=$cb
+                BEST_PARALLEL_SIZE=$ps
+                echo "*** New best found! ***"
+            fi
+        done
+    done
+done
+
+echo "Best configuration: ROW=${BEST_ROW_BLOCK}, COL=${BEST_COL_BLOCK}, PARALLEL=${BEST_PARALLEL_SIZE}"
+echo "Best performance: ${BEST_PERF} tokens/s"
+EOF
+
+    chmod +x tune_gemm_blocks_auto.sh
+    bash tune_gemm_blocks_auto.sh
+
+    # Read the best configuration from the log
+    if [[ -f "stats/tuning_log.csv" ]]; then
+        BEST_CONFIG=$(tail -n +2 "stats/tuning_log.csv" | sort -t',' -k4 -nr | head -1)
+        BEST_ROW=$(echo "$BEST_CONFIG" | cut -d',' -f1)
+        BEST_COL=$(echo "$BEST_CONFIG" | cut -d',' -f2)
+        BEST_PAR=$(echo "$BEST_CONFIG" | cut -d',' -f3)
+        BEST_PERF=$(echo "$BEST_CONFIG" | cut -d',' -f4)
+
+        log_success "Best configuration found:"
+        log_success " ROW_BLOCK_SIZE=${BEST_ROW}, COL_BLOCK_SIZE=${BEST_COL}, PARALLEL_SIZE=${BEST_PAR}"
+        log_success " Performance: ${BEST_PERF} tokens/s"
+
+        # Apply the best configuration
+        log_info "Applying best configuration to gemm-config.h..."
+        cat > include/gemm-config.h << EOF
+// Auto-generated with best tuning results
+// Best performance: ${BEST_PERF} tokens/s
+#define ACT_PARALLEL
+#if defined(ACT_PARALLEL)
+ #define ROW_BLOCK_SIZE ${BEST_ROW}
+ #define COL_BLOCK_SIZE ${BEST_COL}
+ #define PARALLEL_SIZE ${BEST_PAR}
+#else
+ #define ROW_BLOCK_SIZE ${BEST_ROW}
+ #define COL_BLOCK_SIZE ${BEST_COL}
+ #define PARALLEL_SIZE ${BEST_PAR}
+#endif
+EOF
+
+        # Rebuild with best configuration
+        log_info "Rebuilding with best configuration..."
+        cmake --build build --config Release -j
+
+        log_success "GEMM tuning completed and applied"
+    else
+        log_error "Tuning log not found"
+    fi
+}
+
+################################################################################
+# Step 6: Run Performance Benchmarks
+################################################################################
+
+step6_benchmark() {
+    section_header "STEP 6: Running Performance Benchmarks"
+
+    # Get number of threads for this machine
+    NPROC=$(nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo "8")
+    log_info "Detected ${NPROC} CPU cores"
+
+    # Generate thread counts: 1, 2, 4, 8, 16, ...
+    THREAD_COUNTS="1"
+    for ((i=2; i<=NPROC; i*=2)); do
+        THREAD_COUNTS="${THREAD_COUNTS},${i}"
+    done
+
+    log_info "Testing with thread counts: ${THREAD_COUNTS}"
+
+    # Create benchmark script
+    cat > bench.sh << EOF
+#!/bin/bash
+set -e
+
+MODEL="${MODEL_DIR}/ggml-model-i2_s_embed_q6_k.gguf"
+THREADS="${THREAD_COUNTS}"
+
+if [[ ! -f "\${MODEL}" ]]; then
+    echo "Error: Model not found: \${MODEL}"
+    exit 1
+fi
+
+./build/bin/llama-bench -m "\${MODEL}" -p 128 -n 128 -t "\${THREADS}" -ngl 0
+EOF
+
+    chmod +x bench.sh
+
+    log_info "Running benchmark..."
+
+    # Run benchmark and capture output
+    ./bench.sh 2>&1 | tee "${BENCH_RAW_FILE}"
+
+    # Parse and format results
+    log_info "Parsing benchmark results..."
+
+    {
+        echo "# Benchmark Results"
+        echo ""
+        echo "**Machine:** $(uname -m)"
+        echo "**Timestamp:** $(date)"
+        echo "**Model:** ${MODEL_NAME}"
+        echo "**Quantization:** I2_S weight, Q6_K embeddings"
+        echo ""
+        echo "## Performance Summary"
+        echo ""
+        echo "| Threads | Test Type | Tokens/sec | Std Dev |"
+        echo "|---------|-----------|------------|---------|"
+
+        awk -F '|' '
+        /bitnet.*pp128/ || /bitnet.*tg128/ {
+            gsub(/^[[:space:]]+|[[:space:]]+$/, "", $6); # threads
+            gsub(/^[[:space:]]+|[[:space:]]+$/, "", $7); # test
+            gsub(/^[[:space:]]+|[[:space:]]+$/, "", $8); # t/s
+
+            threads = $6;
+            test = $7;
+
+            split($8, perf, "±");
+            tokens = perf[1];
+            gsub(/^[[:space:]]+|[[:space:]]+$/, "", tokens);
+
+            stddev = perf[2];
+            gsub(/^[[:space:]]+|[[:space:]]+$/, "", stddev);
+
+            printf "| %7s | %9s | %10s | %7s |\n", threads, test, tokens, stddev;
+        }
+        ' "${BENCH_RAW_FILE}"
+
+        echo ""
+        echo "## Detailed Output"
+        echo ""
+        echo '```'
+        cat "${BENCH_RAW_FILE}"
+        echo '```'
+
+    } > "${BENCH_RESULTS_FILE}"
+
+    log_success "Benchmark results saved to: ${BENCH_RESULTS_FILE}"
+}
+
+################################################################################
+# Step 7: Run PPL Benchmarks
+################################################################################
+
+step7_ppl_benchmark() {
+    section_header "STEP 7: Running Perplexity (PPL) Benchmarks"
+
+    log_info "Checking benchmark datasets..."
+
+    # Check which datasets are available
+    DATASETS=""
+    for ds in data/wikitext-2-raw/wiki.test.raw data/ptb/ptb.test.txt data/lambada/lambada_test_plain_text.txt data/clue/tnews.test.txt; do
+        if [[ -f "$ds" ]]; then
+            DATASETS="${DATASETS} ${ds}"
+            log_info "Found dataset: ${ds}"
+        else
+            log_warning "Dataset not found: ${ds}"
+        fi
+    done
+
+    if [[ -z "${DATASETS}" ]]; then
+        log_error "No benchmark datasets found in data/ directory"
+        log_warning "Skipping PPL benchmarks"
+        return
+    fi
+
+    log_info "Creating PPL benchmark script..."
+
+    # Create a modified PPL script
+    cat > embed_quant_ppl_auto.sh.in << 'EOFPPL'
+#!/usr/bin/env bash
+set -euo pipefail
+
+BIN="./build/bin/llama-perplexity"
+MODEL_DIR="models/BitNet-b1.58-2B-4T"
+MODEL_TEMPLATE="ggml-model-i2_s_embed_{ET}.gguf"
+
+EMBED_TYPES="f32 bf16 f16 i2_s q3_k q4_0 q5_0 q6_k tq1_0 tq2_0"
+DATASETS="DATASETS_PLACEHOLDER"
+
+THREADS="${THREADS:-16}"
+NGL="${NGL:-0}"
+
+CSV_LOG="ppl_results_temp.csv"
+
+if [[ ! -x "$BIN" ]]; then
+  echo "Error: llama-perplexity not found at $BIN" >&2
+  exit 1
+fi
+
+model_size_mib() {
+  local f="$1"
+  local sz
+  sz=$(stat -c %s "$f" 2>/dev/null || stat -f %z "$f" 2>/dev/null || echo 0)
+  awk -v b="$sz" 'BEGIN { printf("%.2f", b/1024/1024) }'
+}
+
+# Uses only POSIX awk features: the 3-argument match() and \s are gawk
+# extensions that fail on mawk / BSD awk.
+extract_ppl_final() {
+  awk '
+    /Final estimate/ && /PPL/ {
+      line = $0;
+      sub(/^.*PPL[ \t]*=[ \t]*/, "", line);
+      if (match(line, /^[0-9]+(\.[0-9]+)?[ \t]*\+\/-[ \t]*[0-9]+(\.[0-9]+)?/)) {
+        pair = substr(line, 1, RLENGTH);
+        sub(/[ \t]*\+\/-[ \t]*/, ",", pair);
+        print pair;
+        found=1;
+      }
+    }
+    END { if (!found) exit 1 }
+  '
+}
+
+extract_perplexity() {
+  awk '
+    {
+      for (i=1; i<=NF; ++i) {
+        if (tolower($i) ~ /perplexity/) {
+          for (j=i; j<=NF; ++j) {
+            if ($j ~ /^[0-9]+(\.[0-9]+)?$/) { p=$j; break }
+            gsub(/^.*=/, "", $j); gsub(/,$/, "", $j); gsub(/^\(/, "", $j); gsub(/\)$/, "", $j)
+            if ($j ~ /^[0-9]+(\.[0-9]+)?$/) { p=$j; break }
+          }
+        }
+      }
+      if (p) last=p
+    }
+    END { if (last) print last }'
+}
+
+echo "| embed-type | model | size | dataset | threads | ppl |"
+echo "| ---------- | --------------: | -----: | ------: | ------: | ---------: |"
+echo "embed_type,model,model_size_mib,dataset,threads,perplexity,perplexity_err" > "$CSV_LOG"
+
+for et in $EMBED_TYPES; do
+  model_glob="${MODEL_DIR}/$(echo "$MODEL_TEMPLATE" | sed "s/{ET}/$et/")"
+
+  found_any=0
+  for model in $model_glob; do
+    [[ -e "$model" ]] || continue
+    found_any=1
+  done
+
+  if [[ $found_any -eq 0 ]]; then
+    echo "Warning: no models found for embed type '$et', skipping." >&2
+    continue
+  fi
+
+  for model in $model_glob; do
+    [[ -e "$model" ]] || continue
+    size_mib=$(model_size_mib "$model")
+
+    for ds in $DATASETS; do
+      if [[ ! -r "$ds" ]]; then
+        echo "Warning: dataset not found: $ds (skipping)" >&2
+        continue
+      fi
+
+      echo "==> Testing: model=$model, dataset=$ds"
+      out=$("$BIN" -m "$model" -f "$ds" -t "$THREADS" -ngl "$NGL" 2>&1 || true)
+
+      ppl_pair=$(echo "$out" | extract_ppl_final || true)
+      if [[ -n "${ppl_pair:-}" ]]; then
+        ppl="${ppl_pair%%,*}"
+        ppl_err="${ppl_pair##*,}"
+      else
+        ppl=$(echo "$out" | extract_perplexity || true)
+        if [[ -z "${ppl:-}" ]]; then
+          ppl="NA"
+        fi
+        ppl_err="NA"
+      fi
+
+      if [[ "$ppl_err" != "NA" ]]; then
+        ppl_disp="$ppl ± $ppl_err"
+      else
+        ppl_disp="$ppl"
+      fi
+
+      printf "| %10s | %14s | %6s MiB | %7s | %7s | %10s |\n" \
+        "$et" "$(basename "$model")" "$size_mib" "$(basename "$ds")" "$THREADS" "$ppl_disp"
+
+      echo "$et,$(basename "$model"),$size_mib,$(basename "$ds"),$THREADS,$ppl,$ppl_err" >> "$CSV_LOG"
+    done
+  done
+done
+
+echo "Done. Results saved to $CSV_LOG"
+EOFPPL
+
+    # Replace DATASETS placeholder. Write to a new file instead of using
+    # `sed -i`, whose syntax differs between GNU and BSD/macOS sed.
+    sed "s|DATASETS_PLACEHOLDER|${DATASETS}|g" embed_quant_ppl_auto.sh.in > embed_quant_ppl_auto.sh
+    rm -f embed_quant_ppl_auto.sh.in
+    chmod +x embed_quant_ppl_auto.sh
+
+    log_info "Running PPL benchmarks (this may take a while)..."
+
+    # Run the PPL benchmark
+    ./embed_quant_ppl_auto.sh 2>&1 | tee "${PPL_RESULTS_FILE}.raw"
+
+    # Format the results
+    {
+        echo "# Perplexity (PPL) Benchmark Results"
+        echo ""
+        echo "**Machine:** $(uname -m)"
+        echo "**Timestamp:** $(date)"
+        echo "**Model:** ${MODEL_NAME}"
+        echo ""
+        echo "## Results by Embedding Type"
+        echo ""
+
+        grep "^|" "${PPL_RESULTS_FILE}.raw" || true
+
+        echo ""
+        echo "## Summary Statistics"
+        echo ""
+
+        if [[ -f "ppl_results_temp.csv" ]]; then
+            # Copy to final location
+            cp ppl_results_temp.csv "${PPL_CSV_FILE}"
+
+            # Generate summary by embed type
+            echo "### Average PPL by Embedding Type"
+            echo ""
+            echo "| Embed Type | Avg PPL | Models Tested |"
+            echo "|------------|---------|---------------|"
+
+            awk -F',' '
+            NR > 1 && $6 != "NA" {
+                sum[$1] += $6;
+                count[$1]++;
+            }
+            END {
+                for (et in sum) {
+                    printf "| %10s | %7.2f | %13d |\n", et, sum[et]/count[et], count[et];
+                }
+            }
+            ' "${PPL_CSV_FILE}" | sort -t'|' -k3 -n
+
+            echo ""
+        fi
+
+        echo "## Full Raw Output"
+        echo ""
+        echo '```'
+        cat "${PPL_RESULTS_FILE}.raw"
+        echo '```'
+
+    } > "${PPL_RESULTS_FILE}"
+
+    log_success "PPL results saved to: ${PPL_RESULTS_FILE}"
+    log_success "PPL CSV data saved to: ${PPL_CSV_FILE}"
+}
+
+################################################################################
+# Main Execution
+################################################################################
+
+main() {
+    section_header "Paper Benchmark Automation - Starting"
+
+    log_info "All results will be saved to: ${STATS_DIR}/"
+    log_info "Timestamp: ${TIMESTAMP}"
+
+    # Execute all steps
+    step1_machine_info
+    step2_build
+    step3_download_convert
+    step4_quantize_embeddings
+    step5_tune_gemm
+    step6_benchmark
+    step7_ppl_benchmark
+
+    # Final summary
+    section_header "All Benchmarks Completed!"
+
+    log_success "Results summary:"
+    log_success " - Machine info: ${MACHINE_INFO_FILE}"
+    log_success " - Benchmark: ${BENCH_RESULTS_FILE}"
+    log_success " - PPL results: ${PPL_RESULTS_FILE}"
+    log_success " - PPL CSV: ${PPL_CSV_FILE}"
+    log_success " - GEMM tuning log: stats/tuning_log.csv"
+
+    echo ""
+    log_info "You can find all results in the ${STATS_DIR}/ directory"
+}
+
+# Run main function
+main "$@"
diff --git a/test_benchmark_setup.sh b/test_benchmark_setup.sh
new file mode 100755
index 0000000..0190cb3
--- /dev/null
+++ b/test_benchmark_setup.sh
@@ -0,0 +1,167 @@
+#!/bin/bash
+
+################################################################################
+# Quick Test Script for Benchmark Automation
+# This script tests individual components without running full benchmarks
+################################################################################
+
+set -euo pipefail
+
+GREEN='\033[0;32m'
+RED='\033[0;31m'
+NC='\033[0m'
+
+echo "========================================"
+echo "Testing Benchmark Automation Components"
+echo "========================================"
+echo ""
+
+# Test 1: Check system info
+echo "Test 1: System Information"
+echo " Architecture: $(uname -m)"
+echo " CPU cores: $(nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo 'unknown')"
+echo " Python: $(python --version 2>&1 || python3 --version 2>&1)"
+if command -v cmake &> /dev/null; then
+    echo -e " CMake: ${GREEN}✓${NC} $(cmake --version | head -1)"
+else
+    echo -e " CMake: ${RED}✗ Not found${NC}"
+fi
+if command -v clang &> /dev/null; then
+    echo -e " Clang: ${GREEN}✓${NC} $(clang --version | head -1)"
+else
+    echo -e " Clang: ${RED}✗ Not found${NC}"
+fi
+echo ""
+
+# Test 2: Check required files
+echo "Test 2: Required Files"
+files=(
+    "embed_quant.sh"
+    "tune_gemm_blocks.sh"
+    "utils/convert-helper-bitnet.py"
+    "requirements.txt"
+)
+for f in "${files[@]}"; do
+    if [[ -f "$f" ]]; then
+        echo -e " $f: ${GREEN}✓${NC}"
+    else
+        echo -e " $f: ${RED}✗ Missing${NC}"
+    fi
+done
+echo ""
+
+# Test 3: Check build directory
+echo "Test 3: Build Status"
+if [[ -d "build" ]]; then
+    echo -e " build/ directory: ${GREEN}✓${NC}"
+    if [[ -f "build/bin/llama-bench" ]]; then
+        echo -e " llama-bench: ${GREEN}✓${NC}"
+    else
+        echo -e " llama-bench: ${RED}✗ Not built${NC}"
+    fi
+    if [[ -f "build/bin/llama-perplexity" ]]; then
+        echo -e " llama-perplexity: ${GREEN}✓${NC}"
+    else
+        echo -e " llama-perplexity: ${RED}✗ Not built${NC}"
+    fi
+    if [[ -f "build/bin/llama-quantize" ]]; then
+        echo -e " llama-quantize: ${GREEN}✓${NC}"
+    else
+        echo -e " llama-quantize: ${RED}✗ Not built${NC}"
+    fi
+else
+    echo -e " build/ directory: ${RED}✗ Not found${NC}"
+fi
+echo ""
+
+# Test 4: Check data directory
+echo "Test 4: Benchmark Datasets"
+datasets=(
+    "data/wikitext-2-raw/wiki.test.raw"
+    "data/ptb/ptb.test.txt"
+    "data/lambada/lambada_test_plain_text.txt"
+    "data/clue/tnews.test.txt"
+)
+found=0
+for ds in "${datasets[@]}"; do
+    if [[ -f "$ds" ]]; then
+        echo -e " $(basename "$(dirname "$ds")"): ${GREEN}✓${NC}"
+        found=$((found + 1))
+    else
+        echo -e " $(basename "$(dirname "$ds")"): ${RED}✗ Not found${NC}"
+    fi
+done
+echo " Total: $found/${#datasets[@]} datasets available"
+echo ""
+
+# Test 5: Check models
+echo "Test 5: Model Files"
+MODEL_DIR="models/BitNet-b1.58-2B-4T"
+if [[ -d "$MODEL_DIR" ]]; then
+    echo -e " Model directory: ${GREEN}✓${NC}"
+    if [[ -f "$MODEL_DIR/ggml-model-f32.gguf" ]]; then
+        echo -e " F32 model: ${GREEN}✓${NC}"
+    else
+        echo -e " F32 model: ${RED}✗ Not found${NC}"
+    fi
+
+    # Count quantized models with a glob instead of `ls | wc -l`: under
+    # `set -o pipefail` a non-matching `ls` would abort the script before the
+    # "None found" branch could run.
+    quant_count=0
+    for quant_model in "$MODEL_DIR"/ggml-model-i2_s_embed_*.gguf; do
+        if [[ -e "$quant_model" ]]; then
+            quant_count=$((quant_count + 1))
+        fi
+    done
+    if [[ $quant_count -gt 0 ]]; then
+        echo -e " Quantized models: ${GREEN}✓${NC} ($quant_count files)"
+    else
+        echo -e " Quantized models: ${RED}✗ None found${NC}"
+    fi
+else
+    echo -e " Model directory: ${RED}✗ Not found${NC}"
+fi
+echo ""
+
+# Test 6: Thread count generation
+echo "Test 6: Thread Configuration"
+NPROC=$(nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo "8")
+THREAD_COUNTS="1"
+for ((i=2; i<=NPROC; i*=2)); do
+    THREAD_COUNTS="${THREAD_COUNTS},${i}"
+done
+echo " Max threads: $NPROC"
+echo " Test thread counts: $THREAD_COUNTS"
+echo ""
+
+# Test 7: Check stats directory
+echo "Test 7: Output Directory"
+if [[ -d "stats" ]]; then
+    echo -e " stats/ directory: ${GREEN}✓${NC}"
+    file_count=$(ls stats/ 2>/dev/null | wc -l)
+    echo " Files in stats/: $file_count"
+else
+    echo -e " stats/ directory: ${RED}✗ Not found${NC}"
+    echo " Creating stats/ directory..."
+    mkdir -p stats
+    echo -e " ${GREEN}✓ Created${NC}"
+fi
+echo ""
+
+# Summary
+echo "========================================"
+echo "Test Summary"
+echo "========================================"
+echo ""
+echo "To run the full benchmark automation:"
+echo " ./run_paper_benchmarks.sh"
+echo ""
+echo "To build the project first (if not built):"
+echo " cmake -B build -DCMAKE_BUILD_TYPE=Release"
+echo " cmake --build build --config Release"
+echo ""
+echo "To download and convert the model:"
+echo " huggingface-cli download microsoft/BitNet-b1.58-2B-4T --local-dir models/BitNet-b1.58-2B-4T"
+echo " python utils/convert-helper-bitnet.py models/BitNet-b1.58-2B-4T"
+echo ""