mirror of
https://github.com/microsoft/BitNet.git
synced 2026-05-03 11:20:36 +00:00
[chore] add automation bash scripts for the BitNet Tech Report
This commit is contained in:
Executable
+121
@@ -0,0 +1,121 @@
|
||||
#!/bin/bash

################################################################################
# Quick Demo of Benchmark Automation
#
# Runs a small subset of the benchmarks to verify the automation works:
#   1. collect basic machine information
#   2. run llama-bench for a few thread counts
#   3. run llama-perplexity on wikitext-2 for two embedding types
#
# Steps 2 and 3 are skipped when their binary, model or dataset is missing.
# All output is written to stats/demo_<timestamp>/.
################################################################################

set -euo pipefail

GREEN='\033[0;32m'
BLUE='\033[0;34m'
NC='\033[0m'

MODEL_DIR="models/BitNet-b1.58-2B-4T"
BENCH_BIN="build/bin/llama-bench"
PPL_BIN="build/bin/llama-perplexity"
PPL_DATASET="data/wikitext-2-raw/wiki.test.raw"
# Thread counts handed to llama-bench (-t); also shown in the progress banner.
BENCH_THREADS="1,2,4"

STATS_DIR="stats/demo_$(date +%Y%m%d_%H%M%S)"
mkdir -p "${STATS_DIR}"

# Print the number of CPU cores, or "unknown" when neither nproc (Linux) nor
# sysctl (macOS/BSD) can report it.
cpu_count() {
  nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo "unknown"
}

# Read llama-bench output on stdin and print one markdown table row
# (threads, test, tokens/sec) for every bitnet pp128/tg128 result line.
format_bench_rows() {
  awk -F '|' '
    function trim(s) { gsub(/^[ \t]+|[ \t]+$/, "", s); return s }
    /bitnet.*pp128/ || /bitnet.*tg128/ {
      # Column 8 looks like "<tokens/sec> ± <stddev>"; keep the first part.
      split(trim($8), perf, "±")
      printf "| %7s | %4s | %10s |\n", trim($6), trim($7), trim(perf[1])
    }
  '
}

# Read llama-perplexity output on stdin and print the PPL value of the first
# "Final estimate" line; prints nothing when no such line exists.
# Only POSIX awk features are used (no gawk-only 3-argument match()).
extract_final_ppl() {
  awk '
    /Final estimate/ && /PPL/ {
      if (match($0, /PPL[ \t]*=[ \t]*[0-9]+(\.[0-9]+)?/)) {
        value = substr($0, RSTART, RLENGTH)
        sub(/^PPL[ \t]*=[ \t]*/, "", value)
        print value
        exit
      }
    }
  '
}

echo -e "${BLUE}========================================${NC}"
echo -e "${BLUE}Quick Benchmark Demo${NC}"
echo -e "${BLUE}========================================${NC}"
echo ""
echo "Output directory: ${STATS_DIR}"
echo ""

# Test 1: Machine info
echo -e "${GREEN}[1/3] Collecting machine info...${NC}"
{
  echo "=== Machine Information ==="
  echo "Architecture: $(uname -m)"
  echo "CPU cores: $(cpu_count)"
  echo "Timestamp: $(date)"
  echo ""
  if command -v lscpu > /dev/null 2>&1; then
    # sed consumes all of its input, so lscpu cannot be killed by SIGPIPE
    # (which would make the pipeline fail under pipefail).
    lscpu | sed -n '1,20p'
  else
    echo "lscpu not available"
  fi
} | tee "${STATS_DIR}/machine_info.txt"
echo ""

# Test 2: Quick benchmark test
echo -e "${GREEN}[2/3] Running quick benchmark (threads: ${BENCH_THREADS})...${NC}"
bench_model="${MODEL_DIR}/ggml-model-i2_s_embed_q6_k.gguf"
if [[ -f "${BENCH_BIN}" && -f "${bench_model}" ]]; then
  "./${BENCH_BIN}" \
    -m "${bench_model}" \
    -p 128 -n 128 -t "${BENCH_THREADS}" -ngl 0 \
    2>&1 | tee "${STATS_DIR}/bench_quick.txt"

  # Parse results
  {
    echo "# Quick Benchmark Results"
    echo ""
    echo "| Threads | Test | Tokens/sec |"
    echo "|---------|------|------------|"
    format_bench_rows < "${STATS_DIR}/bench_quick.txt"
  } > "${STATS_DIR}/bench_results.md"

  echo ""
  echo -e "${GREEN}Results saved to: ${STATS_DIR}/bench_results.md${NC}"
  cat "${STATS_DIR}/bench_results.md"
else
  echo "Skipping benchmark (model or binary not found)"
fi
echo ""

# Test 3: Quick PPL test (one dataset only)
echo -e "${GREEN}[3/3] Running quick PPL test (wikitext-2 only, 2 embed types)...${NC}"
if [[ -f "${PPL_BIN}" && -f "${PPL_DATASET}" ]]; then
  {
    echo "# Quick PPL Test"
    echo ""
    echo "| Embed Type | PPL |"
    echo "|------------|-----|"

    for embed in i2_s q6_k; do
      model="${MODEL_DIR}/ggml-model-i2_s_embed_${embed}.gguf"
      [[ -f "${model}" ]] || continue

      # Progress goes to stderr so it does not end up inside the table
      # that tee writes to ppl_quick.md.
      echo "Testing: ${embed}..." >&2

      # A failing run is tolerated; it simply yields an N/A row.
      output=$("./${PPL_BIN}" \
        -m "${model}" \
        -f "${PPL_DATASET}" \
        -t 4 -ngl 0 2>&1 || true)

      ppl=$(extract_final_ppl <<< "${output}")
      echo "| ${embed} | ${ppl:-N/A} |"
    done
  } | tee "${STATS_DIR}/ppl_quick.md"

  echo ""
  echo -e "${GREEN}Results saved to: ${STATS_DIR}/ppl_quick.md${NC}"
else
  echo "Skipping PPL test (binary or dataset not found)"
fi
echo ""

echo -e "${BLUE}========================================${NC}"
echo -e "${GREEN}Demo completed!${NC}"
echo -e "${BLUE}========================================${NC}"
echo ""
echo "All results in: ${STATS_DIR}/"
echo ""
echo "To run the full automation script:"
echo " ./run_paper_benchmarks.sh"
echo ""
|
||||
Executable
+720
@@ -0,0 +1,720 @@
|
||||
#!/bin/bash

################################################################################
# Paper Benchmark Automation Script
# This script automates all experiments needed for the paper on both Intel and ARM
################################################################################

set -euo pipefail

# Color codes for output (expanded by the log helpers)
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly BLUE='\033[0;34m'
readonly NC='\033[0m' # No Color

# Configuration: these values are never reassigned, so they are marked
# readonly to catch accidental overwrites in the step functions.
readonly STATS_DIR="stats"
readonly MODEL_NAME="BitNet-b1.58-2B-4T"
readonly MODEL_DIR="models/${MODEL_NAME}"
readonly HF_REPO="microsoft/${MODEL_NAME}"

# Assigned separately from `readonly` so a failing `date` is not masked.
TIMESTAMP=$(date +"%Y%m%d_%H%M%S")
readonly TIMESTAMP

# One timestamped output file per artifact, all inside STATS_DIR.
readonly MACHINE_INFO_FILE="${STATS_DIR}/machine_info_${TIMESTAMP}.txt"
readonly BENCH_RESULTS_FILE="${STATS_DIR}/bench_results_${TIMESTAMP}.md"
readonly BENCH_RAW_FILE="${STATS_DIR}/bench_raw_${TIMESTAMP}.txt"
readonly PPL_RESULTS_FILE="${STATS_DIR}/ppl_results_${TIMESTAMP}.md"
readonly PPL_CSV_FILE="${STATS_DIR}/ppl_results_${TIMESTAMP}.csv"

# Create stats directory if not exists
mkdir -p "${STATS_DIR}"
|
||||
|
||||
################################################################################
|
||||
# Helper Functions
|
||||
################################################################################
|
||||
|
||||
# Print an informational message on stdout, prefixed with a blue "[INFO]" tag.
# Globals:   BLUE, NC (read)
# Arguments: $1 - message text, printed verbatim
log_info() {
  # %b expands the escape sequences stored in the color variables, while %s
  # prints the message untouched. `echo -e` would also have interpreted
  # backslash sequences inside the message itself (e.g. "\n" in a path).
  printf '%b[INFO]%b %s\n' "${BLUE-}" "${NC-}" "${1-}"
}
|
||||
|
||||
# Print a success message on stdout, prefixed with a green "[SUCCESS]" tag.
# Globals:   GREEN, NC (read)
# Arguments: $1 - message text, printed verbatim
log_success() {
  # %b expands the color escape sequences; %s keeps backslashes in the
  # message literal (unlike `echo -e`, which would interpret them).
  printf '%b[SUCCESS]%b %s\n' "${GREEN-}" "${NC-}" "${1-}"
}
|
||||
|
||||
# Print a warning message on stdout, prefixed with a yellow "[WARNING]" tag.
# Globals:   YELLOW, NC (read)
# Arguments: $1 - message text, printed verbatim
log_warning() {
  # %b expands the color escape sequences; %s keeps backslashes in the
  # message literal (unlike `echo -e`, which would interpret them).
  printf '%b[WARNING]%b %s\n' "${YELLOW-}" "${NC-}" "${1-}"
}
|
||||
|
||||
# Print an error message on stdout, prefixed with a red "[ERROR]" tag.
# The function only prints; callers decide whether to exit afterwards.
# Globals:   RED, NC (read)
# Arguments: $1 - message text, printed verbatim
log_error() {
  # %b expands the color escape sequences; %s keeps backslashes in the
  # message literal (unlike `echo -e`, which would interpret them).
  printf '%b[ERROR]%b %s\n' "${RED-}" "${NC-}" "${1-}"
}
|
||||
|
||||
# Print a section banner: a blank line, a rule, the title in green, a rule.
# Globals:   GREEN, NC (read)
# Arguments: $1 - section title, printed verbatim
section_header() {
  local -r rule="================================================================================"

  # %b expands the color escape sequences only; the title goes through %s so
  # backslashes in it are not interpreted (as `echo -e` would have done).
  printf '\n%s\n%b%s%b\n%s\n' "${rule}" "${GREEN-}" "${1-}" "${NC-}" "${rule}"
}
|
||||
|
||||
################################################################################
|
||||
# Step 1: Machine Information and Environment Setup
|
||||
################################################################################
|
||||
|
||||
# Step 1: record machine/toolchain information and install Python requirements.
# Globals:   MACHINE_INFO_FILE (read), CONDA_DEFAULT_ENV (read, optional)
# Outputs:   the report is shown on stdout and written to MACHINE_INFO_FILE
# Side effects: runs `pip install -r requirements.txt` when that file exists
step1_machine_info() {
  local core_count arch tool

  section_header "STEP 1: Machine Information and Environment Setup"

  log_info "Collecting machine information..."

  {
    echo "================================"
    echo "Machine Information"
    echo "================================"
    echo "Timestamp: $(date)"
    echo ""

    echo "--- System Architecture ---"
    uname -a
    echo ""

    echo "--- CPU Information ---"
    if command -v lscpu &> /dev/null; then
      lscpu
    elif [[ -f /proc/cpuinfo ]]; then
      cat /proc/cpuinfo
    else
      log_warning "Could not get CPU information"
    fi
    echo ""

    echo "--- CPU Cores ---"
    # nproc on Linux, sysctl on macOS/BSD, "unknown" if neither works.
    core_count=$(nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo "unknown")
    echo "Number of CPU cores: ${core_count}"
    echo ""

    echo "--- Memory Information ---"
    if command -v free &> /dev/null; then
      free -h
    elif command -v vm_stat &> /dev/null; then
      vm_stat
    else
      log_warning "Could not get memory information"
    fi
    echo ""

    echo "--- Architecture Detection ---"
    arch=$(uname -m)
    echo "Architecture: ${arch}"
    case "${arch}" in
      x86_64) echo "Platform: Intel/AMD x86_64" ;;
      aarch64 | arm64) echo "Platform: ARM64" ;;
      *) echo "Platform: Other (${arch})" ;;
    esac
    echo ""

    echo "--- Compiler Information ---"
    for tool in clang gcc cmake; do
      if command -v "${tool}" &> /dev/null; then
        "${tool}" --version
      fi
    done
    echo ""

    echo "--- Python Environment ---"
    # Probe with command -v first: running a missing interpreter would print
    # a shell error, and if both were missing the failed `a || b` list would
    # abort the whole script under `set -e`/pipefail.
    if command -v python &> /dev/null; then
      python --version 2>&1
    elif command -v python3 &> /dev/null; then
      python3 --version 2>&1
    else
      log_warning "Could not find a Python interpreter"
    fi
    if command -v conda &> /dev/null; then
      conda --version
      echo "Active conda environment: ${CONDA_DEFAULT_ENV:-none}"
    fi
    echo ""

  } | tee "${MACHINE_INFO_FILE}"

  log_success "Machine information saved to: ${MACHINE_INFO_FILE}"

  # Install dependencies according to README
  log_info "Installing Python dependencies..."
  if [[ -f requirements.txt ]]; then
    pip install -r requirements.txt
    log_success "Python dependencies installed"
  else
    log_warning "requirements.txt not found, skipping dependency installation"
  fi
}
|
||||
|
||||
################################################################################
|
||||
# Step 2: Build Project
|
||||
################################################################################
|
||||
|
||||
# Step 2: configure and compile the project in Release mode under ./build.
# There is no explicit error handling here; with the script-wide `set -e`
# a failing cmake invocation ends the run.
step2_build() {
  local -r build_dir="build"
  local -r build_type="Release"

  section_header "STEP 2: Building Project"

  log_info "Configuring CMake..."
  cmake -B "${build_dir}" "-DCMAKE_BUILD_TYPE=${build_type}"

  log_info "Building project..."
  cmake --build "${build_dir}" --config "${build_type}"

  log_success "Build completed successfully"
}
|
||||
|
||||
################################################################################
|
||||
# Step 3: Download and Convert Model
|
||||
################################################################################
|
||||
|
||||
# Step 3: download the model from HuggingFace and convert it to an f32 GGUF.
# Globals:   MODEL_DIR, HF_REPO (read)
# Behavior:  if the f32 model already exists, the user is asked whether to
#            redo the work; any answer other than y/Y (including no input at
#            all) skips the step.
# Exits:     1 when huggingface-cli or the convert helper is missing, or when
#            the conversion did not leave an f32 GGUF behind.
step3_download_convert() {
  local -r f32_model="${MODEL_DIR}/ggml-model-f32.gguf"
  local -r converted_model="${MODEL_DIR}/ggml-model-f32-bitnet.gguf"
  local -r convert_script="utils/convert-helper-bitnet.py"
  local reply=""

  section_header "STEP 3: Download and Convert Model"

  if [[ -d "${MODEL_DIR}" && -f "${f32_model}" ]]; then
    log_warning "Model directory already exists and contains f32 model, skipping download"
    # `read` returns non-zero at EOF (closed stdin, cron, CI). Without the
    # fallback that would abort the script under `set -e`; treat it as "no".
    read -r -n 1 -p "Do you want to re-download and convert? (y/N): " reply || reply=""
    echo
    if [[ ! "${reply}" =~ ^[Yy]$ ]]; then
      return
    fi
  fi

  # Create model directory
  mkdir -p "${MODEL_DIR}"

  # Download from HuggingFace
  log_info "Downloading model from HuggingFace: ${HF_REPO}"
  if ! command -v huggingface-cli &> /dev/null; then
    log_error "huggingface-cli not found. Please install it with: pip install huggingface_hub"
    exit 1
  fi
  huggingface-cli download "${HF_REPO}" --local-dir "${MODEL_DIR}"

  # Convert to f32 GGUF using the helper script
  log_info "Converting model to f32 GGUF format..."
  if [[ ! -f "${convert_script}" ]]; then
    log_error "Convert helper script not found"
    exit 1
  fi

  # The script creates ggml-model-f32-bitnet.gguf, we'll rename it
  python "${convert_script}" "${MODEL_DIR}"

  # Rename the output to match expected name
  if [[ -f "${converted_model}" ]]; then
    mv "${converted_model}" "${f32_model}"
  fi

  # Do not report success unless the expected artifact really exists.
  if [[ ! -f "${f32_model}" ]]; then
    log_error "Conversion did not produce ${f32_model}"
    exit 1
  fi

  log_success "Model downloaded and converted to f32 GGUF"
}
|
||||
|
||||
################################################################################
|
||||
# Step 4: Quantize Embeddings
|
||||
################################################################################
|
||||
|
||||
# Step 4: run embed_quant.sh from the current directory.
# Exits with status 1 when embed_quant.sh is not present.
step4_quantize_embeddings() {
  local -r quant_script="embed_quant.sh"

  section_header "STEP 4: Quantize Embeddings"

  log_info "Running embed_quant.sh to create different embedding quantization variants..."

  # Guard clause: nothing to do without the quantization script.
  [[ -f "${quant_script}" ]] || {
    log_error "embed_quant.sh not found"
    exit 1
  }

  bash "${quant_script}"

  log_success "Embedding quantization completed"
}
|
||||
|
||||
################################################################################
|
||||
# Step 5: Tune GEMM Block Sizes
|
||||
################################################################################
|
||||
|
||||
# Print the row of a tuning log that has the highest tokens_per_second.
# Arguments: $1 - CSV file: header line, then
#                 row_block,col_block,parallel_size,tokens_per_second
# Outputs:   the best data row on stdout; nothing when no row has a
#            throughput greater than zero (failed runs are logged as 0)
_best_tuning_row() {
  awk -F',' '
    # "+ 0" forces a numeric comparison (99.9 < 101.25).
    NR > 1 && NF >= 4 && ($4 + 0) > best { best = $4 + 0; row = $0 }
    END { if (row != "") print row }
  ' "$1"
}

# Step 5: search GEMM block sizes, then apply and build the best combination.
# Writes tune_gemm_blocks_auto.sh, runs it (it benchmarks every combination
# and logs to stats/tuning_log.csv), then writes the winner to
# include/gemm-config.h and rebuilds.
# Returns:   1 when the log contains no successful measurement, so that a
#            meaningless configuration is never written to the header.
step5_tune_gemm() {
  local -r tuning_log="stats/tuning_log.csv"
  local -r header_file="include/gemm-config.h"
  local -r tune_script="tune_gemm_blocks_auto.sh"
  local cores best_config best_row best_col best_par best_perf

  section_header "STEP 5: Tune GEMM Block Sizes"

  log_info "Running GEMM block size tuning..."

  # Backup original tune script if needed (only when it actually exists;
  # an unguarded cp would abort the run under `set -e`).
  if [[ -f "tune_gemm_blocks.sh" && ! -f "tune_gemm_blocks.sh.bak" ]]; then
    cp tune_gemm_blocks.sh tune_gemm_blocks.sh.bak
  fi

  # Get number of threads
  cores=$(nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo "8")

  log_info "Generating ${tune_script} for comprehensive search..."

  # Create a temporary tuning script with broader search. The delimiter is
  # quoted, so nothing below is expanded while the file is written.
  cat > "${tune_script}" << 'TUNE_SCRIPT_EOF'
#!/bin/bash
set -e

HEADER_FILE="include/gemm-config.h"
# Benchmark thread count; override with TUNE_THREADS (16 is the old default).
TUNE_THREADS="${TUNE_THREADS:-16}"
BENCH_CMD=(./build/bin/llama-bench
  -m models/BitNet-b1.58-2B-4T/ggml-model-i2_s_embed_i2_s.gguf
  -p 128 -n 0 -t "$TUNE_THREADS" -ngl 0)
BUILD_CMD=(cmake --build build --config Release -j)

ACT_PARALLEL_DEFINE=true

# Expanded search space for better tuning
ROW_BLOCK_VALUES=(2 4 8)
COL_BLOCK_VALUES=(64 128 256)
PARALLEL_SIZE_VALUES=(2 4 8)

BEST_PERF=0
BEST_ROW_BLOCK=0
BEST_COL_BLOCK=0
BEST_PARALLEL_SIZE=0
LOG_FILE="stats/tuning_log.csv"

if [ -f "$HEADER_FILE" ]; then
  cp "$HEADER_FILE" "${HEADER_FILE}.bak"
fi

echo "Starting comprehensive tuning process..."
mkdir -p "$(dirname "$LOG_FILE")"
echo "row_block,col_block,parallel_size,tokens_per_second" > "$LOG_FILE"

cleanup() {
  echo "Restoring original header file..."
  if [ -f "${HEADER_FILE}.bak" ]; then
    mv "${HEADER_FILE}.bak" "$HEADER_FILE"
  fi
  echo "Tuning finished."
  echo "Best: ROW_BLOCK=${BEST_ROW_BLOCK}, COL_BLOCK=${BEST_COL_BLOCK}, PARALLEL=${BEST_PARALLEL_SIZE} -> ${BEST_PERF} tokens/s"
}

trap cleanup EXIT

# Succeeds when $1 is numerically greater than $2 (awk instead of bc, so bc
# does not have to be installed).
is_greater() {
  awk -v a="$1" -v b="$2" 'BEGIN { exit !((a + 0) > (b + 0)) }'
}

for ps in "${PARALLEL_SIZE_VALUES[@]}"; do
  for rb in "${ROW_BLOCK_VALUES[@]}"; do
    for cb in "${COL_BLOCK_VALUES[@]}"; do
      echo "Testing: ROW=${rb}, COL=${cb}, PARALLEL=${ps}"

      {
        echo "// Auto-generated by tuning script"
        if [ "$ACT_PARALLEL_DEFINE" = "true" ]; then
          echo "#define ACT_PARALLEL"
        fi
        echo "#if defined(ACT_PARALLEL)"
        echo " #define ROW_BLOCK_SIZE ${rb}"
        echo " #define COL_BLOCK_SIZE ${cb}"
        echo " #define PARALLEL_SIZE ${ps}"
        echo "#else"
        echo " #define ROW_BLOCK_SIZE ${rb}"
        echo " #define COL_BLOCK_SIZE ${cb}"
        echo " #define PARALLEL_SIZE ${ps}"
        echo "#endif"
      } > "$HEADER_FILE"

      # A combination that does not build is logged as 0 and skipped
      # instead of silently aborting the whole search.
      if ! "${BUILD_CMD[@]}" > /dev/null 2>&1; then
        echo "Build failed for ROW=${rb}, COL=${cb}, PARALLEL=${ps}; skipping" >&2
        echo "${rb},${cb},${ps},0" >> "$LOG_FILE"
        continue
      fi

      # A failing benchmark yields no result line and therefore perf=0.
      output=$("${BENCH_CMD[@]}" 2>&1) || true

      perf=$(awk -F '|' '
        /pp128/ && /bitnet/ {
          gsub(/ /, "", $8)
          split($8, perf, "±")
          print perf[1]
          exit
        }
      ' <<< "$output")

      if [[ ! "$perf" =~ ^[0-9]+([.][0-9]+)?$ ]]; then
        perf=0
      fi

      echo "Performance: ${perf} tokens/s"
      echo "${rb},${cb},${ps},${perf}" >> "$LOG_FILE"

      if is_greater "$perf" "$BEST_PERF"; then
        BEST_PERF=$perf
        BEST_ROW_BLOCK=$rb
        BEST_COL_BLOCK=$cb
        BEST_PARALLEL_SIZE=$ps
        echo "*** New best found! ***"
      fi
    done
  done
done

echo "Best configuration: ROW=${BEST_ROW_BLOCK}, COL=${BEST_COL_BLOCK}, PARALLEL=${BEST_PARALLEL_SIZE}"
echo "Best performance: ${BEST_PERF} tokens/s"
TUNE_SCRIPT_EOF

  chmod +x "${tune_script}"
  # Tune with this machine's core count instead of a fixed 16 threads.
  TUNE_THREADS="${cores}" bash "${tune_script}"

  # Read the best configuration from the log
  if [[ ! -f "${tuning_log}" ]]; then
    log_error "Tuning log not found"
    return 0
  fi

  best_config=$(_best_tuning_row "${tuning_log}")
  if [[ -z "${best_config}" ]]; then
    log_error "No successful tuning run in ${tuning_log}; ${header_file} left unchanged"
    return 1
  fi
  IFS=',' read -r best_row best_col best_par best_perf <<< "${best_config}"

  log_success "Best configuration found:"
  log_success " ROW_BLOCK_SIZE=${best_row}, COL_BLOCK_SIZE=${best_col}, PARALLEL_SIZE=${best_par}"
  log_success " Performance: ${best_perf} tokens/s"

  # Apply the best configuration
  log_info "Applying best configuration to gemm-config.h..."
  cat > "${header_file}" << GEMM_CONFIG_EOF
// Auto-generated with best tuning results
// Best performance: ${best_perf} tokens/s
#define ACT_PARALLEL
#if defined(ACT_PARALLEL)
 #define ROW_BLOCK_SIZE ${best_row}
 #define COL_BLOCK_SIZE ${best_col}
 #define PARALLEL_SIZE ${best_par}
#else
 #define ROW_BLOCK_SIZE ${best_row}
 #define COL_BLOCK_SIZE ${best_col}
 #define PARALLEL_SIZE ${best_par}
#endif
GEMM_CONFIG_EOF

  # Rebuild with best configuration
  log_info "Rebuilding with best configuration..."
  cmake --build build --config Release -j

  log_success "GEMM tuning completed and applied"
}
|
||||
|
||||
################################################################################
|
||||
# Step 6: Run Performance Benchmarks
|
||||
################################################################################
|
||||
|
||||
# Step 6: run llama-bench on the I2_S/Q6_K model and write a markdown report.
# Generates bench.sh in the current directory, runs it with its output copied
# to BENCH_RAW_FILE, then writes the parsed summary plus the raw output to
# BENCH_RESULTS_FILE.
# Globals: MODEL_DIR, MODEL_NAME, BENCH_RAW_FILE, BENCH_RESULTS_FILE (read);
#          NPROC, THREAD_COUNTS (written)
step6_benchmark() {
  local threads

  section_header "STEP 6: Running Performance Benchmarks"

  # Core count: nproc, then sysctl, then a fixed fallback of 8.
  NPROC=$(nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo "8")
  log_info "Detected ${NPROC} CPU cores"

  # Thread counts are the powers of two up to the core count: 1, 2, 4, 8, ...
  THREAD_COUNTS="1"
  threads=2
  while (( threads <= NPROC )); do
    THREAD_COUNTS+=",${threads}"
    threads=$(( threads * 2 ))
  done

  log_info "Testing with thread counts: ${THREAD_COUNTS}"

  # Write the benchmark script: the two assignments carry the values computed
  # here, the rest is emitted literally (quoted here-doc delimiter).
  {
    printf '%s\n' '#!/bin/bash' 'set -e' ''
    printf 'MODEL="%s"\n' "${MODEL_DIR}/ggml-model-i2_s_embed_q6_k.gguf"
    printf 'THREADS="%s"\n' "${THREAD_COUNTS}"
    cat << 'BENCH_SCRIPT_EOF'

if [[ ! -f "${MODEL}" ]]; then
    echo "Error: Model not found: ${MODEL}"
    exit 1
fi

./build/bin/llama-bench -m "${MODEL}" -p 128 -n 128 -t "${THREADS}" -ngl 0
BENCH_SCRIPT_EOF
  } > bench.sh

  chmod +x bench.sh

  log_info "Running benchmark..."

  # stdout and stderr of the run are shown and stored in the raw file.
  ./bench.sh 2>&1 | tee "${BENCH_RAW_FILE}"

  log_info "Parsing benchmark results..."

  {
    printf '%s\n' \
      "# Benchmark Results" \
      "" \
      "**Machine:** $(uname -m)" \
      "**Timestamp:** $(date)" \
      "**Model:** ${MODEL_NAME}" \
      "**Quantization:** I2_S weight, Q6_K embeddings" \
      "" \
      "## Performance Summary" \
      "" \
      "| Threads | Test Type | Tokens/sec | Std Dev |" \
      "|---------|-----------|------------|---------|"

    # With "|" as separator, fields 6, 7 and 8 of a result row are the
    # thread count, the test name and "<tokens/sec> ± <stddev>".
    awk -F '|' '
      function trim(s) { gsub(/^[[:space:]]+|[[:space:]]+$/, "", s); return s }
      /bitnet.*pp128/ || /bitnet.*tg128/ {
        split(trim($8), perf, "±");
        printf "| %7s | %9s | %10s | %7s |\n", trim($6), trim($7), trim(perf[1]), trim(perf[2]);
      }
    ' "${BENCH_RAW_FILE}"

    printf '%s\n' "" "## Detailed Output" "" '```'
    cat "${BENCH_RAW_FILE}"
    printf '%s\n' '```'
  } > "${BENCH_RESULTS_FILE}"

  log_success "Benchmark results saved to: ${BENCH_RESULTS_FILE}"
}
|
||||
|
||||
################################################################################
|
||||
# Step 7: Run PPL Benchmarks
|
||||
################################################################################
|
||||
|
||||
# Write the stand-alone perplexity benchmark script.
# Arguments: $1 - path of the script to create (made executable)
#            $2 - directory that holds the ggml-model-i2_s_embed_*.gguf files
#            $3 - space-separated list of dataset files to evaluate
# The two variable parts are emitted with printf %q between literal here-docs,
# so no in-place `sed -i` (whose syntax differs between GNU and BSD/macOS sed)
# is needed to patch the file afterwards.
_write_ppl_script() {
  local -r out_path="$1"
  local -r model_dir="$2"
  local -r datasets="$3"

  {
    cat << 'EOFPPL_HEAD'
#!/usr/bin/env bash
set -euo pipefail

BIN="./build/bin/llama-perplexity"
MODEL_TEMPLATE="ggml-model-i2_s_embed_{ET}.gguf"

EMBED_TYPES="f32 bf16 f16 i2_s q3_k q4_0 q5_0 q6_k tq1_0 tq2_0"
EOFPPL_HEAD
    printf 'MODEL_DIR=%q\n' "${model_dir}"
    printf 'DATASETS=%q\n' "${datasets}"
    cat << 'EOFPPL_BODY'

THREADS="${THREADS:-16}"
NGL="${NGL:-0}"

CSV_LOG="ppl_results_temp.csv"

if [[ ! -x "$BIN" ]]; then
  echo "Error: llama-perplexity not found at $BIN" >&2
  exit 1
fi

# Print the size of file $1 in MiB with two decimals (0.00 if stat fails).
model_size_mib() {
  local f="$1"
  local sz
  sz=$(stat -c %s "$f" 2>/dev/null || stat -f %z "$f" 2>/dev/null || echo 0)
  awk -v b="$sz" 'BEGIN { printf("%.2f", b/1024/1024) }'
}

# Read llama-perplexity output on stdin and print "<ppl>,<error>" taken from
# the last "Final estimate: PPL = X +/- Y" line; exit status 1 if none.
# Uses only POSIX awk (no gawk-only match() array argument, no \s).
extract_ppl_final() {
  awk '
    /Final estimate/ && /PPL/ {
      if (match($0, /PPL[ \t]*=[ \t]*[0-9]+(\.[0-9]+)?[ \t]*\+\/-[ \t]*[0-9]+(\.[0-9]+)?/)) {
        hit = substr($0, RSTART, RLENGTH)
        sub(/^PPL[ \t]*=[ \t]*/, "", hit)
        val = hit
        sub(/[ \t]*\+\/-.*$/, "", val)
        err = hit
        sub(/^.*\+\/-[ \t]*/, "", err)
        found = 1
      }
    }
    END {
      if (!found) exit 1
      print val "," err
    }
  '
}

# Fallback parser: print the last number that follows a word containing
# "perplexity" anywhere in the output.
extract_perplexity() {
  awk '
    {
      for (i=1; i<=NF; ++i) {
        if (tolower($i) ~ /perplexity/) {
          for (j=i; j<=NF; ++j) {
            if ($j ~ /^[0-9]+(\.[0-9]+)?$/) { p=$j; break }
            gsub(/^.*=/, "", $j); gsub(/,$/, "", $j); gsub(/^\(/, "", $j); gsub(/\)$/, "", $j)
            if ($j ~ /^[0-9]+(\.[0-9]+)?$/) { p=$j; break }
          }
        }
      }
      if (p) last=p
    }
    END { if (last) print last }'
}

echo "| embed-type | model | size | dataset | threads | ppl |"
echo "| ---------- | --------------: | -----: | ------: | ------: | ---------: |"
echo "embed_type,model,model_size_mib,dataset,threads,perplexity,perplexity_err" > "$CSV_LOG"

for et in $EMBED_TYPES; do
  model_glob="${MODEL_DIR}/$(echo "$MODEL_TEMPLATE" | sed "s/{ET}/$et/")"

  found_any=0
  for model in $model_glob; do
    [[ -e "$model" ]] || continue
    found_any=1
  done

  if [[ $found_any -eq 0 ]]; then
    echo "Warning: no models found for embed type '$et', skipping." >&2
    continue
  fi

  for model in $model_glob; do
    [[ -e "$model" ]] || continue
    size_mib=$(model_size_mib "$model")

    for ds in $DATASETS; do
      if [[ ! -r "$ds" ]]; then
        echo "Warning: dataset not found: $ds (skipping)" >&2
        continue
      fi

      echo "==> Testing: model=$model, dataset=$ds"
      out=$("$BIN" -m "$model" -f "$ds" -t "$THREADS" -ngl "$NGL" 2>&1 || true)

      ppl_pair=$(extract_ppl_final <<< "$out" || true)
      if [[ -n "${ppl_pair:-}" ]]; then
        ppl="${ppl_pair%%,*}"
        ppl_err="${ppl_pair##*,}"
      else
        ppl=$(extract_perplexity <<< "$out" || true)
        if [[ -z "${ppl:-}" ]]; then
          ppl="NA"
        fi
        ppl_err="NA"
      fi

      if [[ "$ppl_err" != "NA" ]]; then
        ppl_disp="$ppl ± $ppl_err"
      else
        ppl_disp="$ppl"
      fi

      printf "| %10s | %14s | %6s MiB | %7s | %7s | %10s |\n" \
        "$et" "$(basename "$model")" "$size_mib" "$(basename "$ds")" "$THREADS" "$ppl_disp"

      echo "$et,$(basename "$model"),$size_mib,$(basename "$ds"),$THREADS,$ppl,$ppl_err" >> "$CSV_LOG"
    done
  done
done

echo "Done. Results saved to $CSV_LOG"
EOFPPL_BODY
  } > "${out_path}"

  chmod +x "${out_path}"
}

# Step 7: run perplexity benchmarks for every embedding variant on each
# dataset that is present, then write a markdown report and a CSV copy.
# Globals:   MODEL_DIR, MODEL_NAME, PPL_RESULTS_FILE, PPL_CSV_FILE (read)
# Side effects: creates embed_quant_ppl_auto.sh and ppl_results_temp.csv in
#            the current directory and "${PPL_RESULTS_FILE}.raw".
# Returns early (status 0) when none of the datasets exists.
step7_ppl_benchmark() {
  local -r ppl_script="embed_quant_ppl_auto.sh"
  local -r temp_csv="ppl_results_temp.csv"
  local -r raw_file="${PPL_RESULTS_FILE}.raw"
  local -a candidates=(
    data/wikitext-2-raw/wiki.test.raw
    data/ptb/ptb.test.txt
    data/lambada/lambada_test_plain_text.txt
    data/clue/tnews.test.txt
  )
  local -a available=()
  local ds

  section_header "STEP 7: Running Perplexity (PPL) Benchmarks"

  log_info "Checking benchmark datasets..."

  # Check which datasets are available
  for ds in "${candidates[@]}"; do
    if [[ -f "${ds}" ]]; then
      available+=("${ds}")
      log_info "Found dataset: ${ds}"
    else
      log_warning "Dataset not found: ${ds}"
    fi
  done

  if (( ${#available[@]} == 0 )); then
    log_error "No benchmark datasets found in data/ directory"
    log_warning "Skipping PPL benchmarks"
    return
  fi

  log_info "Creating PPL benchmark script..."
  _write_ppl_script "${ppl_script}" "${MODEL_DIR}" "${available[*]}"

  log_info "Running PPL benchmarks (this may take a while)..."

  # Run the PPL benchmark
  "./${ppl_script}" 2>&1 | tee "${raw_file}"

  # Format the results
  {
    echo "# Perplexity (PPL) Benchmark Results"
    echo ""
    echo "**Machine:** $(uname -m)"
    echo "**Timestamp:** $(date)"
    echo "**Model:** ${MODEL_NAME}"
    echo ""
    echo "## Results by Embedding Type"
    echo ""

    # Table rows are the lines starting with "|"; progress lines are dropped.
    grep "^|" "${raw_file}" || true

    echo ""
    echo "## Summary Statistics"
    echo ""

    if [[ -f "${temp_csv}" ]]; then
      # Copy to final location
      cp "${temp_csv}" "${PPL_CSV_FILE}"

      # Generate summary by embed type
      echo "### Average PPL by Embedding Type"
      echo ""
      echo "| Embed Type | Avg PPL | Models Tested |"
      echo "|------------|---------|---------------|"

      # Column 6 is the perplexity; "NA" rows are excluded from the average.
      awk -F',' '
        NR > 1 && $6 != "NA" {
          sum[$1] += $6;
          count[$1]++;
        }
        END {
          for (et in sum) {
            printf "| %10s | %7.2f | %13d |\n", et, sum[et]/count[et], count[et];
          }
        }
      ' "${PPL_CSV_FILE}" | sort -t'|' -k3 -n

      echo ""
    fi

    echo "## Full Raw Output"
    echo ""
    echo '```'
    cat "${raw_file}"
    echo '```'

  } > "${PPL_RESULTS_FILE}"

  log_success "PPL results saved to: ${PPL_RESULTS_FILE}"
  log_success "PPL CSV data saved to: ${PPL_CSV_FILE}"
}
|
||||
|
||||
################################################################################
|
||||
# Main Execution
|
||||
################################################################################
|
||||
|
||||
# Entry point: run the seven benchmark steps in order, then list the
# locations of the produced result files.
main() {
  local step line
  local -a steps=(
    step1_machine_info
    step2_build
    step3_download_convert
    step4_quantize_embeddings
    step5_tune_gemm
    step6_benchmark
    step7_ppl_benchmark
  )

  section_header "Paper Benchmark Automation - Starting"

  log_info "All results will be saved to: ${STATS_DIR}/"
  log_info "Timestamp: ${TIMESTAMP}"

  # Execute all steps
  for step in "${steps[@]}"; do
    "${step}"
  done

  # Final summary
  section_header "All Benchmarks Completed!"

  for line in \
    "Results summary:" \
    " - Machine info: ${MACHINE_INFO_FILE}" \
    " - Benchmark: ${BENCH_RESULTS_FILE}" \
    " - PPL results: ${PPL_RESULTS_FILE}" \
    " - PPL CSV: ${PPL_CSV_FILE}" \
    " - GEMM tuning log: stats/tuning_log.csv"; do
    log_success "${line}"
  done

  echo ""
  log_info "You can find all results in the ${STATS_DIR}/ directory"
}
|
||||
|
||||
# Run main function
|
||||
main "$@"
|
||||
Executable
+160
@@ -0,0 +1,160 @@
|
||||
#!/bin/bash

################################################################################
# Quick Test Script for Benchmark Automation
# This script tests individual components without running full benchmarks.
# It only reports what is present; the single side effect is creating the
# stats/ directory when it does not exist yet.
################################################################################

set -euo pipefail

GREEN='\033[0;32m'
RED='\033[0;31m'
NC='\033[0m'

# Print the number of CPU cores, or the fallback given as $1.
cpu_count() {
  nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo "$1"
}

# Print the version line of the first available Python interpreter.
# Probing with command -v keeps the shell's "command not found" message out
# of the report when only python3 is installed.
python_version() {
  local py
  for py in python python3; do
    if command -v "${py}" > /dev/null 2>&1; then
      "${py}" --version 2>&1
      return 0
    fi
  done
  echo "not found"
}

# Print how many of the given paths exist. Intended to be called with a glob
# (count_existing dir/*.gguf): an unmatched glob stays a literal, non-existing
# path and therefore counts as 0. Unlike `ls ... | wc -l`, this cannot fail
# under `set -e -o pipefail` when nothing matches.
count_existing() {
  local n=0 p
  for p in "$@"; do
    if [[ -e "${p}" ]]; then
      n=$((n + 1))
    fi
  done
  printf '%d\n' "${n}"
}

# Print a check line for a regular file: "<indent><label>: ✓" or the given
# failure text in red.
# Arguments: $1 - indent, $2 - label, $3 - file path, $4 - failure text
report_file() {
  if [[ -f "$3" ]]; then
    echo -e "$1$2: ${GREEN}✓${NC}"
  else
    echo -e "$1$2: ${RED}✗ $4${NC}"
  fi
}

# Print a check line for a tool, including the first line of `<tool> --version`.
# Arguments: $1 - label, $2 - command name
report_tool() {
  if command -v "$2" &> /dev/null; then
    # sed reads the whole output, so the tool cannot be hit by SIGPIPE.
    echo -e " $1: ${GREEN}✓${NC} $("$2" --version | sed -n '1p')"
  else
    echo -e " $1: ${RED}✗ Not found${NC}"
  fi
}

echo "========================================"
echo "Testing Benchmark Automation Components"
echo "========================================"
echo ""

# Test 1: Check system info
echo "Test 1: System Information"
echo " Architecture: $(uname -m)"
echo " CPU cores: $(cpu_count 'unknown')"
echo " Python: $(python_version)"
report_tool "CMake" cmake
report_tool "Clang" clang
echo ""

# Test 2: Check required files
echo "Test 2: Required Files"
files=(
  "embed_quant.sh"
  "tune_gemm_blocks.sh"
  "utils/convert-helper-bitnet.py"
  "requirements.txt"
)
for f in "${files[@]}"; do
  report_file " " "${f}" "${f}" "Missing"
done
echo ""

# Test 3: Check build directory
echo "Test 3: Build Status"
if [[ -d "build" ]]; then
  echo -e " build/ directory: ${GREEN}✓${NC}"
  for bin in llama-bench llama-perplexity llama-quantize; do
    report_file " " "${bin}" "build/bin/${bin}" "Not built"
  done
else
  echo -e " build/ directory: ${RED}✗ Not found${NC}"
fi
echo ""

# Test 4: Check data directory
echo "Test 4: Benchmark Datasets"
datasets=(
  "data/wikitext-2-raw/wiki.test.raw"
  "data/ptb/ptb.test.txt"
  "data/lambada/lambada_test_plain_text.txt"
  "data/clue/tnews.test.txt"
)
found=0
for ds in "${datasets[@]}"; do
  # The label is the name of the directory that contains the dataset file.
  ds_dir=${ds%/*}
  ds_label=${ds_dir##*/}
  if [[ -f "${ds}" ]]; then
    echo -e " ${ds_label}: ${GREEN}✓${NC}"
    found=$((found + 1))
  else
    echo -e " ${ds_label}: ${RED}✗ Not found${NC}"
  fi
done
echo " Total: ${found}/${#datasets[@]} datasets available"
echo ""

# Test 5: Check models
echo "Test 5: Model Files"
MODEL_DIR="models/BitNet-b1.58-2B-4T"
if [[ -d "${MODEL_DIR}" ]]; then
  echo -e " Model directory: ${GREEN}✓${NC}"
  report_file " " "F32 model" "${MODEL_DIR}/ggml-model-f32.gguf" "Not found"

  # Count quantized models
  quant_count=$(count_existing "${MODEL_DIR}"/ggml-model-i2_s_embed_*.gguf)
  if [[ ${quant_count} -gt 0 ]]; then
    echo -e " Quantized models: ${GREEN}✓${NC} (${quant_count} files)"
  else
    echo -e " Quantized models: ${RED}✗ None found${NC}"
  fi
else
  echo -e " Model directory: ${RED}✗ Not found${NC}"
fi
echo ""

# Test 6: Thread count generation
echo "Test 6: Thread Configuration"
NPROC=$(cpu_count "8")
# Powers of two up to the core count: 1, 2, 4, 8, ...
THREAD_COUNTS="1"
for ((i = 2; i <= NPROC; i *= 2)); do
  THREAD_COUNTS="${THREAD_COUNTS},${i}"
done
echo " Max threads: ${NPROC}"
echo " Test thread counts: ${THREAD_COUNTS}"
echo ""

# Test 7: Check stats directory
echo "Test 7: Output Directory"
if [[ -d "stats" ]]; then
  echo -e " stats/ directory: ${GREEN}✓${NC}"
  file_count=$(count_existing stats/*)
  echo " Files in stats/: ${file_count}"
else
  echo -e " stats/ directory: ${RED}✗ Not found${NC}"
  echo " Creating stats/ directory..."
  mkdir -p stats
  echo -e " ${GREEN}✓ Created${NC}"
fi
echo ""

# Summary
echo "========================================"
echo "Test Summary"
echo "========================================"
echo ""
echo "To run the full benchmark automation:"
echo " ./run_paper_benchmarks.sh"
echo ""
echo "To build the project first (if not built):"
echo " cmake -B build -DCMAKE_BUILD_TYPE=Release"
echo " cmake --build build --config Release"
echo ""
echo "To download and convert the model:"
echo " huggingface-cli download microsoft/BitNet-b1.58-2B-4T --local-dir models/BitNet-b1.58-2B-4T"
echo " python utils/convert-helper-bitnet.py models/BitNet-b1.58-2B-4T"
echo ""
|
||||
Reference in New Issue
Block a user