Performance Optimization

Best practices and techniques for optimizing Bash script performance, efficiency, and resource usage.

Performance Fundamentals

Understanding Performance

# Measure execution time (bash's `time` keyword prints real/user/sys)
time ./script.sh

# POSIX-format timing: -p changes only the output format (one
# "real"/"user"/"sys" line each, in seconds); it is not more detailed
time -p ./script.sh

# Custom timing
# NOTE: %N (nanoseconds) is a GNU date extension and bc is an external
# program; both must be available for this snippet to work
start_time=$(date +%s.%N)
./script.sh
end_time=$(date +%s.%N)
duration=$(echo "$end_time - $start_time" | bc)
echo "Execution time: ${duration}s"

Resource Monitoring

# Monitor resource usage
top -p $$                     # Monitor current process
htop                          # Interactive process viewer
iostat 1                      # I/O statistics
vmstat 1                      # Virtual memory statistics

# Memory usage
# Virtual (vsz) and resident (rss) size of the current shell, as reported by ps
ps -o pid,vsz,rss,comm -p "$$"
# grep reads the status file itself; piping it through cat only adds a process
grep -E "VmSize|VmRSS" "/proc/$$/status"

Process Optimization

Efficient Command Execution

# Avoid unnecessary command substitution
# Slow
result=$(echo "hello world" | cut -d' ' -f1)

# Fast
result="hello world"
result=${result%% *}

# Avoid multiple pipe operations
# Slow
cat file.txt | grep pattern | awk '{print $1}' | sort

# Fast
awk '/pattern/ {print $1}' file.txt | sort

Builtin vs External Commands

# Use bash builtins when possible
# Slow (external command: forks a subshell plus wc)
# NOTE: wc -c counts bytes and includes the newline echo appends, so this
# test sees one more than the character count used by the builtin form below
if [ "$(echo $string | wc -c)" -gt 10 ]; then
    echo "String is long"
fi

# Fast (builtin): ${#string} is the length in characters, no process spawned
if [ ${#string} -gt 10 ]; then
    echo "String is long"
fi

# Common builtins
# echo is itself a builtin; printf is preferred because its output does not
# depend on option-like or backslash-containing data
printf "%s\n" "text"          # Instead of echo
# Reads one line from standard input into $line without spawning head
read -r line                  # Instead of head -1

Minimize Subshells

# Avoid unnecessary subshells
# Slow
result=$(cd /tmp && pwd)

# Fast
result="/tmp"

# Avoid command substitution in loops
# Slow
for file in $(ls *.txt); do
    echo "$file"
done

# Fast
for file in *.txt; do
    echo "$file"
done

String Operations

Efficient String Manipulation

# Parameter expansion (fast)
string="hello world"
echo "${string#hello }"       # Remove prefix
echo "${string%world}"        # Remove suffix
echo "${string/world/bash}"   # Replace first occurrence
echo "${string//l/L}"         # Replace all occurrences

# Avoid sed/awk for simple operations
# Slow
echo "$string" | sed 's/world/bash/'

# Fast
echo "${string/world/bash}"

String Comparison

# Use [[ for string comparison
# POSIX test: [ is also a builtin, so the speed difference is small; the
# operands must be quoted because they undergo word splitting and globbing
if [ "$string" = "hello" ]; then
    echo "match"
fi

# Bash keyword: the left operand is not word-split or globbed, so it is safe
# unquoted; the quoted right operand is compared literally
if [[ $string == "hello" ]]; then
    echo "match"
fi

# Pattern matching: an unquoted right operand of == is treated as a glob
if [[ $string == hello* ]]; then
    echo "starts with hello"
fi

File Operations

Efficient File Processing

# Read files efficiently
# Slow (spawns cat process)
while read -r line; do
    echo "$line"
done < <(cat file.txt)

# Fast (direct file reading)
while IFS= read -r line; do
    echo "$line"
done < file.txt

# Process large files
# Use mapfile for arrays
mapfile -t lines < file.txt

File Testing

# Use efficient file tests
# Multiple tests on same file: && short-circuits, so later tests are skipped
# as soon as one fails
if [[ -f "$file" && -r "$file" && -s "$file" ]]; then
    echo "File is readable and non-empty"
fi

# Nested form of the same three tests
# NOTE: bash does not cache file metadata between tests; every -f/-r/-s
# checks the file again, so this form is not faster than the one above
if [[ -f "$file" ]]; then
    if [[ -r "$file" && -s "$file" ]]; then
        echo "File is readable and non-empty"
    fi
fi

Loop Optimization

Efficient Loops

# Use C-style loops for arithmetic
# Slow
for i in $(seq 1 1000); do
    echo "$i"
done

# Fast
for ((i=1; i<=1000; i++)); do
    echo "$i"
done

# Avoid command substitution in loops
# Slow
for file in $(find . -name "*.txt"); do
    echo "$file"
done

# Fast
while IFS= read -r -d '' file; do
    echo "$file"
done < <(find . -name "*.txt" -print0)

Loop Unrolling

# Process multiple items per iteration
# Run process_file on every argument, four at a time.
# Arguments: the items to process (one process_file call per item)
# Each batch of up to four background jobs must finish before the next
# batch is started.
process_files() {
    local -a pending=("$@")
    local total=${#pending[@]}
    local base offset

    for ((base = 0; base < total; base += 4)); do
        # Launch at most four jobs, stopping early at the end of the list
        for ((offset = 0; offset < 4 && base + offset < total; offset++)); do
            process_file "${pending[base + offset]}" &
        done
        # Barrier: the whole batch completes before the next one begins
        wait
    done
}

Memory Management

Efficient Variable Usage

# Avoid large string concatenation
# Slow
result=""
for i in {1..1000}; do
    result="$result$i"
done

# Fast (use arrays)
results=()
for i in {1..1000}; do
    results+=("$i")
done
# Join with no separator so the value equals the concatenated string built by
# the slow variant; "${results[*]}" would insert the first IFS character
# (a space by default) between every pair of elements
printf -v result '%s' "${results[@]}"

# Unset large variables
unset large_array

Memory-Efficient Processing

# Process files in chunks
# Stream a file line by line through process_line.
# Arguments: $1 - path of the file to read
# Globals:   processed_data is unset after every chunk_size lines
# Returns:   non-zero if the file cannot be opened
process_large_file() {
    local file="$1"
    local chunk_size=1000
    local line_count=0
    local line  # keep the loop variable out of the caller's scope

    # read returns non-zero on a final line with no trailing newline but still
    # fills $line; the extra test keeps that last line from being dropped
    while IFS= read -r line || [[ -n "$line" ]]; do
        process_line "$line"

        line_count=$((line_count + 1))

        # Periodic cleanup: release accumulated data once per chunk
        if ((line_count % chunk_size == 0)); then
            unset processed_data
        fi
    done < "$file"
}

Parallel Processing

Background Jobs

# Simple parallel execution
# Run process_file on every *.txt file in the current directory, in batches.
# Arguments: $1 - optional batch size (default 4)
# Each batch must finish before the next one starts.
process_files_parallel() {
    local max_jobs="${1:-4}"
    local job_count=0
    local file

    for file in *.txt; do
        # With no matches the glob stays as the literal "*.txt"; skip it
        # (the -L test keeps dangling symlinks that do match the pattern)
        [[ -e "$file" || -L "$file" ]] || continue

        process_file "$file" &
        job_count=$((job_count + 1))

        # Wait if max jobs reached
        if ((job_count >= max_jobs)); then
            wait
            job_count=0
        fi
    done

    # Wait for remaining jobs
    wait
}

Job Control

# Advanced job control
# Run shell command strings concurrently with a cap on running jobs.
# Arguments: $1   - maximum number of concurrent jobs
#            $2.. - command strings, one background job each
# Requires bash 4.3+ for `wait -n`.
parallel_processor() {
    local max_jobs="$1"
    shift
    local tasks=("$@")
    local running_jobs=0
    local task  # keep the loop variable out of the caller's scope

    for task in "${tasks[@]}"; do
        # NOTE(security): eval runs the string as shell code; only pass
        # command strings that come from a trusted source
        eval "$task" &
        running_jobs=$((running_jobs + 1))

        # At the cap: block until any one job exits, freeing a slot
        if [[ $running_jobs -ge $max_jobs ]]; then
            wait -n
            running_jobs=$((running_jobs - 1))
        fi
    done

    # Wait for all remaining jobs
    wait
}

GNU Parallel

# Use GNU parallel for heavy processing
# Install: sudo apt install parallel

# Basic usage
parallel echo {} ::: 1 2 3 4 5

# Process files
# GNU parallel starts each job in a new shell, so a shell function is only
# visible to it after being exported; the guard makes this a no-op when
# process_file is an ordinary program on PATH
if declare -F process_file >/dev/null; then
    export -f process_file
fi
parallel process_file {} ::: *.txt

# Limit concurrent jobs
parallel -j 4 process_file {} ::: *.txt

# Progress monitoring
parallel --progress process_file {} ::: *.txt

I/O Optimization

Efficient File I/O

# Batch file operations
# Slow
for file in *.txt; do
    cp "$file" /backup/
done

# Fast
cp *.txt /backup/

# Use appropriate buffer sizes
# Large file copy with progress
# Copy a file with dd, showing transfer progress on stderr.
# Arguments: $1 - source path
#            $2 - destination path
#            $3 - optional block size passed to dd's bs= (default 1048576, 1 MiB)
# Returns:   dd's exit status
# NOTE: status=progress is a GNU dd option.
copy_large_file() {
    local source="$1"
    local dest="$2"
    local buffer_size="${3:-1048576}"

    dd if="$source" of="$dest" bs="$buffer_size" status=progress
}

Network I/O

# Batch requests into a single curl invocation
# Slow (one curl process, and therefore a fresh connection, per URL)
for url in "${urls[@]}"; do
    curl "$url"
done

# Fast (one curl process for all URLs)
# The loop writes one "url = ..." config line per URL and curl reads them from
# stdin via --config -. --parallel runs the transfers concurrently; this is
# concurrency within one process rather than a connection pool.
# NOTE(review): responses all go to stdout and may interleave - confirm that
# is acceptable or add per-URL output options
{
    for url in "${urls[@]}"; do
        echo "url = $url"
    done
} | curl --parallel --parallel-immediate --config -

Algorithm Optimization

Data Structure Choice

# Use associative arrays for lookups
# Slow (linear search)
users=("alice" "bob" "charlie")

# Linear search: succeed when $1 equals one of the entries in users.
# Arguments: $1 - user name to look up
# Returns:   0 if found, 1 otherwise
is_valid_user() {
    local user="$1"
    local valid_user  # keep the loop variable out of the caller's scope

    for valid_user in "${users[@]}"; do
        if [[ "$user" == "$valid_user" ]]; then
            return 0
        fi
    done
    return 1
}

# Fast (hash lookup)
declare -A valid_users
valid_users["alice"]=1
valid_users["bob"]=1
valid_users["charlie"]=1

# Hash lookup: succeed when $1 is a key of valid_users with a non-empty value.
# Arguments: $1 - user name to look up
# Returns:   0 if found, 1 otherwise
is_valid_user() {
    local user="$1"
    # An empty subscript is rejected by bash ("bad array subscript"), so test
    # for it first; the :- default keeps a missing key from aborting the
    # script when `set -u` is active
    [[ -n "$user" && -n "${valid_users[$user]:-}" ]]
}

Sorting Optimization

# Use appropriate sorting method
# For small arrays: use bash sort
# Sort an indexed array in place with bubble sort (string order per [[ > ]]).
# Arguments: $1 - NAME of the array to sort (not its contents)
# Requires bash 4.3+ (nameref). The array is assumed to have contiguous
# indices starting at 0, and its name must differ from this function's own
# locals (n, i, j, temp, swapped).
bubble_sort() {
    # Prefixed nameref: a plain name such as "arr" would become a circular
    # reference whenever the caller's array has that same name
    local -n _bubble_sort_ref="$1"
    local n=${#_bubble_sort_ref[@]}
    local i j temp swapped

    for ((i = 0; i < n - 1; i++)); do
        swapped=0
        for ((j = 0; j < n - i - 1; j++)); do
            if [[ "${_bubble_sort_ref[j]}" > "${_bubble_sort_ref[j+1]}" ]]; then
                # Swap
                temp="${_bubble_sort_ref[j]}"
                _bubble_sort_ref[j]="${_bubble_sort_ref[j+1]}"
                _bubble_sort_ref[j+1]="$temp"
                swapped=1
            fi
        done
        # A pass with no swaps means the array is already sorted
        if ((swapped == 0)); then
            break
        fi
    done
    return 0
}

# For large arrays: use external sort
# Sort an array in place using the external sort command.
# Arguments: $1 - NAME of the array to sort (not its contents)
# Requires bash 4.3+ (nameref). Elements must not contain newlines, because
# they are passed to sort one per line.
large_sort() {
    # Prefixed nameref: a plain name such as "arr" would become a circular
    # reference whenever the caller's array has that same name
    local -n _large_sort_ref="$1"

    # printf with no arguments still prints one empty line, which would turn
    # an empty array into an array holding a single empty string
    if ((${#_large_sort_ref[@]} == 0)); then
        return 0
    fi

    # Process substitution feeds sort's output straight into mapfile, so no
    # temporary file has to be created or cleaned up
    mapfile -t _large_sort_ref < <(printf '%s\n' "${_large_sort_ref[@]}" | sort)
}

Caching and Memoization

Result Caching

# Cache expensive computations
declare -A cache

# Memoize some_expensive_computation in the cache associative array.
# Arguments: $1 - input passed to some_expensive_computation
# Outputs:   the (possibly cached) result on stdout
# Returns:   the computation's status when it fails; nothing is cached then
# NOTE: calling this inside $(...) runs it in a subshell, so entries stored
# there are lost; redirect its output instead if the cache must persist.
expensive_function() {
    local input="$1"
    local cache_key="expensive_$input"
    local result

    # ${...+x} tests whether the key exists, so empty results count as cached
    # and a missing key does not abort the script under `set -u`
    if [[ -n "${cache[$cache_key]+x}" ]]; then
        printf '%s\n' "${cache[$cache_key]}"
        return
    fi

    # Compute result; a failed computation is reported, not cached
    result=$(some_expensive_computation "$input") || return

    # Cache result
    cache[$cache_key]="$result"
    printf '%s\n' "$result"
}

File-Based Caching

# Cache to files
# Return the output of expensive_operation, cached in a file per key.
# Arguments: $1 - cache key (any "/" is stored as "_")
#            $2 - optional maximum cache age in seconds (default 3600)
# Globals:   SCRIPT_CACHE_DIR (read) - cache directory,
#            default /tmp/script_cache
# Outputs:   the cached or freshly generated result on stdout
# Returns:   2 for an unusable key; the status of expensive_operation when it
#            fails (nothing is cached in that case)
# NOTE(security): the default directory is a predictable, shared /tmp path;
# point SCRIPT_CACHE_DIR at a private directory on multi-user systems.
# NOTE: `stat -c %Y` is GNU stat syntax; if stat fails the entry is treated
# as stale and regenerated.
cache_to_file() {
    local cache_key="$1"
    local max_age="${2:-3600}"
    local cache_dir="${SCRIPT_CACHE_DIR:-/tmp/script_cache}"
    local cache_file now mtime result tmp_file

    if [[ -z "$cache_key" || "$cache_key" == "." || "$cache_key" == ".." ]]; then
        printf 'cache_to_file: invalid cache key\n' >&2
        return 2
    fi
    # Flatten slashes so a key can never point outside the cache directory
    cache_file="$cache_dir/${cache_key//\//_}"

    mkdir -p "$cache_dir" || return

    # Check if cache exists and is recent
    if [[ -f "$cache_file" ]]; then
        now=$(date +%s)
        mtime=$(stat -c %Y "$cache_file") || mtime=0
        if ((now - mtime < max_age)); then
            cat "$cache_file"
            return
        fi
    fi

    # Generate result; a failed operation must not poison the cache
    result=$(expensive_operation) || return

    # Write to a temp file in the same directory and rename it, so readers
    # never observe a half-written cache entry
    if tmp_file=$(mktemp "$cache_dir/.tmp.XXXXXX"); then
        if printf '%s\n' "$result" > "$tmp_file"; then
            mv -f "$tmp_file" "$cache_file"
        else
            rm -f "$tmp_file"
        fi
    fi
    printf '%s\n' "$result"
}

Profiling and Benchmarking

Performance Profiling

# Profile script execution
# Print a quick profile of a script: a timed trace run, then resource figures.
# Arguments: $1 - path of the script to profile
# The script is executed twice, once per measurement.
profile_script() {
    local target="$1"

    printf 'Profiling %s...\n' "$target"

    # Timed run with tracing enabled; only the first 20 lines are shown
    time bash -x "$target" 2>&1 | head -20

    # Second run under /usr/bin/time -v, keeping the memory and CPU lines
    /usr/bin/time -v bash "$target" 2>&1 \
        | grep -E "Maximum resident|User time|System time"
}

Benchmarking Functions

# Benchmark function execution
# Time repeated calls of a function or command.
# Arguments: $1   - name of the function/command to run
#            $2   - number of iterations (default 100)
#            $3.. - arguments forwarded to every call
# Outputs:   total and average wall-clock time in seconds on stdout
# Returns:   2 when the name is empty or iterations is not a positive integer
# NOTE: `date +%s%N` (nanoseconds) is a GNU date extension.
benchmark_function() {
    local func_name="$1"
    local iterations="${2:-100}"
    # Drop the two control arguments so "$@" holds only the target's own
    # arguments (shift by less when fewer than two were given)
    shift "$(($# < 2 ? $# : 2))"

    if [[ -z "$func_name" || ! "$iterations" =~ ^[1-9][0-9]*$ ]]; then
        printf 'usage: benchmark_function NAME [ITERATIONS>0] [ARGS...]\n' >&2
        return 2
    fi

    echo "Benchmarking $func_name ($iterations iterations)..."

    local i start_ns end_ns total_ns avg_ns
    start_ns=$(date +%s%N)

    for ((i = 0; i < iterations; i++)); do
        "$func_name" "$@"
    done

    end_ns=$(date +%s%N)

    # Integer nanosecond arithmetic avoids spawning bc for the subtraction
    total_ns=$((end_ns - start_ns))
    avg_ns=$((total_ns / iterations))

    printf 'Total time: %d.%09ds\n' \
        "$((total_ns / 1000000000))" "$((total_ns % 1000000000))"
    printf 'Average time: %d.%09ds\n' \
        "$((avg_ns / 1000000000))" "$((avg_ns % 1000000000))"
}

Best Practices

Performance Guidelines

Use builtins - Prefer bash builtins over external commands
Minimize subshells - Avoid unnecessary command substitution
Efficient loops - Use appropriate loop constructs
Parallel processing - Use background jobs for independent tasks
Cache results - Store expensive computations
Profile regularly - Measure actual performance
Choose algorithms - Use appropriate algorithms and data structures

Memory Guidelines

Unset variables - Free memory from large variables
Process in chunks - Don't load entire files into memory
Use streams - Process data as it flows
Avoid string concatenation - Use arrays instead
Monitor usage - Track memory consumption

Common Optimizations

# Replace external commands with builtins
# NOTE: these expansions are close to, but not always identical with, the
# commands they replace; the differences are listed per item

# Instead of: $(basename "$file")
# (differs when $file ends in "/": the expansion is empty)
echo "${file##*/}"

# Instead of: $(dirname "$file")
# (differs when $file contains no "/": the expansion is $file, dirname is ".")
echo "${file%/*}"

# Instead of: $(expr $a + $b)
echo $((a + b))

# Instead of: $(echo "$string" | wc -c)
# (wc -c counts bytes including echo's newline; ${#string} counts characters)
echo ${#string}

# Instead of: $(echo "$string" | tr '[:upper:]' '[:lower:]')
# (the ,, case-conversion expansion needs bash 4 or newer)
echo "${string,,}"

Performance Testing

Load Testing

# Simulate load
# Run a command a fixed number of times, spread over concurrent workers.
# Arguments: $1 - number of concurrent worker subshells (positive integer)
#            $2 - total number of calls to make (non-negative integer)
#            $3 - function or command to call; it is word-split on purpose so
#                 that "cmd arg" style values work
# Returns:   2 on invalid arguments
load_test() {
    local concurrent_jobs="$1"
    local total_requests="$2"
    local target_function="$3"
    local i j quota

    if [[ ! "$concurrent_jobs" =~ ^[1-9][0-9]*$ ||
        ! "$total_requests" =~ ^(0|[1-9][0-9]*)$ ]]; then
        printf 'usage: load_test JOBS>0 TOTAL_REQUESTS>=0 COMMAND\n' >&2
        return 2
    fi

    echo "Load testing: $concurrent_jobs concurrent jobs, $total_requests total requests"

    # Plain integer division would silently drop the remainder, so the first
    # `extra` workers each take one additional request
    local requests_per_job=$((total_requests / concurrent_jobs))
    local extra=$((total_requests % concurrent_jobs))

    for ((i = 0; i < concurrent_jobs; i++)); do
        quota=$((requests_per_job + (i < extra ? 1 : 0)))
        (
            for ((j = 0; j < quota; j++)); do
                # shellcheck disable=SC2086 # intentional word splitting
                $target_function
            done
        ) &
    done

    wait
    echo "Load test completed"
}

Stress Testing

# Memory stress test
# Allocate a series of global arrays to put memory pressure on the shell,
# then release them again.
# Arguments: $1 - number of allocation steps (reported as MB; each step stores
#                 10000 integers, so the figure is only approximate)
#            $2 - optional pause between steps, passed to sleep (default 0.1)
# Globals:   creates and later unsets stress_array_0 .. stress_array_N-1
# Returns:   2 when $1 is not a non-negative integer
memory_stress_test() {
    local max_size="$1"  # in MB
    local delay="${2:-0.1}"
    local current_size=0
    local -a arrays=()
    local array_name array

    if [[ ! "$max_size" =~ ^(0|[1-9][0-9]*)$ ]]; then
        printf 'memory_stress_test: size must be a non-negative integer\n' >&2
        return 2
    fi

    # Build the payload once with brace expansion instead of running seq
    # inside eval on every iteration
    local -a payload=({1..10000})

    while ((current_size < max_size)); do
        array_name="stress_array_$current_size"
        # Create the dynamically named global array, then fill it through a
        # nameref; this replaces eval for the indirect assignment
        declare -ga "$array_name"
        local -n _stress_ref="$array_name"
        _stress_ref=("${payload[@]}")
        unset -n _stress_ref  # drop the reference itself, not the array
        arrays+=("$array_name")
        current_size=$((current_size + 1))

        echo "Allocated ${current_size}MB"
        sleep "$delay"
    done

    echo "Cleaning up..."
    for array in "${arrays[@]}"; do
        unset "$array"
    done
}

Monitoring and Alerting

Performance Monitoring

# Monitor script performance
# Run a script under a time limit and warn when it is slow.
# Arguments: $1 - script to run with bash
#            $2 - time limit in seconds, passed to timeout
# Outputs:   a WARNING line on stdout when the run timed out (timeout status
#            124) or took longer than half of the limit
monitor_performance() {
    local script="$1"
    local max_time="$2"
    local began finished rc elapsed

    began=$(date +%s)
    timeout "$max_time" bash "$script"
    rc=$?
    finished=$(date +%s)
    elapsed=$((finished - began))

    if ((rc == 124)); then
        echo "WARNING: Script timed out after ${max_time}s"
    elif ((elapsed > max_time / 2)); then
        echo "WARNING: Script took ${elapsed}s (threshold: $((max_time / 2))s)"
    fi
}

Resource Alerting

# Alert on resource usage
# Print an ALERT line when memory or CPU usage exceeds a threshold.
# Arguments: $1 - optional memory threshold in percent (default 80)
#            $2 - optional CPU threshold in percent (default 90)
# Outputs:   ALERT lines on stdout; parse problems are reported on stderr
# Memory usage is used/total from the "Mem:" line of free. CPU usage is the
# first figure after "Cpu(s):" in one batch iteration of top.
check_resources() {
    local memory_threshold="${1:-80}"
    local cpu_threshold="${2:-90}"
    local memory_usage cpu_usage
    local int_re='^[0-9]+$'
    local num_re='^[0-9]+([.][0-9]+)?$'

    # LC_ALL=C keeps labels and decimal points in the form the parsers expect
    memory_usage=$(LC_ALL=C free | awk '/Mem:/ && $2 > 0 {printf("%.0f", $3/$2*100)}')
    if [[ "$memory_usage" =~ $int_re ]]; then
        if [[ $memory_usage -gt $memory_threshold ]]; then
            echo "ALERT: Memory usage at ${memory_usage}%"
        fi
    else
        # An empty value would otherwise make the numeric test itself fail
        printf 'check_resources: could not read memory usage\n' >&2
    fi

    # Strip everything through "Cpu(s):" and then everything after the number.
    # Field splitting is avoided because top prints "%Cpu(s):100.0" with no
    # space once the value reaches three digits.
    cpu_usage=$(LC_ALL=C top -bn1 | awk '/Cpu\(s\)/ {
        sub(/^.*Cpu\(s\):[ \t]*/, "")
        sub(/[ \t%].*$/, "")
        print
        exit
    }')
    if [[ "$cpu_usage" =~ $num_re ]]; then
        # Compare on the integer part; bash arithmetic has no floats
        if [[ ${cpu_usage%.*} -gt $cpu_threshold ]]; then
            echo "ALERT: CPU usage at ${cpu_usage}%"
        fi
    else
        printf 'check_resources: could not read CPU usage\n' >&2
    fi
    return 0
}

See Advanced Features for complex optimizations and System Operations for system-level performance considerations.

Performance Fundamentals

Understanding Performance

Resource Monitoring

Process Optimization

Efficient Command Execution

Builtin vs External Commands

Minimize Subshells

String Operations

Efficient String Manipulation

String Comparison

File Operations

Efficient File Processing

File Testing

Loop Optimization

Efficient Loops

Loop Unrolling

Memory Management

Efficient Variable Usage

Memory-Efficient Processing

Parallel Processing

Background Jobs

Job Control

GNU Parallel

I/O Optimization

Efficient File I/O

Network I/O

Algorithm Optimization

Data Structure Choice

Sorting Optimization

Caching and Memoization

Result Caching

File-Based Caching

Profiling and Benchmarking

Performance Profiling

Benchmarking Functions

Best Practices

Performance Guidelines

Memory Guidelines

Common Optimizations

Performance Testing

Load Testing

Stress Testing

Monitoring and Alerting

Performance Monitoring

Resource Alerting