Skip to main content

Performance Optimization

Best practices and techniques for optimizing Bash script performance, efficiency, and resource usage.

Performance Fundamentals

Understanding Performance

# Measure execution time
time ./script.sh

# Detailed timing
time -p ./script.sh

# Custom timing
start_time=$(date +%s.%N)
./script.sh
end_time=$(date +%s.%N)
duration=$(echo "$end_time - $start_time" | bc)
echo "Execution time: ${duration}s"

Resource Monitoring

# Monitor resource usage
top -p $$ # Monitor current process
htop # Interactive process viewer
iostat 1 # I/O statistics
vmstat 1 # Virtual memory statistics

# Memory usage of the current shell
ps -o pid,vsz,rss,comm -p "$$"
# grep can read the file itself; piping it through cat only adds a process
grep -E "VmSize|VmRSS" "/proc/$$/status"

Process Optimization

Efficient Command Execution

# Avoid unnecessary command substitution
# Slow
result=$(echo "hello world" | cut -d' ' -f1)

# Fast
result="hello world"
result=${result%% *}

# Avoid multiple pipe operations
# Slow
cat file.txt | grep pattern | awk '{print $1}' | sort

# Fast
awk '/pattern/ {print $1}' file.txt | sort

Builtin vs External Commands

# Use bash builtins when possible
# Slow (external command)
if [ "$(echo $string | wc -c)" -gt 10 ]; then
echo "String is long"
fi

# Fast (builtin)
if [ ${#string} -gt 10 ]; then
echo "String is long"
fi

# Common builtins
printf "%s\n" "text" # Instead of echo
read -r line # Instead of head -1

Minimize Subshells

# Avoid unnecessary subshells
# Slow
result=$(cd /tmp && pwd)

# Fast
result="/tmp"

# Avoid command substitution in loops
# Slow
for file in $(ls *.txt); do
echo "$file"
done

# Fast
for file in *.txt; do
echo "$file"
done

String Operations

Efficient String Manipulation

# Parameter expansion (fast)
string="hello world"
echo "${string#hello }" # Remove prefix
echo "${string%world}" # Remove suffix
echo "${string/world/bash}" # Replace first occurrence
echo "${string//l/L}" # Replace all occurrences

# Avoid sed/awk for simple operations
# Slow
echo "$string" | sed 's/world/bash/'

# Fast
echo "${string/world/bash}"

String Comparison

# Use [[ for string comparison
# Slow
if [ "$string" = "hello" ]; then
echo "match"
fi

# Fast
if [[ $string == "hello" ]]; then
echo "match"
fi

# Pattern matching
if [[ $string == hello* ]]; then
echo "starts with hello"
fi

File Operations

Efficient File Processing

# Read files efficiently
# Slow (spawns cat process)
while read -r line; do
echo "$line"
done < <(cat file.txt)

# Fast (direct file reading)
while IFS= read -r line; do
echo "$line"
done < file.txt

# Load a whole file into an array in one step
# (mapfile reads every line into memory; for very large files prefer the streaming while-read loop)
mapfile -t lines < file.txt

File Testing

# Use efficient file tests
# Multiple tests on same file
if [[ -f "$file" && -r "$file" && -s "$file" ]]; then
echo "File is readable and non-empty"
fi

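# Guard with one test and run the remaining tests only if it passes
# (bash does not cache stat results; each test still inspects the file itself)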
if [[ -f "$file" ]]; then
if [[ -r "$file" && -s "$file" ]]; then
echo "File is readable and non-empty"
fi
fi

Loop Optimization

Efficient Loops

# Use C-style loops for arithmetic
# Slow
for i in $(seq 1 1000); do
echo "$i"
done

# Fast
for ((i=1; i<=1000; i++)); do
echo "$i"
done

# Avoid command substitution in loops
# Slow
for file in $(find . -name "*.txt"); do
echo "$file"
done

# Fast
while IFS= read -r -d '' file; do
echo "$file"
done < <(find . -name "*.txt" -print0)

Loop Unrolling

# Process multiple items per iteration
# Run process_file on every argument, in batches of four background jobs.
# Arguments: file names, passed one per invocation to process_file
# Note: the bare `wait` blocks until *all* background children of this shell
#       have finished, including any the caller started earlier.
process_files() {
  local -r batch_size=4
  local files=("$@")
  local total=${#files[@]}
  local start slot

  for ((start = 0; start < total; start += batch_size)); do
    # Launch at most batch_size jobs; the bounds check happens in the
    # current shell, so no subshell is forked for slots past the end.
    for ((slot = start; slot < start + batch_size && slot < total; slot++)); do
      process_file "${files[slot]}" &
    done
    # Barrier: finish this batch before starting the next one.
    wait
  done
}

Memory Management

Efficient Variable Usage

# Avoid large string concatenation
# Slow
result=""
for i in {1..1000}; do
result="$result$i"
done

# Fast (use arrays)
results=()
for i in {1..1000}; do
  results+=("$i")
done
# Join without a separator. "${results[*]}" would insert the first character
# of IFS (a space by default) between elements, which is not the same string
# as the concatenation loop produces.
printf -v result '%s' "${results[@]}"

# Unset large variables
unset large_array

Memory-Efficient Processing

# Process files in chunks
# Stream a file line by line through process_line.
# Arguments: $1 - path of the file to read
# Globals:   processed_data is unset after every chunk_size lines
# Returns:   non-zero if the file cannot be opened
# Note: process_line inherits the file as its stdin while the loop runs.
process_large_file() {
  local file="$1"
  local chunk_size=1000
  local line_count=0
  local line

  # `|| [[ -n "$line" ]]` keeps a final line that has no trailing newline;
  # read returns non-zero at EOF but still fills $line in that case.
  while IFS= read -r line || [[ -n "$line" ]]; do
    # Process line
    process_line "$line"

    line_count=$((line_count + 1))

    # Periodic cleanup
    if ((line_count % chunk_size == 0)); then
      # Clear processed data
      unset processed_data
    fi
  done < "$file"
}

Parallel Processing

Background Jobs

# Simple parallel execution
# Run process_file on every *.txt file in the current directory,
# at most max_jobs background jobs per batch.
process_files_parallel() {
  local max_jobs=4
  local job_count=0
  local file

  for file in *.txt; do
    # When nothing matches, the glob stays as the literal string "*.txt";
    # skip it instead of handing a non-existent name to process_file.
    [[ -e "$file" || -L "$file" ]] || continue

    process_file "$file" &
    job_count=$((job_count + 1))

    # Wait if max jobs reached
    if ((job_count >= max_jobs)); then
      wait
      job_count=0
    fi
  done

  # Wait for remaining jobs
  wait
}

Job Control

# Advanced job control
# Run shell command strings concurrently with a cap on running jobs.
# Arguments: $1   - maximum number of concurrent jobs (non-negative integer)
#            $2.. - command strings, each executed with eval in the background
# Returns:   2 if $1 is not a non-negative integer
# NOTE: every task is passed to eval, so it runs as arbitrary shell code.
#       Never pass strings built from untrusted input.
parallel_processor() {
  local max_jobs="$1"
  if ! [[ $max_jobs =~ ^[0-9]+$ ]]; then
    echo "parallel_processor: max_jobs must be a non-negative integer" >&2
    return 2
  fi
  shift
  local tasks=("$@")
  local running_jobs=0
  local task # keep the loop variable out of the caller's scope

  for task in "${tasks[@]}"; do
    # Start job
    eval "$task" &
    running_jobs=$((running_jobs + 1))

    # Once the cap is reached, block until any one job finishes
    if ((running_jobs >= max_jobs)); then
      wait -n
      running_jobs=$((running_jobs - 1))
    fi
  done

  # Wait for all remaining jobs
  wait
}

GNU Parallel

# Use GNU parallel for heavy processing
# Install: sudo apt install parallel

# Basic usage
parallel echo {} ::: 1 2 3 4 5

# Process files (process_file is a shell function, so export it first: export -f process_file)
parallel process_file {} ::: *.txt

# Limit concurrent jobs
parallel -j 4 process_file {} ::: *.txt

# Progress monitoring
parallel --progress process_file {} ::: *.txt

I/O Optimization

Efficient File I/O

# Batch file operations
# Slow
for file in *.txt; do
cp "$file" /backup/
done

# Fast
cp *.txt /backup/

# Use appropriate buffer sizes
# Large file copy with progress
# Copy a file with dd, showing progress.
# Arguments: $1 - source path
#            $2 - destination path
#            $3 - optional dd block size (default 1048576 bytes = 1MB)
# Returns:   dd's exit status
copy_large_file() {
  local source="$1"
  local dest="$2"
  local buffer_size="${3:-1048576}" # 1MB unless the caller overrides it

  dd if="$source" of="$dest" bs="$buffer_size" status=progress
}

Network I/O

# Connection pooling
# Slow (new connection each time)
for url in "${urls[@]}"; do
curl "$url"
done

# Fast (reuse connections)
{
for url in "${urls[@]}"; do
echo "url = $url"
done
} | curl --parallel --parallel-immediate --config -

Algorithm Optimization

Data Structure Choice

# Use associative arrays for lookups
# Slow (linear search)
users=("alice" "bob" "charlie")

# Linear search through the users array.
# Arguments: $1 - user name to look up
# Returns:   0 if $1 equals an element of users, 1 otherwise
is_valid_user() {
  local user="$1"
  local valid_user # loop variable stays local instead of leaking to the caller

  for valid_user in "${users[@]}"; do
    # Quoted right-hand side: literal comparison, not a glob match
    if [[ "$user" == "$valid_user" ]]; then
      return 0
    fi
  done
  return 1
}

# Fast (hash lookup)
declare -A valid_users
valid_users["alice"]=1
valid_users["bob"]=1
valid_users["charlie"]=1

# Hash lookup in the valid_users associative array.
# Arguments: $1 - user name to look up
# Returns:   0 if valid_users holds a non-empty value for $1, non-zero otherwise
is_valid_user() {
  local user="$1"
  # An empty key is not a legal associative-array subscript, so reject it
  # before indexing. ":-" keeps the lookup safe under `set -u` for unknown keys.
  [[ -n "$user" && -n "${valid_users[$user]:-}" ]]
}

Sorting Optimization

# Use appropriate sorting method
# For small arrays: use bash sort
# Sort an indexed array in place with bubble sort (string comparison).
# Arguments: $1 - NAME of the array variable to sort (not its contents)
# Notes:
#   - Elements are compared as strings with [[ a > b ]].
#   - The array is assumed to be dense (indices 0..n-1).
#   - All locals carry a _bs_ prefix so that a caller array called e.g.
#     "arr", "n", "i" or "temp" is not shadowed by this function's variables.
bubble_sort() {
  local -n _bs_ref=$1
  local _bs_n=${#_bs_ref[@]}
  local _bs_i _bs_j _bs_tmp _bs_swapped

  for ((_bs_i = 0; _bs_i < _bs_n - 1; _bs_i++)); do
    _bs_swapped=0
    for ((_bs_j = 0; _bs_j < _bs_n - _bs_i - 1; _bs_j++)); do
      if [[ "${_bs_ref[_bs_j]}" > "${_bs_ref[_bs_j + 1]}" ]]; then
        # Swap
        _bs_tmp="${_bs_ref[_bs_j]}"
        _bs_ref[_bs_j]="${_bs_ref[_bs_j + 1]}"
        _bs_ref[_bs_j + 1]="$_bs_tmp"
        _bs_swapped=1
      fi
    done
    # A pass without swaps means the array is already sorted
    ((_bs_swapped)) || break
  done
  return 0
}

# For large arrays: use external sort
# Sort an indexed array in place using the external sort command.
# Arguments: $1 - NAME of the array variable to sort (not its contents)
# Returns:   0 on success; non-zero if mktemp, sort or mapfile fails
#            (on sort failure the array is left untouched)
# Notes:
#   - Elements are written one per line, so an element that itself contains
#     a newline comes back as several elements.
#   - Locals carry a _ls_ prefix so a caller array named "arr" or
#     "temp_file" is not shadowed.
large_sort() {
  local -n _ls_ref=$1

  # printf '%s\n' with no arguments still prints one empty line, which would
  # turn an empty array into a one-element array. Nothing to sort anyway.
  ((${#_ls_ref[@]} > 0)) || return 0

  local _ls_tmp
  _ls_tmp=$(mktemp) || return

  local _ls_status=0
  if printf '%s\n' "${_ls_ref[@]}" | sort > "$_ls_tmp"; then
    mapfile -t _ls_ref < "$_ls_tmp" || _ls_status=$?
  else
    _ls_status=$?
  fi

  # Remove the temp file on every path, not only on success
  rm -f -- "$_ls_tmp"
  return "$_ls_status"
}

Caching and Memoization

Result Caching

# Cache expensive computations
declare -A cache

# Memoized wrapper around some_expensive_computation.
# Arguments: $1 - input passed through to some_expensive_computation
# Outputs:   the (possibly cached) result on stdout
# Returns:   the computation's exit status when it fails; nothing is cached then
# Globals:   cache (read and written)
# Note: the cache lives in the current shell. Calling this function inside
#       $(...) runs it in a subshell, so entries stored there are lost.
expensive_function() {
  local input="$1"
  local cache_key="expensive_$input"

  # "+cached" tests whether the key exists, so an empty result also counts
  # as a hit instead of being recomputed on every call.
  if [[ -n "${cache[$cache_key]+cached}" ]]; then
    printf '%s\n' "${cache[$cache_key]}"
    return
  fi

  # Compute result; declared separately so `local` does not mask the status
  local result
  result=$(some_expensive_computation "$input") || return

  # Cache result
  cache["$cache_key"]="$result"
  printf '%s\n' "$result"
}

File-Based Caching

# Cache to files
# Return the output of expensive_operation, cached on disk for one hour.
# Arguments: $1 - cache key, used as the file name under /tmp/script_cache
# Outputs:   the cached or freshly computed result on stdout
# Returns:   expensive_operation's exit status when it fails (nothing is
#            cached in that case)
# Note: file age is read with `stat -c %Y`, which is the GNU stat syntax.
cache_to_file() {
  local cache_key="$1"
  local cache_dir="/tmp/script_cache"
  local cache_file="$cache_dir/$cache_key"
  local max_age=3600

  mkdir -p "$cache_dir"

  # Check if cache exists and is recent
  local now mtime
  if [[ -f "$cache_file" ]]; then
    now=$(date +%s)
    if mtime=$(stat -c %Y "$cache_file") && ((now - mtime < max_age)); then
      cat "$cache_file"
      return
    fi
  fi

  # Generate result; a failed run must not be stored as if it were valid
  local result
  result=$(expensive_operation) || return

  # Write to a temp file in the same directory and rename it into place, so
  # a concurrent reader never sees a half-written cache file. Caching is
  # best effort: the result is still printed if the write fails.
  local tmp_file
  if tmp_file=$(mktemp "$cache_dir/.cache_tmp.XXXXXX"); then
    if ! { printf '%s\n' "$result" > "$tmp_file" &&
      mv -f -- "$tmp_file" "$cache_file"; }; then
      rm -f -- "$tmp_file"
    fi
  fi

  printf '%s\n' "$result"
}

Profiling and Benchmarking

Performance Profiling

# Profile script execution
# Print a quick profile of a script: it is run twice, once traced and timed,
# once under /usr/bin/time for resource figures.
# Arguments: $1 - path of the script to profile
profile_script() {
  local target="$1"

  printf 'Profiling %s...\n' "$target"

  # Run 1: shell `time` keyword around a traced run; only the first
  # 20 lines of combined stdout/stderr are shown.
  time bash -x "$target" 2>&1 | head -n 20

  # Run 2: verbose /usr/bin/time report, reduced to peak memory and CPU times.
  /usr/bin/time -v bash "$target" 2>&1 |
    grep -E -e "Maximum resident" -e "User time" -e "System time"
}

Benchmarking Functions

# Benchmark function execution
# Time repeated calls of a function and report total and average duration.
# Arguments: $1   - name of the function/command to benchmark
#            $2   - number of iterations (positive integer, default 100)
#            $3.. - arguments forwarded to the benchmarked function
# Outputs:   a header line, then total and average time in seconds
# Returns:   2 if the iteration count is not a positive integer
# Requires:  bc, and a date that supports %N
benchmark_function() {
  local func_name="$1"
  local iterations="${2:-100}"
  # Forward only what follows the name and the count; passing "$@" would
  # hand the name and the count to the benchmarked function as well.
  local func_args=("${@:3}")
  local i

  # Also rules out 0, which would make bc divide by zero below
  if ! [[ $iterations =~ ^[1-9][0-9]*$ ]]; then
    echo "benchmark_function: iterations must be a positive integer" >&2
    return 2
  fi

  echo "Benchmarking $func_name ($iterations iterations)..."

  local start_time end_time total_time avg_time
  start_time=$(date +%s.%N)

  for ((i = 0; i < iterations; i++)); do
    # $func_name is left unquoted as before, so a value such as
    # "cmd --flag" still splits into a command plus its option.
    $func_name "${func_args[@]}"
  done

  end_time=$(date +%s.%N)
  total_time=$(echo "$end_time - $start_time" | bc)
  avg_time=$(echo "scale=6; $total_time / $iterations" | bc)

  echo "Total time: ${total_time}s"
  echo "Average time: ${avg_time}s"
}

Best Practices

Performance Guidelines

  1. Use builtins - Prefer bash builtins over external commands
  2. Minimize subshells - Avoid unnecessary command substitution
  3. Efficient loops - Use appropriate loop constructs
  4. Parallel processing - Use background jobs for independent tasks
  5. Cache results - Store expensive computations
  6. Profile regularly - Measure actual performance
  7. Choose algorithms - Pick algorithms and data structures that fit the workload

Memory Guidelines

  1. Unset variables - Free memory from large variables
  2. Process in chunks - Don't load entire files into memory
  3. Use streams - Process data as it flows
  4. Avoid string concatenation - Use arrays instead
  5. Monitor usage - Track memory consumption

Common Optimizations

# Replace external commands with builtins
# Instead of: $(basename "$file")
echo "${file##*/}"

# Instead of: $(dirname "$file")  (equivalent only when $file contains a "/" and has no trailing slash)
echo "${file%/*}"

# Instead of: $(expr $a + $b)
echo $((a + b))

# Instead of: $(printf '%s' "$string" | wc -m)  (echo "$string" | wc -c would also count the trailing newline)
echo ${#string}

# Instead of: $(echo "$string" | tr '[:upper:]' '[:lower:]')
echo "${string,,}"

Performance Testing

Load Testing

# Simulate load
# Run a command a fixed number of times, spread over parallel workers.
# Arguments: $1 - number of concurrent worker subshells (positive integer)
#            $2 - total number of requests to issue (non-negative integer)
#            $3 - function/command to run for each request
# Outputs:   a header line and a completion line on stdout
# Returns:   2 if $1 or $2 is not a valid integer
load_test() {
  local concurrent_jobs="$1"
  local total_requests="$2"
  local target_function="$3"
  local i j quota

  # concurrent_jobs is used as a divisor, so it must be at least 1
  if ! [[ $concurrent_jobs =~ ^[1-9][0-9]*$ && $total_requests =~ ^[0-9]+$ ]]; then
    echo "load_test: need a positive job count and a non-negative request count" >&2
    return 2
  fi

  echo "Load testing: $concurrent_jobs concurrent jobs, $total_requests total requests"

  local requests_per_job=$((total_requests / concurrent_jobs))
  # Requests left over by the integer division; the first `remainder`
  # workers take one extra each so that exactly total_requests are issued.
  local remainder=$((total_requests % concurrent_jobs))

  for ((i = 0; i < concurrent_jobs; i++)); do
    quota=$((requests_per_job + (i < remainder ? 1 : 0)))
    ((quota > 0)) || continue
    (
      for ((j = 0; j < quota; j++)); do
        # Unquoted on purpose: "cmd arg" splits into a command and its argument
        $target_function
      done
    ) &
  done

  wait
  echo "Load test completed"
}

Stress Testing

# Memory stress test
# Allocate a series of global arrays to put pressure on memory, then free them.
# Arguments: $1 - number of allocation steps
# Outputs:   one "Allocated <n>MB" line per step, then "Cleaning up..."
# Globals:   creates stress_array_0 .. stress_array_<n-1> and unsets them again
# Note: each step stores the 10000 numbers printed by `seq 1 10000`; the
#       "MB" in the progress message is a label, not a measured size.
memory_stress_test() {
  local max_size="$1" # in MB
  local current_size=0
  local arrays=()
  local array_name payload

  # The payload is the same for every step, so generate it once
  payload=$(seq 1 10000)

  while [[ $current_size -lt $max_size ]]; do
    array_name="stress_array_$current_size"
    # Create the array by name without eval: declare it global, then let
    # mapfile fill it with one element per payload line.
    declare -ga "$array_name"
    mapfile -t "$array_name" <<< "$payload"
    arrays+=("$array_name")
    current_size=$((current_size + 1))

    echo "Allocated ${current_size}MB"
    sleep 0.1
  done

  echo "Cleaning up..."
  for array_name in "${arrays[@]}"; do
    unset "$array_name"
  done
}

Monitoring and Alerting

Performance Monitoring

# Monitor script performance
# Run a script under `timeout` and warn when it is slow.
# Arguments: $1 - script to run with bash
#            $2 - time limit handed to timeout; also used in integer arithmetic
# Outputs:   a WARNING line when timeout reports status 124, or when the
#            run took longer than half of the limit
monitor_performance() {
  local script="$1"
  local max_time="$2"
  local began finished elapsed rc half_limit

  began=$(date +%s)
  timeout "$max_time" bash "$script"
  rc=$?
  finished=$(date +%s)
  elapsed=$((finished - began))

  # 124 is the status this function treats as "timed out"
  if [[ $rc -eq 124 ]]; then
    echo "WARNING: Script timed out after ${max_time}s"
    return
  fi

  # Half of the limit is the soft threshold; it is only computed when the
  # run did not time out.
  half_limit=$((max_time / 2))
  if [[ $elapsed -gt $half_limit ]]; then
    echo "WARNING: Script took ${elapsed}s (threshold: ${half_limit}s)"
  fi
}

Resource Alerting

# Alert on resource usage
# Print an ALERT line when memory or CPU usage exceeds a fixed threshold.
# Thresholds: memory above 80 (percent), CPU above 90 (percent).
# Inputs:     the "Mem:" line of `free` and the "Cpu(s)" line of `top -bn1`
check_resources() {
  local mem_limit=80
  local cpu_limit=90
  local mem_pct cpu_pct

  # Memory: column 3 divided by column 2 of the "Mem:" line, as a rounded percentage
  mem_pct=$(free | awk '/Mem:/ {printf("%.0f", $3/$2*100)}')
  if [[ $mem_pct -gt $mem_limit ]]; then
    printf 'ALERT: Memory usage at %s%%\n' "$mem_pct"
  fi

  # CPU: second field of the "Cpu(s)" line, cut at the first "%"
  cpu_pct=$(top -bn1 | grep "Cpu(s)" | awk '{print $2}' | cut -d'%' -f1)
  # Only the integer part takes part in the comparison
  if [[ ${cpu_pct%.*} -gt $cpu_limit ]]; then
    printf 'ALERT: CPU usage at %s%%\n' "$cpu_pct"
  fi
}

See Advanced Features for complex optimizations and System Operations for system-level performance considerations.