Performance Optimization
Best practices and techniques for optimizing Bash script performance, efficiency, and resource usage.
Performance Fundamentals
Understanding Performance
# Measure execution time (the shell reports real, user and sys time)
time ./script.sh
# POSIX-format timing: -p selects the portable one-value-per-line output
time -p ./script.sh
# Custom timing
# Sample the clock before and after the run; %N (nanoseconds) needs GNU date
start_time=$(date +%s.%N)
./script.sh
end_time=$(date +%s.%N)
# bc does the fractional subtraction that integer shell arithmetic cannot
duration=$(echo "$end_time - $start_time" | bc)
echo "Execution time: ${duration}s"
Resource Monitoring
# Monitor resource usage
top -p "$$" # Monitor current process
htop # Interactive process viewer
iostat 1 # I/O statistics
vmstat 1 # Virtual memory statistics
# Memory usage
ps -o pid,vsz,rss,comm -p "$$"
# grep reads the status file itself; piping it through cat only adds a process
grep -E "VmSize|VmRSS" "/proc/$$/status"
Process Optimization
Efficient Command Execution
# Avoid unnecessary command substitution
# Slow: a command substitution plus an external cut just to take the first word
result=$(echo "hello world" | cut -d' ' -f1)
# Fast: parameter expansion removes everything from the first space onward
result="hello world"
result=${result%% *}
# Avoid multiple pipe operations
# Slow: four processes (cat, grep, awk, sort)
cat file.txt | grep pattern | awk '{print $1}' | sort
# Fast: awk opens the file and filters on the pattern itself, leaving two processes
awk '/pattern/ {print $1}' file.txt | sort
Builtin vs External Commands
# Use bash builtins when possible
# Slow (external command)
# printf '%s' emits no trailing newline, so wc -c counts only the string itself
# and this test agrees with the ${#string} form below (in single-byte locales).
if [ "$(printf '%s' "$string" | wc -c)" -gt 10 ]; then
  echo "String is long"
fi
# Fast (builtin)
if [ "${#string}" -gt 10 ]; then
  echo "String is long"
fi
# Common builtins
printf "%s\n" "text" # Instead of echo
# Instead of line=$(head -n 1 file.txt): read needs the file on its stdin
IFS= read -r line < file.txt
Minimize Subshells
# Avoid unnecessary subshells
# Slow
result=$(cd /tmp && pwd)
# Fast
result="/tmp"
# Avoid command substitution in loops
# Slow (parses ls output; also breaks on names containing whitespace)
for file in $(ls *.txt); do
  echo "$file"
done
# Fast
for file in *.txt; do
  # With no match the pattern stays literal ("*.txt"); skip that pseudo-entry
  [[ -e "$file" ]] || continue
  echo "$file"
done
String Operations
Efficient String Manipulation
# Parameter expansion (fast): every form below is evaluated by the shell itself
string="hello world"
echo "${string#hello }" # Remove prefix -> "world"
echo "${string%world}" # Remove suffix -> "hello "
echo "${string/world/bash}" # Replace first occurrence -> "hello bash"
echo "${string//l/L}" # Replace all occurrences -> "heLLo worLd"
# Avoid sed/awk for simple operations
# Slow: a pipeline and an external sed for a single substitution
echo "$string" | sed 's/world/bash/'
# Fast: same result for this input, no extra process
echo "${string/world/bash}"
String Comparison
# Use [[ for string comparison
# POSIX test: [ is a builtin too, but its operands undergo word splitting,
# so every expansion has to be quoted
if [ "$string" = "hello" ]; then
echo "match"
fi
# Bash keyword: [[ does not word-split or glob $string, so the quotes can be dropped
if [[ $string == "hello" ]]; then
echo "match"
fi
# Pattern matching: an unquoted right-hand side of == is treated as a glob pattern
if [[ $string == hello* ]]; then
echo "starts with hello"
fi
File Operations
Efficient File Processing
# Read files efficiently
# Slow (spawns cat process)
# Without IFS= this loop also trims leading/trailing whitespace from each line
while read -r line; do
echo "$line"
done < <(cat file.txt)
# Fast (direct file reading)
# IFS= keeps surrounding whitespace; -r keeps backslashes literal
while IFS= read -r line; do
echo "$line"
done < file.txt
# Process large files
# Use mapfile for arrays: reads every line into lines[], -t strips the newlines
mapfile -t lines < file.txt
File Testing
# Use efficient file tests
# Multiple tests on same file, combined in one [[ ]] (stops at the first failing test)
if [[ -f "$file" && -r "$file" && -s "$file" ]]; then
echo "File is readable and non-empty"
fi
# Nested form of the same checks: existence first, then readability and size.
# NOTE(review): nothing is cached here; each -f/-r/-s test still queries the file.
if [[ -f "$file" ]]; then
if [[ -r "$file" && -s "$file" ]]; then
echo "File is readable and non-empty"
fi
fi
Loop Optimization
Efficient Loops
# Use C-style loops for arithmetic
# Slow: runs the external seq and word-splits its output
for i in $(seq 1 1000); do
echo "$i"
done
# Fast: the counter is handled by shell arithmetic, no extra process
for ((i=1; i<=1000; i++)); do
echo "$i"
done
# Avoid command substitution in loops
# Slow: the find output is word-split, so paths containing whitespace break apart
for file in $(find . -name "*.txt"); do
echo "$file"
done
# Fast: NUL-delimited names (-print0 / read -d '') survive any character in a path
while IFS= read -r -d '' file; do
echo "$file"
done < <(find . -name "*.txt" -print0)
Loop Unrolling
# Process multiple items per iteration
# Run process_file on every argument, four at a time.
# Arguments: the items to hand to process_file, one per call.
# Each group of up to four calls runs in the background; the function waits
# for the whole group to finish before launching the next one.
process_files() {
  local -a pending=("$@")
  local total=${#pending[@]}
  local base offset
  for ((base = 0; base < total; base += 4)); do
    # Launch the current group; slots past the end of the list are skipped
    for ((offset = 0; offset < 4; offset++)); do
      if ((base + offset < total)); then
        process_file "${pending[base + offset]}" &
      fi
    done
    wait
  done
}
Memory Management
Efficient Variable Usage
# Avoid large string concatenation
# Slow: every iteration re-expands and copies the whole accumulated string
result=""
for i in {1..1000}; do
  result="$result$i"
done
# Fast (use arrays): collect the pieces, then join them once
results=()
for i in {1..1000}; do
  results+=("$i")
done
# printf -v joins with no separator, so the value equals the "slow" result
# ("${results[*]}" would insert the first character of IFS between elements)
printf -v result '%s' "${results[@]}"
# Unset large variables
unset large_array
Memory-Efficient Processing
# Process files in chunks
# Feed a file to process_line one line at a time.
# Arguments: $1 - path of the file to read
# Globals:   processed_data is unset after every 1000 lines
#            (presumably filled by process_line; confirm against its definition)
# Returns:   non-zero if the file cannot be opened
process_large_file() {
  local file="$1"
  local chunk_size=1000
  local line_count=0
  local line
  # "|| [[ -n $line ]]" keeps a final line that has no trailing newline
  while IFS= read -r line || [[ -n "$line" ]]; do
    # Process line
    process_line "$line"
    line_count=$((line_count + 1))
    # Periodic cleanup
    if ((line_count % chunk_size == 0)); then
      # Clear processed data
      unset processed_data
    fi
  done < "$file"
}
Parallel Processing
Background Jobs
# Simple parallel execution
# Run process_file on every *.txt entry of the current directory,
# in background batches of up to four jobs.
# Arguments: none
# A directory without any *.txt entry results in no process_file call.
process_files_parallel() {
  local max_jobs=4
  local job_count=0
  local file
  for file in *.txt; do
    # An unmatched glob stays literal ("*.txt"); skip that pseudo-entry
    [[ -e "$file" || -L "$file" ]] || continue
    process_file "$file" &
    job_count=$((job_count + 1))
    # Wait if max jobs reached
    if ((job_count >= max_jobs)); then
      wait
      job_count=0
    fi
  done
  # Wait for remaining jobs
  wait
}
Job Control
# Advanced job control
# Run shell command strings concurrently with a cap on running jobs.
# Arguments: $1   - maximum number of jobs running at once
#            $2.. - command strings, each executed with eval in the background
# NOTE: eval executes the strings verbatim; pass trusted commands only.
# Requires "wait -n" (bash 4.3 or newer).
parallel_processor() {
  local max_jobs="$1"
  shift
  local tasks=("$@")
  local running_jobs=0
  local task # keep the loop variable out of the caller's scope
  for task in "${tasks[@]}"; do
    # Start job
    eval "$task" &
    running_jobs=$((running_jobs + 1))
    # Check if we need to wait
    if [[ $running_jobs -ge $max_jobs ]]; then
      wait -n # Wait for any job to complete
      running_jobs=$((running_jobs - 1))
    fi
  done
  # Wait for all remaining jobs
  wait
}
GNU Parallel
# Use GNU parallel for heavy processing
# Install: sudo apt install parallel
# Basic usage
parallel echo {} ::: 1 2 3 4 5
# parallel starts each job in a fresh shell, so a shell function has to be
# exported before parallel can call it
export -f process_file
# Process files
parallel process_file {} ::: *.txt
# Limit concurrent jobs
parallel -j 4 process_file {} ::: *.txt
# Progress monitoring
parallel --progress process_file {} ::: *.txt
I/O Optimization
Efficient File I/O
# Batch file operations
# Slow: one cp process per file
for file in *.txt; do
  cp -- "$file" /backup/
done
# Fast: a single cp process receives the whole list
# "--" ends option parsing, so a name such as "-r.txt" is copied, not read as a flag
cp -- *.txt /backup/
# Use appropriate buffer sizes
# Large file copy with progress
# Copy a file with dd, printing transfer progress on stderr.
# Arguments: $1 - source path
#            $2 - destination path
#            $3 - optional block size passed to dd as bs= (default 1048576 = 1 MiB)
# Returns:   dd's exit status
# status=progress is a GNU dd option.
copy_large_file() {
  local source="$1"
  local dest="$2"
  local buffer_size="${3:-1048576}"
  dd if="$source" of="$dest" bs="$buffer_size" status=progress
}
Network I/O
# Connection pooling
# Slow (one curl process per URL, so no connection can be shared)
for url in "${urls[@]}"; do
  curl "$url"
done
# Fast (single curl process; transfers run in parallel and can share connections)
# --parallel-immediate is left out on purpose: it tells curl to prefer opening
# new connections over reusing existing ones.
{
  for url in "${urls[@]}"; do
    # Config-file values are double-quoted; escape backslashes and quotes first
    url=${url//\\/\\\\}
    url=${url//\"/\\\"}
    printf 'url = "%s"\n' "$url"
  done
} | curl --parallel --config -
Algorithm Optimization
Data Structure Choice
# Use associative arrays for lookups
# Slow (linear search)
users=("alice" "bob" "charlie")
# Linear membership test against the users array.
# Arguments: $1 - user name to look up
# Returns:   0 if $1 equals an element of users, 1 otherwise
is_valid_user() {
  local user="$1"
  local valid_user # keep the loop variable out of the caller's scope
  for valid_user in "${users[@]}"; do
    if [[ "$user" == "$valid_user" ]]; then
      return 0
    fi
  done
  return 1
}
# Fast (hash lookup)
declare -A valid_users
valid_users["alice"]=1
valid_users["bob"]=1
valid_users["charlie"]=1
# Hash-based membership test against the valid_users associative array.
# Arguments: $1 - user name to look up
# Returns:   0 if valid_users holds a non-empty value for $1, 1 otherwise
is_valid_user() {
  local user="${1-}"
  # An empty key is an invalid associative subscript, so reject it up front.
  # The "-" default keeps an unknown key from aborting the shell under set -u.
  [[ -n "$user" && -n "${valid_users[$user]-}" ]]
}
Sorting Optimization
# Use appropriate sorting method
# For small arrays: use bash sort
# Sort an array in place with bubble sort.
# Arguments: $1 - NAME of the array variable to sort (not its contents)
# Elements are compared as strings with [[ > ]], i.e. in the current locale's
# collation order, not numerically.
# Every local carries a __bs_ prefix: a nameref resolves its target by name at
# use time, so a local called arr, i, j, n or temp would capture a caller
# array of that name.
bubble_sort() {
  local -n __bs_ref=$1
  local __bs_n=${#__bs_ref[@]}
  local __bs_i __bs_j __bs_tmp
  for ((__bs_i = 0; __bs_i < __bs_n - 1; __bs_i++)); do
    for ((__bs_j = 0; __bs_j < __bs_n - __bs_i - 1; __bs_j++)); do
      if [[ "${__bs_ref[__bs_j]}" > "${__bs_ref[__bs_j + 1]}" ]]; then
        # Swap
        __bs_tmp="${__bs_ref[__bs_j]}"
        __bs_ref[__bs_j]="${__bs_ref[__bs_j + 1]}"
        __bs_ref[__bs_j + 1]="$__bs_tmp"
      fi
    done
  done
}
# For large arrays: use external sort
# Sort an array in place using the external sort command.
# Arguments: $1 - NAME of the array variable to sort
# Returns:   0 on success, non-zero if mktemp or sort fails (array left untouched)
# Elements are written one per line, so an element containing a newline is
# split into several elements.
# Locals carry a __ls_ prefix so the nameref cannot collide with a caller
# array named arr or temp_file.
large_sort() {
  local -n __ls_ref=$1
  # printf with no arguments would still print one empty line and turn an
  # empty array into (""), so return early
  ((${#__ls_ref[@]} > 0)) || return 0
  local __ls_tmp __ls_status=0
  __ls_tmp=$(mktemp) || return
  if printf '%s\n' "${__ls_ref[@]}" | sort > "$__ls_tmp"; then
    mapfile -t __ls_ref < "$__ls_tmp"
  else
    __ls_status=1
  fi
  rm -f -- "$__ls_tmp"
  return "$__ls_status"
}
Caching and Memoization
Result Caching
# Cache expensive computations
declare -A cache
# Memoizing wrapper around some_expensive_computation.
# Arguments: $1 - input handed to some_expensive_computation
# Outputs:   the (possibly cached) result on stdout
# Returns:   0 on success; the computation's status if it fails (nothing cached)
# Globals:   cache (read and written)
# The cache lives in the calling shell: invoking this function inside $(...)
# runs it in a subshell, so entries stored there are lost afterwards.
expensive_function() {
  local input="$1"
  local cache_key="expensive_$input"
  # Check cache; "+cached" tests for presence, so an empty result is a hit too
  if [[ -n "${cache[$cache_key]+cached}" ]]; then
    printf '%s\n' "${cache[$cache_key]}"
    return 0
  fi
  # Compute result; a failed computation is reported, not remembered
  local result
  result=$(some_expensive_computation "$input") || return
  # Cache result
  cache[$cache_key]="$result"
  printf '%s\n' "$result"
}
File-Based Caching
# Cache to files
# File-backed cache around expensive_operation.
# Arguments: $1 - cache key, used as the file name under /tmp/script_cache
#            $2 - optional maximum age in seconds (default 3600)
# Outputs:   the cached or freshly computed result on stdout
# Returns:   0 on success; non-zero if the cache directory cannot be created
#            or expensive_operation fails (nothing is cached in that case)
# "stat -c %Y" is GNU stat; if stat fails the entry is treated as stale.
# NOTE(review): the cache directory is a fixed, shared /tmp path.
cache_to_file() {
  local cache_key="$1"
  local max_age="${2:-3600}"
  local cache_dir="/tmp/script_cache"
  local cache_file="$cache_dir/$cache_key"
  mkdir -p "$cache_dir" || return
  # Check if cache exists and is recent
  local now mtime
  if [[ -f "$cache_file" ]]; then
    now=$(date +%s)
    if mtime=$(stat -c %Y "$cache_file") && ((now - mtime < max_age)); then
      cat "$cache_file"
      return
    fi
  fi
  # Generate and cache result; output of a failed run must not be stored
  local result
  result=$(expensive_operation) || return
  printf '%s\n' "$result" > "$cache_file"
  printf '%s\n' "$result"
}
Profiling and Benchmarking
Performance Profiling
# Profile script execution
# Print a quick profile of a script: a timed trace run, then resource figures.
# Arguments: $1 - path of the script to profile
# The script is executed twice, once per measurement.
# Returns the status of the final grep.
profile_script() {
  local script="$1"
  echo "Profiling $script..."

  # First run: timed, with xtrace merged into stdout and cut to 20 lines
  time bash -x "$script" 2>&1 \
    | head -n 20

  # Second run: /usr/bin/time -v report, reduced to three fields
  /usr/bin/time -v bash "$script" 2>&1 \
    | grep -E "Maximum resident|User time|System time"
}
Benchmarking Functions
# Benchmark function execution
# Time repeated calls of a function or command.
# Arguments: $1   - name of the function/command to run
#            $2   - number of iterations (default 100, must be a positive integer)
#            $3.. - arguments forwarded to every call
# Outputs:   total and average wall-clock time in seconds on stdout
# Returns:   2 if the iteration count is invalid, 0 otherwise
# Timing uses "date +%s%N" (GNU date) and integer nanosecond arithmetic.
benchmark_function() {
  local func_name="$1"
  local iterations="${2:-100}"
  # Drop our own two parameters so "$@" holds only the callee's arguments
  shift "$(($# < 2 ? $# : 2))"
  if ! [[ "$iterations" =~ ^[0-9]+$ ]] || ((10#$iterations == 0)); then
    printf 'benchmark_function: iterations must be a positive integer\n' >&2
    return 2
  fi
  iterations=$((10#$iterations))
  echo "Benchmarking $func_name ($iterations iterations)..."
  local start_ns end_ns total_ns avg_ns i
  start_ns=$(date +%s%N)
  for ((i = 0; i < iterations; i++)); do
    # Left unquoted on purpose: a "command arg" style name is split into words
    $func_name "$@"
  done
  end_ns=$(date +%s%N)
  total_ns=$((end_ns - start_ns))
  avg_ns=$((total_ns / iterations))
  printf 'Total time: %d.%09ds\n' \
    "$((total_ns / 1000000000))" "$((total_ns % 1000000000))"
  printf 'Average time: %d.%06ds\n' \
    "$((avg_ns / 1000000000))" "$((avg_ns % 1000000000 / 1000))"
}
Best Practices
Performance Guidelines
- Use builtins - Prefer bash builtins over external commands
- Minimize subshells - Avoid unnecessary command substitution
- Efficient loops - Use appropriate loop constructs
- Parallel processing - Use background jobs for independent tasks
- Cache results - Store expensive computations
- Profile regularly - Measure actual performance
- Choose algorithms - Use appropriate data structures
Memory Guidelines
- Unset variables - Free memory from large variables
- Process in chunks - Don't load entire files into memory
- Use streams - Process data as it flows
- Avoid repeated string concatenation - Append with += or collect pieces in an array and join them once
- Monitor usage - Track memory consumption
Common Optimizations
# Replace external commands with builtins
# Instead of: $(basename "$file")
# (strips everything up to the last "/"; a trailing slash leaves an empty string)
echo "${file##*/}"
# Instead of: $(dirname "$file")
# (strips from the last "/" on; a path without "/" is returned unchanged, not as ".")
echo "${file%/*}"
# Instead of: $(expr $a + $b)
echo $((a + b))
# Instead of: $(echo "$string" | wc -c)
# (the wc form also counts the newline added by echo, so it is one higher)
echo ${#string}
# Instead of: $(echo "$string" | tr '[:upper:]' '[:lower:]')
# (the ,, expansion needs bash 4 or newer)
echo "${string,,}"
Performance Testing
Load Testing
# Simulate load
# Run a command a fixed number of times, spread over concurrent workers.
# Arguments: $1 - number of concurrent worker subshells (positive integer)
#            $2 - total number of calls to make (non-negative integer)
#            $3 - function/command to call; word-split, so "cmd arg" works
# Returns:   2 on invalid counts, 0 otherwise
# The remainder of total/jobs is handed out one call each to the first
# workers, so exactly $2 calls are made.
load_test() {
  local concurrent_jobs="$1"
  local total_requests="$2"
  local target_function="$3"
  echo "Load testing: $concurrent_jobs concurrent jobs, $total_requests total requests"
  if ! [[ "$concurrent_jobs" =~ ^[1-9][0-9]*$ && "$total_requests" =~ ^[0-9]+$ ]]; then
    printf 'load_test: job and request counts must be integers (jobs >= 1)\n' >&2
    return 2
  fi
  local requests_per_job=$((total_requests / concurrent_jobs))
  local leftover=$((total_requests % concurrent_jobs))
  local i j quota
  for ((i = 0; i < concurrent_jobs; i++)); do
    quota=$((requests_per_job + (i < leftover ? 1 : 0)))
    (
      for ((j = 0; j < quota; j++)); do
        $target_function
      done
    ) &
  done
  wait
  echo "Load test completed"
}
Stress Testing
# Memory stress test
# Allocate a series of global arrays to put pressure on shell memory, then free them.
# Arguments: $1 - number of arrays to allocate
# Each round creates stress_array_<n> with 10000 elements and reports it as
# one more "MB"; that figure is nominal and is not measured.
# All stress_array_* variables are unset again before returning.
memory_stress_test() {
  local max_size="$1" # in MB
  local current_size=0
  local arrays=()
  local array_name array payload
  # Build the 10000-line payload once instead of running seq every round
  payload=$(seq 1 10000)
  while [[ $current_size -lt $max_size ]]; do
    array_name="stress_array_$current_size"
    # mapfile fills the dynamically named array without resorting to eval
    mapfile -t "$array_name" <<< "$payload"
    arrays+=("$array_name")
    current_size=$((current_size + 1))
    echo "Allocated ${current_size}MB"
    sleep 0.1
  done
  echo "Cleaning up..."
  for array in "${arrays[@]}"; do
    unset "$array"
  done
}
Monitoring and Alerting
Performance Monitoring
# Monitor script performance
# Run a script under a time limit and warn when it is slow.
# Arguments: $1 - script to run with bash
#            $2 - time limit in whole seconds (also used in integer arithmetic)
# Outputs:   a WARNING line on stdout if the script timed out or needed more
#            than half of the limit
# Returns:   the exit status of the timeout command: the script's own status,
#            or 124 when the limit was hit
monitor_performance() {
  local script="$1"
  local max_time="$2"
  local start_time end_time duration
  local exit_code=0
  start_time=$(date +%s)
  timeout "$max_time" bash "$script" || exit_code=$?
  end_time=$(date +%s)
  duration=$((end_time - start_time))
  if [[ $exit_code -eq 124 ]]; then
    echo "WARNING: Script timed out after ${max_time}s"
  elif [[ $duration -gt $((max_time / 2)) ]]; then
    echo "WARNING: Script took ${duration}s (threshold: $((max_time / 2))s)"
  fi
  # Hand the script's result back instead of discarding it
  return "$exit_code"
}
Resource Alerting
# Alert on resource usage
# Print an alert when memory or CPU usage exceeds a fixed threshold.
# Arguments: none
# Outputs:   "ALERT: ..." lines on stdout (memory above 80%, CPU above 90%)
# Memory is used/total from the "Mem:" line of free; CPU is the first figure
# on the "Cpu(s)" line of "top -bn1".
# A figure that cannot be parsed is skipped instead of causing a test error.
check_resources() {
  local memory_threshold=80
  local cpu_threshold=90
  local memory_usage cpu_usage cpu_whole
  # Check memory usage ($2 > 0 guards the division)
  memory_usage=$(free | awk '/Mem:/ && $2 > 0 {printf("%.0f", $3/$2*100)}')
  if [[ "$memory_usage" =~ ^[0-9]+$ ]] && ((10#$memory_usage > memory_threshold)); then
    echo "ALERT: Memory usage at ${memory_usage}%"
  fi
  # Check CPU usage
  # Splitting on ":" and "," isolates the first figure even when top prints it
  # directly after the colon ("%Cpu(s):100.0 us"); only the first match is used
  cpu_usage=$(top -bn1 \
    | awk -F'[:,]' '!seen && /Cpu\(s\)/ {gsub(/[^0-9.]/, "", $2); print $2; seen = 1}')
  cpu_whole="${cpu_usage%.*}"
  if [[ "$cpu_whole" =~ ^[0-9]+$ ]] && ((10#$cpu_whole > cpu_threshold)); then
    echo "ALERT: CPU usage at ${cpu_usage}%"
  fi
}
See Advanced Features for complex optimizations and System Operations for system-level performance considerations.