Skip to main content

Performance Monitoring

System Overview Tools

Load Average and Uptime

uptime                 # System uptime and load average
w # Who is logged in and system load
cat /proc/loadavg # Load average raw data

Multi-Purpose Monitoring

top                    # Real-time process and system monitor
htop # Enhanced interactive process viewer
atop # Advanced system and process monitor
glances # Cross-platform system monitoring
nmon # System performance monitor
dstat # Versatile system statistics

CPU Performance Monitoring

CPU Usage Analysis

# Basic CPU monitoring
top -o %CPU # Sort processes by CPU usage
ps aux --sort=-%cpu | head -10 # Top CPU consumers
htop # Interactive CPU monitoring

# CPU statistics
vmstat 1 5 # CPU, memory, I/O stats (1 sec, 5 times)
vmstat -S M 1 # Show in MB
sar -u 1 5 # CPU utilization statistics
mpstat 1 5 # Multi-processor statistics

Advanced CPU Monitoring

# CPU frequency and governors
cpufreq-info # CPU frequency information
cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor
cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq

# CPU performance counters
perf stat ./program # Performance statistics
perf top # Real-time performance monitoring
perf record ./program # Record performance data
perf report # Analyze recorded data

CPU Load Analysis

# Understanding load average
# Load average is the average number of tasks running or waiting to run
# (on Linux it also counts tasks in uninterruptible sleep, e.g. blocked on
# disk I/O). It measures demand on the system, not CPU utilization percent.
# Load of 1.0 = one core's worth of demand (a single core is fully busy)
# Load of 2.0 = two cores' worth of demand (saturates 2 cores, overloads 1)
# Always compare the load against the CPU count reported below.

# Check number of CPUs
nproc # Number of processors
lscpu # CPU information
grep -c '^processor' /proc/cpuinfo # Count logical CPU entries in one process

Memory Performance Monitoring

Memory Usage Analysis

# Basic memory monitoring
free -h # Memory usage (human-readable)
free -m # Memory usage in MB
cat /proc/meminfo # Detailed memory information

# Memory usage by process
ps aux --sort=-%mem | head -10 # Top memory consumers
pmap PID # Process memory map
cat /proc/PID/smaps # Detailed process memory info

Memory Statistics

# Memory performance
vmstat -s # Memory statistics summary
vmstat 1 5 # Monitor memory over time
sar -r 1 5 # Memory utilization statistics
sar -B 1 5 # Paging statistics (-B is sar's paging report; -R is not documented in current sysstat)

Memory Pressure and Swapping

# Swap monitoring
swapon --show # Show swap devices
cat /proc/swaps # Swap usage information
sar -S 1 5 # Swap statistics

# Memory pressure indicators
cat /proc/pressure/memory # Memory pressure (if supported)
vmstat 1 | awk 'NR>2 {print $7, $8}' # Swap in/out

Disk I/O Performance Monitoring

Disk Usage Monitoring

# Disk space usage
df -h # Filesystem usage
df -i # Inode usage
du -sh /path/* # Directory sizes
du -ah /path | sort -rh | head -20 # Largest files

# Disk I/O statistics
iostat -x 1 5 # Extended I/O statistics
iostat -d 1 5 # Device utilization
sar -d 1 5 # Disk activity statistics

Advanced Disk Monitoring

# I/O monitoring by process
iotop # I/O usage by process
iotop -o # Only show processes with I/O
pidstat -d 1 5 # Process I/O statistics

# Block device monitoring
blkid # Block device information
lsblk # List block devices
cat /proc/diskstats # Disk statistics

Disk Performance Testing

# Disk performance tests
dd if=/dev/zero of=/tmp/test bs=1M count=1000 oflag=direct
dd if=/tmp/test of=/dev/null bs=1M iflag=direct
hdparm -tT /dev/sda # Hard disk timing

Network Performance Monitoring

Network Interface Statistics

# Network interface monitoring
sar -n DEV 1 5 # Network device statistics
cat /proc/net/dev # Network device statistics
netstat -i # Interface statistics
ip -s link # Interface statistics with ip command

Network Connection Monitoring

# Network connections
netstat -tulpn # Listening TCP/UDP sockets with PIDs, numeric addresses (-l = listening only; use -a for all)
ss -tulpn # Same flags with ss, the modern replacement for netstat
ss -s # Socket statistics summary
lsof -i # Network connections by process

Network Traffic Analysis

# Traffic monitoring
iftop # Network traffic by connection
nethogs # Network traffic by process
nload # Network load monitoring
bmon # Bandwidth monitor

Network Performance Testing

# Bandwidth testing
iperf3 -s # Server mode
iperf3 -c server_ip # Client mode
iperf3 -c server_ip -t 30 # 30-second test

# Network latency
ping -c 10 host # Basic latency test
mtr host # Real-time network diagnostics

System-wide Performance Monitoring

Performance Monitoring Tools

# System activity reporter
sar -A # All system activity
sar -u 1 60 # CPU usage (1 sec intervals, 60 times)
sar -r 1 60 # Memory usage
sar -d 1 60 # Disk activity
sar -n DEV 1 60 # Network activity

# Process monitoring
pidstat -u 1 5 # CPU usage by process
pidstat -r 1 5 # Memory usage by process
pidstat -d 1 5 # Disk I/O by process

System Resource Limits

# Check system limits
ulimit -a # All limits for current user
cat /proc/sys/fs/file-max # Maximum file descriptors
cat /proc/sys/kernel/pid_max # Maximum process ID
sysctl kernel.threads-max # Maximum threads

Process Performance Monitoring

Process Analysis

# Process monitoring
ps aux --sort=-%cpu # Sort by CPU usage
ps aux --sort=-%mem # Sort by memory usage
ps -eo pid,ppid,cmd,%cpu,%mem --sort=-%cpu

# Process tree
pstree -p # Process tree with PIDs
ps -ejH # Process hierarchy
ps aux --forest # Process tree format

Process Resource Usage

# Resource usage by process
cat /proc/PID/stat # Process statistics
cat /proc/PID/status # Process status information
cat /proc/PID/io # Process I/O statistics
lsof -p PID # Files opened by process

Performance Profiling

CPU Profiling

# perf profiling
perf record -g ./program # Record with call graph
perf report # Analyze recorded data
perf stat -e cycles,instructions ./program # Specific counters
perf top -g # Real-time profiling with call graph

Memory Profiling

# Valgrind profiling
valgrind --tool=massif ./program # Heap profiler
valgrind --tool=callgrind ./program # Call-graph profiler
valgrind --tool=cachegrind ./program # Cache profiler

System Tracing

# System call tracing
strace -c ./program # Count system calls
strace -e trace=file ./program # Trace file operations
strace -e trace=network ./program # Trace network operations

Performance Metrics Collection

Historical Performance Data

# SAR data collection
sar -u 1 86400 > cpu_usage.log # 24 hours of CPU data
sar -r 1 86400 > memory_usage.log # 24 hours of memory data
sar -d 1 86400 > disk_usage.log # 24 hours of disk data

# Automated monitoring
watch -n 1 'free -h' # Monitor memory every second
watch -n 1 'df -h' # Monitor disk usage every second

Performance Baselines

# Create performance baseline
# A single grouped redirect: baseline.txt is truncated once, then the output
# of every command in the group is written to it in order.
{
  echo "=== System Info ==="
  uname -a
  lscpu
  free -h
  df -h
  echo "=== Load Average ==="
  uptime
} > baseline.txt

Performance Optimization

CPU Optimization

# CPU governor settings
echo performance > /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor
cpufreq-set -g performance # Set performance governor

# Process priority
nice -n -10 ./program # Run with higher priority
renice -n -10 -p PID # Change priority of running process

Memory Optimization

# Memory tuning
echo 1 > /proc/sys/vm/drop_caches # Clear page cache
echo 2 > /proc/sys/vm/drop_caches # Clear dentries and inodes
echo 3 > /proc/sys/vm/drop_caches # Clear all caches

# Swap tuning
sysctl vm.swappiness=10 # Reduce swappiness

I/O Optimization

# I/O scheduler tuning
# The two writes below are alternatives, not a sequence: use the name that
# the device actually offers. List the choices with
#   cat /sys/block/sda/queue/scheduler
# NOTE(review): "deadline" is presumably the legacy single-queue name and
# "mq-deadline" its multi-queue (blk-mq) counterpart; confirm for your kernel.
echo deadline > /sys/block/sda/queue/scheduler
echo mq-deadline > /sys/block/sda/queue/scheduler

# Mount options for performance
mount -o noatime,nodiratime /dev/sda1 /mnt

Performance Alerting

Threshold Monitoring

# CPU threshold
# Every 60 seconds, read the 1-minute load average and e-mail an alert when
# it exceeds load_threshold. Runs until interrupted.
load_threshold=4.0
while true; do
  # /proc/loadavg always uses "." as the decimal separator and its first
  # field is the 1-minute average. Splitting `uptime` output on "," breaks
  # in locales that print the load as "0,52, 0,48, 0,45".
  read -r load _ < /proc/loadavg
  # awk performs the floating-point comparison, so bc is not required. An
  # empty or non-numeric value evaluates to 0 and never triggers the alert.
  if awk -v current="$load" -v limit="$load_threshold" \
    'BEGIN { exit !(current + 0 > limit + 0) }'; then
    echo "High load: $load" | mail -s "Performance Alert" admin@domain.com
  fi
  sleep 60
done

# Memory threshold
# Every 60 seconds, compute used/total memory as a percentage from `free`
# and e-mail an alert when it exceeds mem_threshold. Runs until interrupted.
mem_threshold=90 # percent
while true; do
  # LC_ALL=C pins the "Mem:" row label and the "." decimal separator, which
  # can otherwise change with the locale and leave mem_usage empty.
  # The $2 > 0 guard avoids a division by zero on a malformed row.
  mem_usage=$(LC_ALL=C free \
    | LC_ALL=C awk '/^Mem:/ && $2 > 0 { printf "%.2f", $3 / $2 * 100 }')
  # awk performs the floating-point comparison, so bc is not required. An
  # empty value evaluates to 0 and never triggers the alert.
  if awk -v used="$mem_usage" -v limit="$mem_threshold" \
    'BEGIN { exit !(used + 0 > limit + 0) }'; then
    echo "High memory usage: $mem_usage%" | mail -s "Memory Alert" admin@domain.com
  fi
  sleep 60
done

Performance Best Practices

Monitoring Strategy

  1. Establish baselines - Know normal performance levels
  2. Monitor continuously - Use automated tools for ongoing monitoring
  3. Set thresholds - Define acceptable performance limits
  4. Investigate anomalies - Dig deeper when metrics exceed thresholds
  5. Document findings - Keep records of performance issues and solutions

Key Metrics to Monitor

  • CPU: Load average, CPU utilization, context switches
  • Memory: Used memory, available memory, swap usage
  • Disk: Disk utilization, I/O wait time, disk space
  • Network: Throughput, packet loss, connection counts
  • Processes: Process count, zombie processes, resource usage

Performance Tuning Workflow

# 1. Identify bottleneck
top && iostat -x 1 1 && free -h

# 2. Analyze specific component
perf record -g ./program # For CPU bottlenecks
valgrind --tool=massif ./program # For memory issues
iotop # For I/O bottlenecks

# 3. Implement optimization
# 4. Measure improvement
# 5. Document changes