Skip to main content

Troubleshooting

Common issues, debugging techniques, performance optimization, and problem-solving strategies for Podman container management.

Common Issues

Installation Problems

Permission Errors

# Error: permission denied
# Solution: Check user namespaces
grep $USER /etc/subuid /etc/subgid

# If missing, add entries
echo "$USER:100000:65536" | sudo tee -a /etc/subuid
echo "$USER:100000:65536" | sudo tee -a /etc/subgid

# Enable user lingering
sudo loginctl enable-linger $USER

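# Apply the new ID mappings (preferred; stops this user's running containers)
podman system migrate

# Last resort: restart the user session
# WARNING: this kills every process owned by $USER, including this shell
pkill -u $USER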

Storage Driver Issues

# Error: storage driver not supported
# Check available drivers
podman info | grep -A 10 "Storage Driver"

# Configure storage driver
mkdir -p ~/.config/containers
cat > ~/.config/containers/storage.conf <<EOF
[storage]
driver = "overlay"
runroot = "/run/user/$(id -u)/containers"
graphroot = "$HOME/.local/share/containers/storage"
EOF

Container Startup Issues

Image Pull Problems

# Error: image not found
# Check registry configuration
podman info | grep -A 10 registries

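# Configure registries (registries.conf v2 format; the v1 [registries.search] table is deprecated)
mkdir -p ~/.config/containers
cat > ~/.config/containers/registries.conf <<EOF
unqualified-search-registries = ['docker.io', 'quay.io']
EOF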

# Pull with full registry path
podman pull docker.io/library/nginx:latest

Port Binding Issues

# Error: port already in use
# Check what's using the port
ss -tulpn | grep :8080
lsof -i :8080

# Use different port
podman run -p 8081:80 nginx

# Rootless containers cannot bind host ports below 1024 by default
# Map an unprivileged host port to the container's privileged port instead
podman run -p 8080:80 nginx # Instead of -p 80:80

Networking Problems

Container Connectivity

# Test container networking
podman exec container_name ping google.com
podman exec container_name nslookup google.com

# Check container IP
podman inspect container_name | grep IPAddress

# Test inter-container communication
podman exec web ping database
podman exec web telnet database 5432

DNS Resolution Issues

# Check DNS configuration
podman exec container_name cat /etc/resolv.conf

# Custom DNS servers
podman run --dns=8.8.8.8 --dns=8.8.4.4 alpine nslookup google.com

# DNS search domains
podman run --dns-search=company.com alpine

Volume and Storage Issues

Permission Problems

# Check file permissions
podman exec container_name ls -la /data

# Fix ownership issues
# (inside the rootless user namespace, UID/GID 0 maps to your own host user)
podman unshare chown -R 0:0 /host/path
podman run --userns=keep-id -v $(pwd):/data alpine

# SELinux context issues
podman run -v /host/path:/data:Z alpine # Private label
podman run -v /host/path:/data:z alpine # Shared label

Storage Space Issues

# Check storage usage
podman system df
podman system df -v

# Clean up unused data
podman system prune
podman system prune -a --volumes

# Check available space
df -h ~/.local/share/containers/storage

Debugging Techniques

Container Inspection

# Detailed container information
podman inspect container_name

# Specific information
podman inspect --format '{{.State.Status}}' container_name
podman inspect --format '{{.NetworkSettings.IPAddress}}' container_name
podman inspect --format '{{.Config.Env}}' container_name

# Container processes
podman top container_name
podman exec container_name ps aux

Log Analysis

# Container logs
podman logs container_name
podman logs -f container_name # Follow logs
podman logs --tail 50 container_name # Last 50 lines
podman logs --since 1h container_name # Last hour
podman logs --until 2023-01-01 container_name

# System logs
journalctl -u user@$UID.service
journalctl -f -u user@$UID.service

Process Debugging

# Container process tree
podman exec container_name ps auxf

# System resources
podman stats container_name
podman stats --no-stream container_name

# File system changes
podman diff container_name

# Container events
podman events --filter container=container_name

Network Debugging

# Network configuration
podman inspect container_name | grep -A 20 NetworkSettings

# Port mappings
podman port container_name

# Network connectivity
podman exec container_name netstat -tulpn
podman exec container_name ss -tulpn

# Test connectivity
podman exec container_name ping -c 3 google.com
podman exec container_name curl -v http://example.com

Performance Issues

Resource Monitoring

# Real-time resource usage
podman stats
podman stats --format "table {{.Container}}\t{{.CPUPerc}}\t{{.MemUsage}}\t{{.NetIO}}\t{{.BlockIO}}"

# Point-in-time system statistics read from inside the container (not historical data)
podman exec container_name cat /proc/meminfo
podman exec container_name cat /proc/cpuinfo
podman exec container_name cat /proc/loadavg

Memory Issues

# Check memory usage
podman stats --format "table {{.Container}}\t{{.MemUsage}}\t{{.MemPerc}}"

# Memory limits
podman inspect container_name | grep -A 5 Memory

# Out of memory debugging
podman exec container_name dmesg | grep -i "killed process"
journalctl -u user@$UID.service | grep -i "memory"

# Cap memory usage (setting --memory-swap equal to --memory disables swap for the container)
podman run --memory=512m --memory-swap=512m container_name

CPU Performance

# CPU usage monitoring
podman stats --format "table {{.Container}}\t{{.CPUPerc}}"

# CPU limits
podman inspect container_name | grep -A 5 Cpu

# CPU-intensive process identification
podman exec container_name top
podman exec container_name htop

# CPU optimization
podman run --cpus=2.0 container_name
podman run --cpuset-cpus=0,1 container_name

I/O Performance

# I/O statistics
podman stats --format "table {{.Container}}\t{{.BlockIO}}\t{{.NetIO}}"

# Disk usage
podman exec container_name df -h
podman exec container_name du -sh /var/log

# I/O throttling (limit per-device read/write bandwidth)
podman run --device-read-bps /dev/sda:1mb container_name
podman run --device-write-bps /dev/sda:1mb container_name

System Diagnostics

System Information

# Podman system info
podman info

# Version information
podman version
podman --version

# System events
podman events
podman events --filter type=container
podman events --filter event=die

Storage Diagnostics

# Storage driver information
podman info | grep -A 15 "Storage Driver"

# Storage usage
podman system df
podman system df -v

# Storage configuration
cat ~/.config/containers/storage.conf

Network Diagnostics

# Network information
podman network ls
podman network inspect podman # Podman's default network is named "podman" (Docker calls it "bridge")

# Network troubleshooting tools
podman run --rm -it --network=host nicolaka/netshoot
podman run --rm -it --pid=host --network=host nicolaka/netshoot

Error Resolution

Common Error Messages

"Permission denied"

# Check user namespaces
grep $USER /etc/subuid /etc/subgid

# Fix permissions
sudo usermod --add-subuids 100000-165535 $USER
sudo usermod --add-subgids 100000-165535 $USER

"No space left on device"

# Check disk space
df -h ~/.local/share/containers/storage

# Clean up
podman system prune -a --volumes
podman image prune -a

"Port already in use"

# Find process using port
lsof -i :8080
ss -tulpn | grep :8080

# Kill process or use different port
podman run -p 8081:80 nginx

"Network not found"

# List networks
podman network ls

# Create network
podman network create mynetwork

Recovery Procedures

Container Recovery

# Stop and restart container
podman stop container_name
podman start container_name

# Recreate container
podman stop container_name
podman rm container_name
podman run -d --name container_name nginx

# Restore from backup
podman import backup.tar restored_image
podman run -d restored_image

System Recovery

# Reset Podman system (use with caution)
podman system reset

# Restore configuration
cp backup/storage.conf ~/.config/containers/
cp backup/registries.conf ~/.config/containers/

# Recreate containers
podman play kube backup.yaml

Performance Optimization

Container Optimization

# Use multi-stage builds
FROM node:18-alpine AS builder
WORKDIR /app
COPY package*.json ./
RUN npm ci --only=production

FROM node:18-alpine AS runtime
COPY --from=builder /app/node_modules ./node_modules
COPY . .
CMD ["npm", "start"]

# Optimize image layers
RUN apk update && apk add --no-cache curl && rm -rf /var/cache/apk/*

Resource Optimization

# Set appropriate limits
podman run --memory=512m --cpus=1.0 nginx

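# Use resource-efficient base images (approximate sizes; choose one)
# Dockerfile comments must be on their own line, not after an instruction
# alpine: ~5MB
FROM alpine:3.18
# debian slim: ~80MB
FROM debian:bullseye-slim
# scratch: 0MB (for static binaries)
FROM scratch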

Storage Optimization

# Use efficient storage driver
[storage]
driver = "overlay"
mount_program = "/usr/bin/fuse-overlayfs"

# Regular cleanup
podman system prune -a --volumes
podman image prune -a

Monitoring and Alerting

Health Checks

# Add health check to container
podman run -d \
--health-cmd="curl -f http://localhost:80 || exit 1" \
--health-interval=30s \
--health-timeout=10s \
--health-retries=3 \
nginx

# Check health status (the Status column includes "(healthy)" / "(unhealthy)")
podman ps --format "table {{.Names}}\t{{.Status}}"

Automated Monitoring

#!/bin/bash
# monitor-containers.sh

# Check container health
for container in $(podman ps --format "{{.Names}}"); do
health=$(podman inspect --format "{{.State.Health.Status}}" $container 2>/dev/null)
if [ "$health" = "unhealthy" ]; then
echo "ALERT: Container $container is unhealthy"
podman logs --tail 20 $container
fi
done

# Check resource usage
podman stats --no-stream --format "table {{.Container}}\t{{.CPUPerc}}\t{{.MemUsage}}"

Log Monitoring

# Centralized logging
podman run -d \
--name nginx \
--log-driver=journald \
--log-opt labels=app=myapp \
nginx

# Monitor logs (CONTAINER_NAME must match the name given with --name above)
journalctl -f CONTAINER_NAME=nginx

Best Practices for Troubleshooting

Systematic Approach

  1. Identify the problem - What exactly is failing?
  2. Gather information - Logs, system info, resource usage
  3. Isolate the issue - Test individual components
  4. Implement solution - Apply fixes incrementally
  5. Verify resolution - Confirm the problem is solved
  6. Document - Record solution for future reference

Debugging Workflow

# 1. Check container status
podman ps -a

# 2. Review logs
podman logs container_name

# 3. Inspect configuration
podman inspect container_name

# 4. Test connectivity
podman exec container_name ping google.com

# 5. Check resources
podman stats container_name

# 6. Verify volumes
podman exec container_name ls -la /data

Prevention Strategies

# Regular maintenance
podman system prune --volumes
podman image prune -a

# Health monitoring
podman run --health-cmd="curl -f http://localhost || exit 1" nginx

# Resource limits
podman run --memory=512m --cpus=1.0 nginx

# Backup important data
podman run --rm -v myvolume:/data -v $(pwd):/backup \
alpine tar czf /backup/backup.tar.gz /data

Quick Reference

Essential Debugging Commands

# Container inspection
podman ps -a
podman logs container_name
podman inspect container_name
podman exec -it container_name sh

# System diagnostics
podman info
podman system df
podman events

# Network debugging
podman network ls
podman port container_name
podman exec container_name netstat -tulpn

# Performance monitoring
podman stats
podman top container_name

Common Solutions

# Permission issues
podman unshare chown -R 0:0 /path
podman run --userns=keep-id container

# Network issues
podman run --dns=8.8.8.8 container
podman network create mynet

# Storage issues
podman system prune -a --volumes
podman run -v /host/path:/data:Z container

# Resource issues
podman run --memory=512m --cpus=1.0 container