Skip to main content

MongoDB Monitoring

Server Status Monitoring

Basic Server Metrics

// Server status overview: returns the full status document for this instance.
// Each statement below issues its own serverStatus call and reads one section of the result.
db.serverStatus();

// Memory usage (the `mem` section)
db.serverStatus().mem;

// Connection statistics (the `connections` section)
db.serverStatus().connections;

// Operation counters (the `opcounters` section)
db.serverStatus().opcounters;

// Network metrics (the `network` section)
db.serverStatus().network;

// Storage engine stats (WiredTiger)
// NOTE(review): presumably undefined when the server runs a different storage engine — confirm
db.serverStatus().wiredTiger;

Database and Collection Stats

// Database statistics for the current database
db.stats();
db.stats(1024*1024); // Same statistics with a scale factor of 1024*1024, i.e. sizes in MB

// Collection statistics for the `users` collection
db.users.stats();
db.users.stats(1024*1024); // Same statistics, sizes in MB

// Index statistics: runs the $indexStats aggregation stage on `users`
db.users.aggregate([{$indexStats: {}}]);

// List all collections with sizes: one "<name>: <size> MB" line per collection,
// taken from the first batch of the listCollections result for the current database.
db.runCommand("listCollections").cursor.firstBatch.forEach((coll) => {
  const sizeInMb = db[coll.name].stats().size / 1024 / 1024;
  print(coll.name + ": " + sizeInMb.toFixed(2) + " MB");
});

Performance Monitoring

Current Operations

// View current operations
db.currentOp();

// Filter long-running operations (running for more than 5 seconds)
db.currentOp({"secs_running": {$gt: 5}});

// Kill long-running operation
db.killOp(123456); // 123456 is a placeholder; use the opid reported by currentOp

// Find operations by user
// NOTE(review): confirm that currentOp documents expose a top-level "user" field on your server version
db.currentOp({"user": "appUser"});

// Find operations by database: the regex matches namespaces that start with "myapp."
db.currentOp({"ns": /^myapp\./});

Query Profiling

// Enable profiling for slow queries: level 1 with a slowms threshold of 100
db.setProfilingLevel(1, {slowms: 100});

// View profiler data: the 5 most recent entries (sorted by `ts`, newest first)
db.system.profile.find().sort({ts: -1}).limit(5).pretty();

// Find slowest queries: the 5 entries with the highest `millis`
db.system.profile.find({"millis": {$exists: true}}).sort({"millis": -1}).limit(5);

// Profile data by collection: entry count, average and maximum `millis` per namespace (`ns`),
// ordered by average time, slowest first
db.system.profile.aggregate([
{$group: {
_id: "$ns",
count: {$sum: 1},
avgTime: {$avg: "$millis"},
maxTime: {$max: "$millis"}
}},
{$sort: {avgTime: -1}}
]);

// Clear profiler collection.
// The server rejects dropping system.profile while the profiler is enabled, so it is
// switched off first. Re-enable it afterwards with db.setProfilingLevel(...) if needed.
db.setProfilingLevel(0);
db.system.profile.drop();

Real-time Monitoring

# mongostat - real-time server statistics
# NOTE(review): a password given as the value of -p is visible in shell history and process
# listings; consider omitting the value so the tool prompts for it — confirm for your tool version
mongostat --host localhost:27017 -u admin -p password

# mongotop - collection-level statistics
mongotop --host localhost:27017 -u admin -p password

# Custom mongostat fields: -o selects the columns to display
# NOTE(review): unlike the two commands above, this one passes no credentials — confirm that is intended
mongostat --host localhost:27017 -o 'host,time,insert,query,update,delete,getmore,command'

Replica Set Monitoring

Replica Set Status

// Replica set status
rs.status();

// Replica set configuration
rs.conf();

// Check if node is primary
rs.isMaster();

// Replication lag information: prints how far each secondary is behind the primary.
// (Replaces the deprecated printSlaveReplicationInfo helper, which mongosh rejects.)
db.printSecondaryReplicationInfo();

// Replication info: oplog size and the time range it covers
db.getReplicationInfo();

Oplog Monitoring

// Switch to local database (shell helper); the statements below read its oplog.rs collection
use local;

// Oplog statistics
db.oplog.rs.stats();

// Oplog size and usage: `maxSize` and `size` are divided by 1024^3 and printed in GB
var oplogStats = db.oplog.rs.stats();
print("Oplog size: " + (oplogStats.maxSize / 1024 / 1024 / 1024).toFixed(2) + " GB");
print("Oplog used: " + (oplogStats.size / 1024 / 1024 / 1024).toFixed(2) + " GB");

// Recent oplog entries: the last 5 in reverse natural (insertion) order
db.oplog.rs.find().sort({$natural: -1}).limit(5);

// Check oplog window: time span between the oldest and the newest oplog entry.
// `ts` is a BSON Timestamp, not a Date: its time component is whole seconds since the
// Unix epoch, so the difference is converted from seconds (not milliseconds) to hours.
var first = db.oplog.rs.find().sort({$natural: 1}).limit(1).next();
var last = db.oplog.rs.find().sort({$natural: -1}).limit(1).next();
// Read the seconds component: exposed as `t` where available, otherwise as the
// high 32 bits of the 64-bit timestamp value.
var oplogTsSeconds = function(ts) {
  return typeof ts.t === "number" ? ts.t : ts.getHighBits();
};
var windowHours = (oplogTsSeconds(last.ts) - oplogTsSeconds(first.ts)) / 60 / 60;
print("Oplog window: " + windowHours.toFixed(2) + " hours");

Sharding Monitoring

Shard Status

// Sharding status overview
// NOTE(review): the sh.* helpers presumably need a connection to a mongos — confirm
sh.status();

// Balancer status
sh.getBalancerState(); // whether the balancer is enabled
sh.isBalancerRunning(); // whether the balancer is currently running

// Sharding metadata (chunks, changelog, actionlog) is stored in the `config` database,
// so it is queried explicitly instead of through whatever database is currently selected.
var configDb = db.getSiblingDB("config");

// Chunk distribution: number of chunks per shard, largest first
configDb.chunks.aggregate([
  {$group: {_id: "$shard", count: {$sum: 1}}},
  {$sort: {count: -1}}
]);

// Migration history: the 10 most recent events whose `what` matches moveChunk
configDb.changelog.find({"what": /moveChunk/}).sort({"time": -1}).limit(10);

// Failed migrations: migrate-related entries that carry an error message
configDb.actionlog.find({"what": /migrate/, "details.errmsg": {$exists: true}});

mongos Monitoring

// Connected to mongos
// NOTE(review): presumably the reply's `msg` field is "isdbgrid" on a mongos — confirm
db.isMaster();

// Shard connection pool stats
db.runCommand("connPoolStats");

// Config server connection string, read from the parsed startup options.
// getCmdLineOpts is only accepted by the admin database, so it is sent with adminCommand;
// with runCommand on any other database the reply has no `parsed` field and the
// property access throws.
db.adminCommand("getCmdLineOpts").parsed.sharding.configDB;

Custom Monitoring Scripts

Health Check Script

/**
 * Comprehensive health check.
 *
 * Reads server status, replica set status and database statistics through the
 * shell globals `db` and `rs`, and records a human-readable warning for every
 * condition that looks unhealthy. Errors raised while collecting are reported as
 * an entry in `issues` instead of being thrown.
 *
 * @returns {{timestamp: Date, server: Object, replica: Object, performance: Object, issues: string[]}}
 *   The collected snapshot. `replica` stays `{}` when rs.status() fails
 *   (for example when the node is not part of a replica set).
 */
function mongoHealthCheck() {
  const health = {
    timestamp: new Date(),
    server: {},
    replica: {},
    performance: {},
    issues: []
  };

  try {
    // Server health
    const serverStatus = db.serverStatus();
    health.server = {
      uptime: serverStatus.uptime,
      connections: serverStatus.connections,
      memory: serverStatus.mem,
      opcounters: serverStatus.opcounters
    };

    // Connection usage: share of the total (current + available) connections in use
    const connPct = (serverStatus.connections.current /
      (serverStatus.connections.current + serverStatus.connections.available)) * 100;
    if (connPct > 80) {
      health.issues.push("High connection usage: " + connPct.toFixed(1) + "%");
    }

    // Memory usage: resident memory above 80% of virtual memory
    if (serverStatus.mem.resident > serverStatus.mem.virtual * 0.8) {
      health.issues.push("High memory usage");
    }

    // Replica set health (if applicable)
    try {
      const rsStatus = rs.status();
      const primary = rsStatus.members.find(m => m.state === 1);
      health.replica = {
        name: rsStatus.set,
        members: rsStatus.members.length,
        primary: primary?.name
      };

      // Lag is how far a secondary trails the primary's last applied operation.
      // Comparing against the wall clock of the status call instead would report
      // lag on a quiet replica set even when every member is fully caught up.
      // Without a primary there is nothing to compare to, so fall back to the status date.
      const referenceDate = primary?.optimeDate ?? rsStatus.date;

      // Check for unhealthy members
      rsStatus.members.forEach(function(member) {
        if (member.health !== 1) {
          health.issues.push("Unhealthy replica member: " + member.name);
        }
        if (member.state === 2) { // Secondary
          const lag = (referenceDate - member.optimeDate) / 1000;
          if (lag > 10) {
            health.issues.push(member.name + " replication lag: " + lag + "s");
          }
        }
      });
    } catch (e) {
      // Best effort: rs.status() fails when this is not a replica set; leave `replica` empty
    }

    // Performance checks
    const stats = db.stats();
    health.performance = {
      avgObjSize: stats.avgObjSize,
      indexSize: stats.indexSize,
      dataSize: stats.dataSize
    };

    if (stats.indexSize > stats.dataSize) {
      health.issues.push("Index size larger than data size");
    }

  } catch (error) {
    health.issues.push("Health check error: " + error.message);
  }

  return health;
}

// Run health check, then print a header, the issue count and one line per issue
var healthResult = mongoHealthCheck();
print("MongoDB Health Check:");
print(`Issues found: ${healthResult.issues.length}`);
for (const issue of healthResult.issues) {
  print(`- ${issue}`);
}

Performance Monitoring Script

/**
 * Performance metrics collection.
 *
 * Reads one serverStatus snapshot, the slowest recent profiler entries and the
 * $indexStats output of every non-system collection in the current database,
 * using the shell global `db`.
 *
 * @returns {{timestamp: Date, server: Object, operations: Object, slowQueries: Object[], indexUsage: Object}}
 *   `indexUsage` maps collection name to its $indexStats documents; collections
 *   whose index stats cannot be read are left out.
 */
function collectPerformanceMetrics() {
  const metrics = {
    timestamp: new Date(),
    server: {},
    operations: {},
    slowQueries: [],
    indexUsage: {}
  };

  const serverStatus = db.serverStatus();

  // Server metrics
  metrics.server = {
    uptime: serverStatus.uptime,
    connections: serverStatus.connections.current,
    memory: {
      resident: serverStatus.mem.resident,
      virtual: serverStatus.mem.virtual,
      mapped: serverStatus.mem.mapped
    },
    // The wiredTiger section may be missing, hence the optional access and fallback
    cache: serverStatus.wiredTiger?.cache || {}
  };

  // Operation metrics
  metrics.operations = {
    insert: serverStatus.opcounters.insert,
    query: serverStatus.opcounters.query,
    update: serverStatus.opcounters.update,
    delete: serverStatus.opcounters.delete,
    command: serverStatus.opcounters.command
  };

  // Slow queries (last 5 minutes): the 10 slowest entries above 100 ms
  const fiveMinutesAgo = new Date(Date.now() - 5 * 60 * 1000);
  metrics.slowQueries = db.system.profile.find({
    "ts": {$gte: fiveMinutesAgo},
    "millis": {$gt: 100}
  }).sort({"millis": -1}).limit(10).toArray();

  // Index usage statistics.
  // listCollections must run against the current database (runCommand): the names are
  // looked up on `db` below, so listing the admin database (adminCommand) would query
  // collections that do not exist here and miss the ones that do.
  db.runCommand("listCollections").cursor.firstBatch.forEach(
    function(collection) {
      if (!collection.name.startsWith("system.")) {
        try {
          metrics.indexUsage[collection.name] =
            db[collection.name].aggregate([{$indexStats: {}}]).toArray();
        } catch (e) {
          // Best effort: skip collections whose index stats can't be read
        }
      }
    }
  );

  return metrics;
}

// Collect and display metrics: connection count, resident memory and slow-query count
var perfMetrics = collectPerformanceMetrics();
print("Performance Metrics:");
print(`Active connections: ${perfMetrics.server.connections}`);
print(`Memory usage: ${perfMetrics.server.memory.resident} MB`);
print(`Slow queries (last 5 min): ${perfMetrics.slowQueries.length}`);

External Monitoring Tools

MongoDB Ops Manager

// Install Ops Manager monitoring agent
// Configure monitoring in ops manager UI

// Custom metrics via HTTP API.
// All metrics are taken from a single serverStatus call so they describe the same
// instant (and the server is asked once instead of three times).
var metricsData = (function() {
  var status = db.serverStatus();
  return {
    "timestamp": new Date(),
    "hostname": "mongodb-server-1",
    "metrics": {
      "connections": status.connections.current,
      "opcounters": status.opcounters,
      "memory": status.mem
    }
  };
})();

// Send to monitoring system (pseudo-code)
// HTTP.post("http://monitoring-api/metrics", metricsData);

Prometheus Integration

# Prometheus scrape configuration for the mongodb_exporter endpoint
global:
  scrape_interval: 15s

scrape_configs:
  - job_name: 'mongodb'
    static_configs:
      - targets: ['localhost:9216']
    scrape_interval: 15s
    metrics_path: /metrics
# Start MongoDB exporter for Prometheus, listening on port 9216
# NOTE(review): credentials embedded in the URI are visible in process listings and shell
# history; consider supplying the URI another way — confirm what the exporter supports
mongodb_exporter --mongodb.uri="mongodb://user:pass@localhost:27017" \
--web.listen-address=":9216"

Custom Alerting

/**
 * Alert conditions.
 *
 * Evaluates connection usage, resident memory, replication lag and recent slow
 * queries through the shell globals `db` and `rs`, and returns one alert per
 * violated condition.
 *
 * @param {Object} [thresholds] Optional overrides; the defaults reproduce the
 *   original hard-coded limits.
 * @param {number} [thresholds.connectionUsage=0.9] Fraction of connections in use
 *   above which a critical alert is raised.
 * @param {number} [thresholds.residentMemoryMb=8192] Resident memory limit in MB.
 * @param {number} [thresholds.replicationLagSecs=30] Secondary lag limit in seconds.
 * @param {number} [thresholds.slowQueryMs=5000] Minimum duration of a query counted as slow.
 * @returns {{severity: string, message: string}[]} Alerts in the order the checks run.
 */
function checkAlertConditions(thresholds = {}) {
  const {
    connectionUsage = 0.9,
    residentMemoryMb = 8192, // 8GB
    replicationLagSecs = 30,
    slowQueryMs = 5000 // 5 seconds
  } = thresholds;

  const alerts = [];
  const serverStatus = db.serverStatus();

  // High connection usage
  const connUsage = serverStatus.connections.current /
    (serverStatus.connections.current + serverStatus.connections.available);
  if (connUsage > connectionUsage) {
    // toFixed(1) trims floating-point noise from the threshold (0.9 * 100 prints as 90)
    const limitPct = Number((connectionUsage * 100).toFixed(1));
    alerts.push({
      severity: "critical",
      message: "Connection usage above " + limitPct + "%: " + (connUsage * 100).toFixed(1) + "%"
    });
  }

  // High memory usage
  if (serverStatus.mem.resident > residentMemoryMb) {
    alerts.push({
      severity: "warning",
      message: "Memory usage high: " + serverStatus.mem.resident + " MB"
    });
  }

  // Replication lag
  try {
    const rsStatus = rs.status();
    // Lag is how far a secondary trails the primary's last applied operation.
    // Comparing against the wall clock of the status call would raise alerts on a
    // quiet replica set even when every member is caught up. Without a primary
    // there is nothing to compare to, so fall back to the status date.
    const primary = rsStatus.members.find(m => m.state === 1);
    const referenceDate = primary?.optimeDate ?? rsStatus.date;

    rsStatus.members.forEach(function(member) {
      if (member.state === 2) { // Secondary
        const lag = (referenceDate - member.optimeDate) / 1000;
        if (lag > replicationLagSecs) {
          alerts.push({
            severity: "warning",
            message: member.name + " replication lag: " + lag + " seconds"
          });
        }
      }
    });
  } catch (e) {
    // Best effort: rs.status() fails when this is not a replica set
  }

  // Slow queries
  const slowQueries = db.system.profile.find({
    "ts": {$gte: new Date(Date.now() - 60000)}, // Last minute
    "millis": {$gt: slowQueryMs}
  }).count();

  if (slowQueries > 0) {
    alerts.push({
      severity: "warning",
      message: slowQueries + " slow queries detected in last minute"
    });
  }

  return alerts;
}

// Check and process alerts: print each one as "[SEVERITY] message"
var currentAlerts = checkAlertConditions();
for (const { severity, message } of currentAlerts) {
  print("[" + severity.toUpperCase() + "] " + message);
  // Send to alerting system
}

Log Analysis

MongoDB Log Parsing

# Find slow queries in logs
# NOTE(review): the exact slow-operation wording depends on the server version and log
# format (newer servers write structured JSON logs) — confirm the pattern matches your logs
grep "slow operation" /var/log/mongodb/mongod.log

# Connection events (case-sensitive match on "connection")
grep "connection" /var/log/mongodb/mongod.log

# Index usage warnings (matches every line that mentions "index")
grep "index" /var/log/mongodb/mongod.log

# Memory warnings (-i makes the match case-insensitive)
grep -i "memory" /var/log/mongodb/mongod.log

Log Analysis Script

/**
 * Analyze MongoDB logs (pseudo-code for shell script integration).
 *
 * Placeholder: real log processing would typically live in a shell script that
 * reads the MongoDB log files. This stub only builds the empty result skeleton.
 *
 * @returns {{slowQueries: number, connections: {opened: number, closed: number, failed: number}, errors: Array, warnings: Array}}
 *   A fresh analysis object with every counter at zero and empty lists.
 */
function analyzeMongoLogs() {
  const connections = { opened: 0, closed: 0, failed: 0 };

  // Parsing the log file and filling in the counters would happen here;
  // the implementation depends on log format and parsing tools.

  return {
    slowQueries: 0,
    connections,
    errors: [],
    warnings: []
  };
}

Monitoring Dashboard Queries

Key Metrics for Dashboards

// One serverStatus call and one stats call feed every metric below, so the dashboard
// shows a consistent point-in-time view and the server is not queried repeatedly.
var serverStatusSnapshot = db.serverStatus();
var dbStatsSnapshot = db.stats();

// Connection metrics
var connectionMetrics = {
  current: serverStatusSnapshot.connections.current,
  available: serverStatusSnapshot.connections.available,
  totalCreated: serverStatusSnapshot.connections.totalCreated
};

// Operation metrics
var opMetrics = serverStatusSnapshot.opcounters;

// Memory metrics
var memMetrics = serverStatusSnapshot.mem;

// Storage metrics for the current database
var storageMetrics = {
  dataSize: dbStatsSnapshot.dataSize,
  indexSize: dbStatsSnapshot.indexSize,
  storageSize: dbStatsSnapshot.storageSize
};

// Combine for dashboard
var dashboardData = {
  timestamp: new Date(),
  connections: connectionMetrics,
  operations: opMetrics,
  memory: memMetrics,
  storage: storageMetrics
};

// Emit as pretty-printed JSON (2-space indent)
print(JSON.stringify(dashboardData, null, 2));